Merge the latest version of `develop`: bitshift v1.0 (beta), tagged `v1.0`.
@@ -1,4 +1,10 @@ | |||
static/css/* | |||
!lib | |||
*.swp | |||
.sass-cache | |||
.DS_Store | |||
.my.cnf | |||
# github premade rules | |||
*.py[cod] | |||
@@ -18,7 +24,6 @@ var | |||
sdist | |||
develop-eggs | |||
.installed.cfg | |||
lib | |||
lib64 | |||
__pycache__ | |||
@@ -37,3 +42,15 @@ nosetests.xml | |||
.mr.developer.cfg | |||
.project | |||
.pydevproject | |||
# Maven | |||
target | |||
# Ruby | |||
!parsers/ruby/lib | |||
# Ctags | |||
*/tags | |||
logs | |||
Gemfile.lock | |||
parsing.jar |
@@ -1,6 +1,6 @@ | |||
The MIT License (MIT) | |||
Copyright (c) 2014 Ben Kurtovic | |||
Copyright (c) 2014 Benjamin Attal, Ben Kurtovic, Severyn Kozak | |||
Permission is hereby granted, free of charge, to any person obtaining a copy | |||
of this software and associated documentation files (the "Software"), to deal | |||
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. | |||
SOFTWARE. |
@@ -1,4 +1,44 @@ | |||
bitshift | |||
======== | |||
bitshift is an online code snippet exchange. | |||
bitshift is a semantic search engine for source code developed by Benjamin | |||
Attal, Ben Kurtovic, and Severyn Kozak. This README is intended for developers | |||
only. For a user overview of the project: | |||
* read our [about page](http://bitshift.it/) | |||
* watch our [demo video](https://vimeo.com/98697078) | |||
Branches | |||
-------- | |||
- `master`: working, tested, version-numbered code - no direct commits; should | |||
only accept merges from `develop` when ready to release | |||
- `develop`: integration branch with unreleased but mostly functional code - | |||
direct commits allowed but should be minor | |||
- `feature/*`: individual components of the project with untested, likely | |||
horribly broken code - branch off from and merge into `develop` when done | |||
Style | |||
----- | |||
bitshift uses [SASS][SASS] for styling; compile the stylesheets to CSS with | |||
`sass --watch static/sass/:static/css`. | |||
Documentation | |||
------------- | |||
To build documentation, run `make html` from the `docs` subdirectory. You can | |||
then browse from `docs/build/html/index.html`. | |||
To automatically update the API documentation structure (necessary when adding | |||
new modules or packages, but *not* when adding functions or changing | |||
docstrings), run `sphinx-apidoc -fo docs/source/api bitshift` from the project | |||
root. Note that this will revert any custom changes made to the files in | |||
`docs/source/api`, so you might want to update them by hand instead. | |||
[SASS]: http://sass-lang.com/guide | |||
Releasing | |||
--------- | |||
- Update `__version__` in `bitshift/__init__.py`, `version` in `setup.py`, and | |||
`version` and `release` in `docs/conf.py`. |
@@ -2,21 +2,67 @@ | |||
Module to contain all the project's Flask server plumbing. | |||
""" | |||
from json import dumps

from flask import Flask, make_response, render_template, request

from bitshift import assets
from bitshift.database import Database
from bitshift.languages import LANGS
from bitshift.query import parse_query, QueryParseException

# Serve static assets from the site root (e.g. /css/..., /js/...) rather
# than the default /static/ prefix.
app = Flask(__name__, static_folder="static", static_url_path="")
app.config.from_object("bitshift.config")

app_env = app.jinja_env
app_env.line_statement_prefix = "="
app_env.globals.update(assets=assets)

database = Database()
@app.route("/")
def index():
    """Render the home/search page, with languages for the autocompleter."""
    return render_template("index.html", autocomplete_languages=LANGS)
@app.route("/search.json")
def search():
    """
    Handle search API requests, returning a JSON response.

    Reads the query string (``q``), page number (``p``, defaulting to 1),
    and highlight flag (``hl``) from the request arguments, then runs the
    parsed query against the database.
    """
    def json_response(payload):
        # Serialize the payload and tag it with the JSON mimetype.
        resp = make_response(dumps(payload))
        resp.mimetype = "application/json"
        return resp

    query = request.args.get("q")
    if not query:
        return json_response({"error": "No query given"})

    try:
        tree = parse_query(query)
    except QueryParseException as exc:
        return json_response({"error": exc.args[0]})

    page = request.args.get("p", 1)
    try:
        page = int(page)
    except ValueError:
        return json_response({"error": u"Invalid page number: %s" % page})

    # Anything other than an explicit "no" value enables highlighting.
    highlight = request.args.get("hl", "0").lower() not in ["0", "false", "no"]

    count, codelets = database.search(tree, page)
    results = [codelet.serialize(highlight) for codelet in codelets]
    return json_response({"count": count, "results": results})
@app.route("/about")
def about():
    """Render the static "about" page."""
    return render_template("about.html")


@app.route("/docs")
def docs():
    """Render the static documentation page."""
    return render_template("docs.html")


@app.errorhandler(404)
def error404(error):
    """Render the custom 404 page, preserving the 404 status code."""
    return render_template("error404.html"), 404
if __name__ == "__main__":
    # Development entry point only; debug mode must not be enabled when the
    # app is served in production.
    app.run(debug=True)
@@ -1 +1,8 @@ | |||
# -*- coding: utf-8 -*-

__author__ = "Benjamin Attal, Ben Kurtovic, Severyn Kozak"
__copyright__ = "Copyright (c) 2014 Benjamin Attal, Ben Kurtovic, Severyn Kozak"
__license__ = "MIT License"
__version__ = "0.1.dev"

from . import assets, codelet, config, crawler, database, parser, query
@@ -1,22 +1,46 @@ | |||
""" | |||
Module contains helper functions to be used inside the project's Jinja | |||
templates. | |||
:synopsis: Helper functions for use inside the project's Jinja templates. | |||
""" | |||
import re | |||
from flask import Markup | |||
# Maps a static-asset file extension to the HTML tag template used to embed
# it in a page; the "%s" placeholder receives the asset's filename. The
# stale pre-merge "/static/..." entries (duplicate dict keys) are removed.
ASSET_HTML_TEMPLATES = {
    'css': "<link rel='stylesheet' type='text/css' href='/css/%s'>",
    'js': "<script src='/js/%s'></script>"
}
def tag(filename):
    """
    Generate an HTML tag for a CSS/JS asset, based on its file extension.

    :param filename: The filename of the asset to create a tag for.
    :type filename: str

    :return: A `<script>` tag for JS files, or a `<link>` tag for CSS files.
    :rtype: flask.Markup
    """
    file_ext = filename.split(".")[-1]
    return Markup(ASSET_HTML_TEMPLATES[file_ext] % filename)
def syntax_highlight(msg):
    """
    Insert HTML `<span>` elements into a string, for symbol/word styling.

    Angle brackets are escaped, selected punctuation characters are wrapped
    in colored `<span>` elements, and the whole message is wrapped in a
    `<span>` whose font size scales inversely with the message length.

    :param msg: The message to style.
    :type msg: str

    :return: The HTML-formatted message (empty string for empty input).
    :rtype: str
    """
    if not msg:
        # Guard against ZeroDivisionError in the font-size computation.
        return ""
    font_size = 16.0 / len(msg)
    # Bug fix: str.replace() returns a new string (the original discarded
    # the result), and the entities were malformed ("&;lt" / "&;gt").
    msg = msg.replace("<", "&lt;").replace(">", "&gt;")
    msg = re.sub('([!()"%])', '<span class="dark">\\1</span>', msg)
    msg = re.sub('([:.;,])', '<span class="red">\\1</span>', msg)
    msg = msg.replace("404", '<span class="red">404</span>')
    return "<span class='light' style='font-size: %fem'>%s</span>" % (
        font_size, msg)
@@ -0,0 +1,103 @@ | |||
from operator import concat | |||
from pygments import highlight | |||
from pygments.lexers import find_lexer_class, get_lexer_by_name | |||
from pygments.formatters.html import HtmlFormatter | |||
from .languages import LANGS | |||
__all__ = ["Codelet"] | |||
class Codelet(object):
    """
    A source-code object with code metadata and composition analysis.

    :ivar name: (str) A suitable name for the codelet.
    :ivar code: (str) The raw source code.
    :ivar filename: (str, or None) The filename of the snippet.
    :ivar language: (int, or None) The inferred language of `code`.
    :ivar authors: (array of tuples (str, str or None)) An array of tuples
        containing an author's name and profile URL (on the service the code
        was pulled from).
    :ivar url: (str) The url of the (page containing the) source code.
    :ivar date_created: (:class:`datetime.datetime`, or None) The date the
        code was published.
    :ivar date_modified: (:class:`datetime.datetime`, or None) The date the
        code was last modified.
    :ivar rank: (float) A quantification of the source code's quality, as
        per available ratings (stars, forks, upvotes, etc.).
    :ivar symbols: (dict) Dictionary containing dictionaries of functions,
        classes, variable definitions, etc.
    :ivar origin: (tuple) 2-tuple of (site_name, site_url), as added by the
        database.
    """

    def __init__(self, name, code, filename, language, authors, url,
                 date_created, date_modified, rank, symbols=None, origin=None):
        """
        Create a Codelet instance.

        Each parameter corresponds to the instance attribute of the same
        name; see the class docstring for types and semantics. `symbols`
        defaults to an empty dict and `origin` to ``(None, None)``.
        """
        self.name = name
        self.code = code
        self.filename = filename
        self.language = language
        self.authors = authors
        self.url = url
        self.date_created = date_created
        self.date_modified = date_modified
        self.rank = rank
        self.symbols = symbols or {}
        self.origin = origin or (None, None)

    def serialize(self, highlight_code=False):
        """
        Convert the codelet into a dictionary that can be sent as JSON.

        :param highlight_code: Whether to return code as pygments-highlighted
            HTML or as plain source.
        :type highlight_code: bool

        :return: The codelet as a dictionary.
        :rtype: dict
        """
        lang = LANGS[self.language]
        code = self.code
        if highlight_code:
            # Fall back to the plain-text lexer when no lexer exists for
            # the codelet's language.
            lexer = find_lexer_class(lang)() or get_lexer_by_name("text")
            symbols = reduce(concat, self.symbols.values(), [])
            # Highlight every line on which a symbol is defined or used.
            lines = reduce(concat, [[loc[0] for loc in sym[1] + sym[2]]
                                    for sym in symbols], [])
            formatter = HtmlFormatter(linenos=True, hl_lines=lines)
            code = highlight(code, lexer, formatter)
        # NOTE(review): assumes date_created/date_modified are set, despite
        # the class docstring allowing None -- confirm with the indexer.
        return {
            "name": self.name, "code": code, "lang": lang,
            "authors": self.authors, "url": self.url,
            "created": self.date_created.isoformat(),
            "modified": self.date_modified.isoformat(), "origin": self.origin
        }
@@ -0,0 +1,94 @@ | |||
""" | |||
:synopsis: Parent crawler module, which supervises all crawlers. | |||
Contains functions for initializing all subsidiary, threaded crawlers. | |||
""" | |||
import logging | |||
import logging.handlers | |||
import os | |||
import Queue | |||
import sys | |||
import time | |||
from threading import Event | |||
from .crawler import GitHubCrawler, BitbucketCrawler | |||
from .indexer import GitIndexer, GitRepository | |||
__all__ = ["crawl"] | |||
MAX_URL_QUEUE_SIZE = 5e3 | |||
def crawl():
    """
    Initialize all crawlers (and indexers).

    Start the:

    1. GitHub crawler, :class:`crawler.GitHubCrawler`.
    2. Bitbucket crawler, :class:`crawler.BitbucketCrawler`.
    3. Git indexer, :class:`bitshift.crawler.indexer.GitIndexer`.

    If repository names are supplied on the command line, only those GitHub
    repositories are queued; otherwise both site crawlers are started.
    """
    _configure_logging()
    time.sleep(5)

    clone_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE)
    run_event = Event()
    run_event.set()
    threads = [GitIndexer(clone_queue, run_event)]

    repo_names = sys.argv[1:]
    if repo_names:
        ranks = GitHubCrawler.get_ranks(repo_names)
        for name in repo_names:
            clone_queue.put(GitRepository("https://github.com/" + name, name,
                                          "GitHub", ranks[name]))
    else:
        threads.append(GitHubCrawler(clone_queue, run_event))
        threads.append(BitbucketCrawler(clone_queue, run_event))

    for thread in threads:
        thread.start()

    try:
        while True:
            time.sleep(0.1)
    except KeyboardInterrupt:
        # Tell every worker to stop, drop queued work, and wait for the
        # threads to exit cleanly.
        run_event.clear()
        with clone_queue.mutex:
            clone_queue.queue.clear()
        for thread in threads:
            thread.join()
def _configure_logging():
    """
    Set up the root-logger handlers used by the crawler subpackage.

    Writes hourly-rotated log files to a ``logs`` directory at the project
    root (created on demand) and mirrors every record to stderr; quiets the
    chatty ``requests``/``urllib3`` loggers.
    """
    # This isn't ideal, since it means the bitshift python package must be
    # kept inside the app, but it works for now:
    project_root = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "..", ".."))
    log_dir = os.path.join(project_root, "logs")
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    logging.getLogger("requests").setLevel(logging.WARNING)
    logging.getLogger("urllib3").setLevel(logging.WARNING)

    formatter = logging.Formatter(
        fmt="%(asctime)s %(levelname)s %(name)s %(message)s",
        datefmt="%y-%m-%d %H:%M:%S")
    handlers = [
        logging.handlers.TimedRotatingFileHandler(
            "%s/%s" % (log_dir, "app.log"), when="H", interval=1,
            backupCount=20),
        logging.StreamHandler()]

    root_logger = logging.getLogger()
    root_logger.handlers = []
    for handler in handlers:
        handler.setFormatter(formatter)
        root_logger.addHandler(handler)
    root_logger.setLevel(logging.NOTSET)
if __name__ == "__main__":
    # Allow running the crawler subpackage directly during development.
    crawl()
@@ -0,0 +1,243 @@ | |||
""" | |||
:synopsis: Main crawler module, to oversee all site-specific crawlers. | |||
Contains all website/framework-specific Class crawlers. | |||
""" | |||
import logging | |||
import math | |||
import time | |||
import threading | |||
import requests | |||
from . import indexer | |||
class GitHubCrawler(threading.Thread):
    """
    Crawler that retrieves links to all of GitHub's public repositories.

    GitHubCrawler is a threaded singleton that queries GitHub's API for urls
    to its public repositories, which it inserts into a :class:`Queue.Queue`
    shared with :class:`indexer.GitIndexer`.

    :ivar clone_queue: (:class:`Queue.Queue`) Contains :class:`GitRepository`
        with repository metadata retrieved by :class:`GitHubCrawler`, and
        other Git crawlers, to be processed by :class:`indexer.GitIndexer`.
    :ivar _logger: (:class:`logging.Logger`) A class-specific logger object.
    """

    # SECURITY: these API credentials are committed in plain text; they
    # should be loaded from configuration or the environment instead.
    AUTHENTICATION = {
        "client_id" : "436cb884ae09be7f2a4e",
        "client_secret" : "8deeefbc2439409c5b7a092fd086772fe8b1f24e"
    }

    def __init__(self, clone_queue, run_event):
        """
        Create an instance of the singleton `GitHubCrawler`.

        :param clone_queue: see :attr:`self.clone_queue`
        :type clone_queue: see :attr:`self.clone_queue`
        :param run_event: event which, when cleared, signals the thread to
            stop.
        """
        self.clone_queue = clone_queue
        self.run_event = run_event
        self._logger = logging.getLogger("%s.%s" %
                (__name__, self.__class__.__name__))
        self._logger.info("Starting.")
        super(GitHubCrawler, self).__init__(name=self.__class__.__name__)

    def run(self):
        """
        Query the GitHub API for data about every public repository.

        Pull all of GitHub's repositories by making calls to its API in a
        loop, accessing a subsequent page of results via the "next" URL
        returned in an API response header. Uses Severyn Kozak's (sevko)
        authentication credentials. For every new repository, a
        :class:`GitRepository` is inserted into :attr:`self.clone_queue`.
        """
        next_api_url = "https://api.github.com/repositories"
        # Spread requests out to stay under 5000 calls per hour.
        api_request_interval = 5e3 / 60 ** 2

        while next_api_url and self.run_event.is_set():
            start_time = time.time()

            try:
                resp = requests.get(next_api_url, params=self.AUTHENTICATION)
            except requests.ConnectionError:
                self._logger.exception("API %s call failed:" % next_api_url)
                time.sleep(0.5)
                continue

            queue_percent_full = (float(self.clone_queue.qsize()) /
                    self.clone_queue.maxsize) * 100
            self._logger.info("API call made. Queue size: %d/%d, %d%%." %
                    ((self.clone_queue.qsize(), self.clone_queue.maxsize,
                    queue_percent_full)))

            # Parse the response body once (the original parsed it twice).
            repos = resp.json()
            repo_ranks = self.get_ranks([repo["full_name"] for repo in repos])

            for repo in repos:
                while self.clone_queue.full():
                    time.sleep(1)
                self.clone_queue.put(indexer.GitRepository(
                        repo["html_url"], repo["full_name"], "GitHub",
                        repo_ranks[repo["full_name"]]))

            if int(resp.headers["x-ratelimit-remaining"]) == 0:
                # Block until the rate-limit window resets; clamp to zero so
                # a reset time in the past cannot make sleep() raise.
                time.sleep(max(0, int(resp.headers["x-ratelimit-reset"]) -
                        time.time()))

            # NOTE(review): assumes a "link" header with a next-page URL is
            # always present -- this raises KeyError on the final page.
            next_api_url = resp.headers["link"].split(">")[0][1:]

            sleep_time = api_request_interval - (time.time() - start_time)
            if sleep_time > 0:
                time.sleep(sleep_time)

    @classmethod
    def get_ranks(cls, repo_names):
        """
        Return the ranks for several repositories.

        Queries the GitHub API for the number of stargazers for any given
        repositories, and blocks if the query limit is exceeded. The rank is
        calculated using these numbers.

        :param repo_names: An array of repository names, in
            `username/repository_name` format.
        :type repo_names: list of str

        :return: A dictionary mapping repository names to ranks, eg
            ``{"user/repository" : 0.2564949357461537}``. Names missing from
            the search results get a default rank of 0.1.
        :rtype: dict
        """
        API_URL = "https://api.github.com/search/repositories"
        REPOS_PER_QUERY = 25

        repo_ranks = {}
        for names in [repo_names[ind:ind + REPOS_PER_QUERY] for ind in
                xrange(0, len(repo_names), REPOS_PER_QUERY)]:
            query_url = "%s?q=%s" % (API_URL,
                    "+".join("repo:%s" % name for name in names))

            resp = requests.get(query_url,
                    params=cls.AUTHENTICATION,
                    headers={
                        "Accept" : "application/vnd.github.preview"
                    })

            if int(resp.headers["x-ratelimit-remaining"]) == 0:
                sleep_time = int(resp.headers["x-ratelimit-reset"]) - \
                        time.time() + 1
                if sleep_time > 0:
                    logging.info("API quota exceeded. Sleep time: %d." %
                            sleep_time)
                    time.sleep(sleep_time)

            # Rank scales logarithmically with stargazers, capped at 1.0.
            for repo in resp.json()["items"]:
                stars = repo["stargazers_count"]
                rank = min(math.log(max(stars, 1), 5000), 1.0)
                repo_ranks[repo["full_name"]] = rank

        for name in repo_names:
            if name not in repo_ranks:
                repo_ranks[name] = 0.1

        return repo_ranks
class BitbucketCrawler(threading.Thread):
    """
    Crawler that retrieves links to all of Bitbucket's public repositories.

    BitbucketCrawler is a threaded singleton that queries Bitbucket's API
    for urls to its public repositories, and inserts them as
    :class:`indexer.GitRepository` into a :class:`Queue.Queue` shared with
    :class:`indexer.GitIndexer`.

    :ivar clone_queue: (:class:`Queue.Queue`) The shared queue to insert
        :class:`indexer.GitRepository` repository urls into.
    :ivar _logger: (:class:`logging.Logger`) A class-specific logger object.
    """

    def __init__(self, clone_queue, run_event):
        """
        Create an instance of the singleton `BitbucketCrawler`.

        :param clone_queue: see :attr:`self.clone_queue`
        :type clone_queue: see :attr:`self.clone_queue`
        :param run_event: event which, when cleared, signals the thread to
            stop.
        """
        self.clone_queue = clone_queue
        self.run_event = run_event
        self._logger = logging.getLogger("%s.%s" %
                (__name__, self.__class__.__name__))
        self._logger.info("Starting.")
        super(BitbucketCrawler, self).__init__(name=self.__class__.__name__)

    def run(self):
        """
        Query the Bitbucket API for data about every public repository.

        Query the Bitbucket API's "/repositories" endpoint and read its
        paginated responses in a loop; any "git" repositories have their
        clone-urls and names inserted into a :class:`indexer.GitRepository`
        in :attr:`self.clone_queue`.
        """
        next_api_url = "https://api.bitbucket.org/2.0/repositories"

        while self.run_event.is_set():
            try:
                response = requests.get(next_api_url).json()
            except requests.ConnectionError:
                self._logger.exception("API %s call failed:", next_api_url)
                time.sleep(0.5)
                continue

            queue_percent_full = (float(self.clone_queue.qsize()) /
                    self.clone_queue.maxsize) * 100
            self._logger.info("API call made. Queue size: %d/%d, %d%%." %
                    ((self.clone_queue.qsize(), self.clone_queue.maxsize,
                    queue_percent_full)))

            for repo in response["values"]:
                if repo["scm"] != "git":
                    continue
                while self.clone_queue.full():
                    time.sleep(1)

                # Prefer the HTTPS clone link over the SSH one.
                clone_links = repo["links"]["clone"]
                clone_url = (clone_links[0]["href"] if
                        clone_links[0]["name"] == "https" else
                        clone_links[1]["href"])

                try:
                    # Rank scales logarithmically with watchers, capped at
                    # 1.0.
                    watchers = requests.get(
                            repo["links"]["watchers"]["href"])
                    num = len(watchers.json()["values"])
                    rank = min(math.log(max(num, 1), 500), 1.0)
                except requests.ConnectionError:
                    self._logger.exception("API %s call failed:" %
                            next_api_url)
                    time.sleep(0.5)
                    continue

                # Bug fix: `rank` was previously passed as the second
                # argument to Queue.put() instead of to GitRepository(),
                # which requires it and would raise TypeError.
                self.clone_queue.put(indexer.GitRepository(
                        clone_url, repo["full_name"], "Bitbucket", rank))

            next_api_url = response["next"]
            time.sleep(0.2)
@@ -0,0 +1,348 @@ | |||
""" | |||
:synopsis: Contains a singleton GitIndexer class, which clones and indexes git | |||
repositories. | |||
""" | |||
from datetime import datetime | |||
import logging | |||
import os | |||
import Queue | |||
import shutil | |||
import string | |||
import time | |||
import threading | |||
from bs4 import UnicodeDammit | |||
import git | |||
from ..database import Database | |||
from ..parser import parse, UnsupportedFileError | |||
from ..codelet import Codelet | |||
# Where repositories are cloned to, how long queue-poll loops sleep between
# checks, and how many cloned-but-unindexed repositories may be pending.
GIT_CLONE_DIR = "/tmp/bitshift"
THREAD_QUEUE_SLEEP = 0.5
MAX_INDEX_QUEUE_SIZE = 10


class GitRepository(object):
    """
    A representation of a Git repository's metadata.

    :ivar url: (str) The repository's url.
    :ivar name: (str) The name of the repository.
    :ivar framework_name: (str) The name of the online Git framework that the
        repository belongs to (eg, GitHub, BitBucket).
    :ivar rank: (float) The rank of the repository, as assigned by
        :class:`crawler.GitHubCrawler`.
    :ivar path: (str) The repository's on-disk directory path.
    :ivar repo: (git.Repo) A git.Repo representation of the repository.
    """

    def __init__(self, url, name, framework_name, rank):
        """
        Create a GitRepository instance.

        :param url: see :attr:`GitRepository.url`
        :param name: see :attr:`GitRepository.name`
        :param framework_name: see :attr:`GitRepository.framework_name`
        :param rank: see :attr:`GitRepository.rank`

        :type url: str
        :type name: str
        :type framework_name: str
        :type rank: float
        """
        self.url = url
        self.name = name
        self.framework_name = framework_name
        self.rank = rank
        # Use a unique, timestamped directory so repeated clones of the
        # same repository cannot collide on disk.
        unique_dirname = "%s-%d" % (name.replace("/", "-"), int(time.time()))
        self.path = os.path.join(GIT_CLONE_DIR, unique_dirname)
        # Populated by _GitCloner after the repository has been cloned.
        self.repo = None
class GitIndexer(threading.Thread):
    """
    A singleton Git repository indexer.

    :class:`GitIndexer` indexes the repositories cloned by the
    :class:`_GitCloner` singleton.

    :ivar index_queue: (:class:`Queue.Queue`) A queue containing
        :class:`GitRepository` objects for every new repository successfully
        cloned by :class:`_GitCloner`, which are to be indexed.
    :ivar git_cloner: (:class:`_GitCloner`) The corresponding repository
        cloner, which feeds :class:`GitIndexer`.
    :ivar _logger: (:class:`logging.Logger`) A class-specific logger object.
    """

    def __init__(self, clone_queue, run_event):
        """
        Create an instance of the singleton `GitIndexer`.

        :param clone_queue: queue of repositories to clone; fed to the
            :class:`_GitCloner` this indexer starts.
        :type clone_queue: :class:`Queue.Queue`
        :param run_event: event which, when cleared, signals the thread to
            stop.
        """
        self.index_queue = Queue.Queue(maxsize=MAX_INDEX_QUEUE_SIZE)
        self.run_event = run_event
        self.git_cloner = _GitCloner(clone_queue, self.index_queue, run_event)
        self.git_cloner.start()
        self.database = Database()
        self._logger = logging.getLogger("%s.%s" %
                (__name__, self.__class__.__name__))
        self._logger.info("Starting.")
        if not os.path.exists(GIT_CLONE_DIR):
            os.makedirs(GIT_CLONE_DIR)
        super(GitIndexer, self).__init__(name=self.__class__.__name__)

    def run(self):
        """
        Retrieve metadata about newly cloned repositories and index them.

        Blocks until new repositories appear in :attr:`self.index_queue`,
        then retrieves one, and attempts indexing it. Should any errors
        occur, the new repository will be discarded and the indexer will
        index the next in the queue.
        """
        while True:
            while self.index_queue.empty() and self.run_event.is_set():
                time.sleep(THREAD_QUEUE_SLEEP)
            if not self.run_event.is_set():
                break
            repo = self.index_queue.get()
            self.index_queue.task_done()
            self._index_repository(repo)

    def _index_repository(self, repo):
        """
        Index an already-cloned Git repository, then delete its clone.

        Calls `_insert_repository_codelets()` on **repo** (whose working
        copy was produced by :class:`_GitCloner`), and removes the on-disk
        clone afterwards regardless of success.

        :param repo: The metadata of the repository to be indexed.
        :type repo: :class:`GitRepository`
        """
        self._logger.info(u"Indexing repo: %s", repo.name)
        try:
            self._insert_repository_codelets(repo)
        except Exception:
            self._logger.exception("Exception raised while indexing:")
        finally:
            # Always reclaim the disk space used by the clone.
            if os.path.isdir(repo.path):
                shutil.rmtree(repo.path)

    def _insert_repository_codelets(self, repo):
        """
        Create and insert a Codelet for the files inside a Git repository.

        Create a new Codelet, and insert it into the Database singleton, for
        every file inside the current working directory's default branch
        (usually *master*).

        :param repo: The metadata of the repository to be indexed.
        :type repo: :class:`GitRepository`
        """
        file_meta = self._get_file_metadata(repo.repo)
        if not file_meta:
            # Nothing to index (e.g. the repository has no commits).
            return

        for filename, data in file_meta.iteritems():
            name = ("%s: %s" % (repo.name, filename)).encode("utf8")
            authors = [(author, None) for author in data["authors"]]
            encoded_source = data["blob"].data_stream.read()
            source = UnicodeDammit(encoded_source).unicode_markup
            url = self._generate_file_url(filename, repo)
            codelet = Codelet(name, source, filename, None, authors, url,
                              data["time_created"], data["time_last_modified"],
                              repo.rank)
            self._logger.debug("Indexing file: %s", codelet.name)
            try:
                parse(codelet)
            except UnsupportedFileError:
                continue
            except Exception:
                # NOTE(review): a failed parse is logged but the codelet is
                # still inserted (without symbol data) -- confirm intended.
                self._logger.exception("Exception raised while parsing:")
            self.database.insert(codelet)

    def _generate_file_url(self, filename, repo):
        """
        Return a url for a filename from a Git wrapper framework.

        :param filename: The path of the file.
        :param repo: The git repo.
        :type filename: str
        :type repo: :class:`GitRepository`

        :return: The file's full url on the given framework, if successfully
            derived.
        :rtype: str, or None
        """
        if repo.framework_name == "GitHub":
            default_branch = repo.repo.active_branch.name
            parts = [repo.url, "blob", default_branch, filename]
        elif repo.framework_name == "Bitbucket":
            try:
                commit_hash = repo.repo.head.commit.hexsha
            except ValueError:  # No commits
                return None
            parts = [repo.url, "src", commit_hash, filename]
        else:
            # Robustness fix: an unknown framework previously raised
            # UnboundLocalError on `parts`.
            return None
        return "/".join(s.strip("/") for s in parts)

    def _get_file_metadata(self, repo):
        """
        Return a dictionary containing every valuable tracked file's metadata.

        :param repo: The repository to inspect.
        :type repo: :class:`git.Repo`

        :return: A dictionary with a blob, author names, time of creation,
            and time of last modification for every filename key, eg::

                {
                    "my_file" : {
                        "blob": (GitPython Blob) <object>,
                        "authors" : (str list) ["author1", "author2"],
                        "time_created" : (`datetime.datetime`) <object>,
                        "time_last_modified" : (`datetime.datetime`) <object>
                    }
                }

            An empty dictionary is returned for repositories without commits.
        :rtype: dictionary of dictionaries
        """
        try:
            tree = repo.head.commit.tree
        except ValueError:  # No commits
            return {}

        files = {}
        self._logger.debug("Building file metadata")
        for item in tree.traverse():
            if item.type != "blob" or not self._is_ascii(item.data_stream):
                continue
            # One "<author> <commit timestamp>" line per commit touching
            # the file, newest first.
            log = repo.git.log("--follow", '--format=%an %ct', "--", item.path)
            lines = log.splitlines()
            authors = {line.rsplit(" ", 1)[0].decode("utf8") for line in lines}
            last_mod = int(lines[0].rsplit(" ", 1)[1])
            created = int(lines[-1].rsplit(" ", 1)[1])
            files[item.path] = {
                "blob": item,
                "authors" : authors,
                "time_last_modified": datetime.fromtimestamp(last_mod),
                "time_created": datetime.fromtimestamp(created)
            }
        return files

    def _is_ascii(self, source):
        """
        Heuristically determine whether a file is ASCII text or binary.

        If a portion of the file contains null bytes, or the percentage of
        bytes that aren't ASCII is greater than 30%, then the file is
        concluded to be binary. This heuristic is used by the `file` utility
        and Perl's inbuilt `-T` operator, and is the de-facto method for
        determining whether a file is ASCII.

        :param source: The file object to test.
        :type source: `file`

        :return: Whether the file is probably ASCII.
        :rtype: Boolean
        """
        file_snippet = source.read(512)
        if not file_snippet:
            return True

        ascii_characters = "".join(map(chr, range(32, 127)) +
                                   list("\n\r\t\b"))
        null_trans = string.maketrans("", "")
        if "\0" in file_snippet:
            return False

        non_ascii = file_snippet.translate(null_trans, ascii_characters)
        return not float(len(non_ascii)) / len(file_snippet) > 0.30
class _GitCloner(threading.Thread):
    """
    A singleton Git repository cloner.

    Clones the repositories crawled by :class:`crawler.GitHubCrawler` for
    :class:`GitIndexer` to index.

    :ivar clone_queue: (:class:`Queue.Queue`) see
        :attr:`crawler.GitHubCrawler.clone_queue`.
    :ivar index_queue: (:class:`Queue.Queue`) see
        :attr:`GitIndexer.index_queue`.
    :ivar _logger: (:class:`logging.Logger`) A class-specific logger object.
    """

    def __init__(self, clone_queue, index_queue, run_event):
        """
        Create an instance of the singleton :class:`_GitCloner`.

        :param clone_queue: see :attr:`self.clone_queue`
        :param index_queue: see :attr:`self.index_queue`
        :type clone_queue: see :attr:`self.clone_queue`
        :type index_queue: see :attr:`self.index_queue`
        :param run_event: event which, when cleared, signals the thread to
            stop.
        """
        self.clone_queue = clone_queue
        self.index_queue = index_queue
        self.run_event = run_event
        self._logger = logging.getLogger("%s.%s" %
                (__name__, self.__class__.__name__))
        self._logger.info("Starting.")
        super(_GitCloner, self).__init__(name=self.__class__.__name__)

    def run(self):
        """
        Retrieve metadata about newly crawled repositories and clone them.

        Blocks until new :class:`GitRepository` appear in
        :attr:`self.clone_queue`, then attempts cloning them. If successful,
        the cloned repository is added to :attr:`self.index_queue` for the
        `GitIndexer` to index; otherwise, it is discarded.
        """
        while True:
            while self.clone_queue.empty() and self.run_event.is_set():
                time.sleep(THREAD_QUEUE_SLEEP)
            if not self.run_event.is_set():
                break
            repo = self.clone_queue.get()
            self.clone_queue.task_done()
            try:
                self._clone_repository(repo)
            except Exception:
                self._logger.exception("Exception raised while cloning:")

    def _clone_repository(self, repo):
        """
        Attempt cloning a Git repository and hand it to the indexer.

        :param repo: Metadata about the repository to clone.
        :type repo: :class:`GitRepository`
        """
        self._logger.info("Cloning repo: %s", repo.url)
        repo.repo = git.Repo.clone_from(repo.url, to_path=repo.path, bare=True,
                                        single_branch=True)
        while self.index_queue.full() and self.run_event.is_set():
            time.sleep(THREAD_QUEUE_SLEEP)

        if self.run_event.is_set():
            self.index_queue.put(repo)
        elif os.path.isdir(repo.path):
            # Bug fix: when shutting down, the indexer never sees this
            # repository, so remove the clone here to avoid leaking disk
            # space in GIT_CLONE_DIR.
            shutil.rmtree(repo.path)
@@ -0,0 +1,239 @@ | |||
""" | |||
Subpackage with classes and functions to handle communication with the MySQL | |||
database backend, which manages the search index. | |||
""" | |||
import codecs | |||
import os | |||
import mmh3 | |||
import oursql | |||
from .migration import VERSION, MIGRATIONS | |||
from ..codelet import Codelet | |||
from ..query.nodes import (String, Regex, Text, Language, Author, Date, Symbol, | |||
BinaryOp, UnaryOp) | |||
__all__ = ["Database"] | |||
class Database(object):
    """Represents the MySQL database."""

    def __init__(self, migrate=False):
        """
        Connect to the database and verify its schema version.

        :param migrate: If True, an out-of-date schema is migrated in place
            instead of raising :class:`RuntimeError`.
        """
        self._conn = self._connect()
        self._check_version(migrate)

    def _connect(self):
        """Establish a connection to the database."""
        try:
            codecs.lookup("utf8mb4")
        except LookupError:
            # Python's codec registry has no "utf8mb4" entry; alias it to
            # utf8 so results sent with MySQL's utf8mb4 charset decode.
            utf8 = codecs.lookup("utf8")
            codecs.register(lambda name: utf8 if name == "utf8mb4" else None)
        # Credentials are read from the .my.cnf file next to this module.
        root = os.path.dirname(os.path.abspath(__file__))
        default_file = os.path.join(root, ".my.cnf")
        return oursql.connect(
            db="bitshift", read_default_file=default_file, autoping=True,
            autoreconnect=True, charset="utf8mb4")

    def _migrate(self, cursor, current):
        """Migrate the database to the latest schema version.

        :param cursor: An open cursor used to run the migration queries.
        :param current: The schema version currently in the database.
        """
        for version in xrange(current, VERSION):
            print "Migrating to %d..." % (version + 1)
            # MIGRATIONS[n - 1] holds the statements migrating schema
            # version n to version n + 1.
            for query in MIGRATIONS[version - 1]:
                cursor.execute(query)
            # Record each step so a failed run can resume where it stopped.
            cursor.execute("UPDATE version SET version = ?", (version + 1,))

    def _check_version(self, migrate):
        """Check the database schema version and respond accordingly.

        If the schema is out of date, migrate if *migrate* is True, else raise
        an exception.
        """
        with self._conn.cursor() as cursor:
            cursor.execute("SELECT version FROM version")
            version = cursor.fetchone()[0]
            if version < VERSION:
                if migrate:
                    self._migrate(cursor, version)
                else:
                    err = "Database schema out of date. " \
                          "Run `python -m bitshift.database.migration`."
                    raise RuntimeError(err)

    def _search_with_query(self, cursor, tree, page):
        """Execute an SQL query based on a query tree, and return results.

        The returned data is a 2-tuple of (list of codelet IDs, estimated
        number of total results).
        """
        query, args = tree.build_query(page)
        cursor.execute(query, args)
        ids = [cid for cid, _ in cursor.fetchall()]
        num_results = len(ids)  # TODO: This is not entirely correct
        return ids, num_results

    def _get_authors_for_codelet(self, cursor, codelet_id):
        """Return a list of (author_name, author_url) rows for a codelet."""
        query = """SELECT author_name, author_url
                   FROM authors
                   WHERE author_codelet = ?"""

        cursor.execute(query, (codelet_id,))
        return cursor.fetchall()

    def _get_symbols_for_code(self, cursor, code_id, tree):
        """Return a dict of symbols for a given codelet.

        Only symbols matched by the query tree's Symbol nodes are fetched;
        if the query contains no Symbol nodes, an empty dict is returned.
        The result maps each symbol type name to a list of
        (name, definitions, uses) triples.
        """
        query = """SELECT symbol_type, symbol_name, sloc_type, sloc_row,
                          sloc_col, sloc_end_row, sloc_end_col
                   FROM symbols
                   INNER JOIN symbol_locations ON sloc_symbol = symbol_id
                   WHERE symbol_code = ? AND (%s)"""

        conds, args = [], [code_id]
        for node in tree.walk(Symbol):
            node_cond, node_args, _, _ = node.parameterize(set())
            conds.append(node_cond)
            args += node_args
        if not conds:
            return {}
        cond = " OR ".join(conds)

        symbols = {type_: {} for type_ in Symbol.TYPES}
        cursor.execute(query % cond, tuple(args))
        for type_, name, loc_type, row, col, erow, ecol in cursor.fetchall():
            sdict = symbols[Symbol.TYPES[type_]]
            if name not in sdict:
                sdict[name] = ([], [])
            # sdict[name] is a (definitions, uses) pair; sloc_type selects
            # which list (Symbol.DEFINE/Symbol.USE, as stored by
            # _insert_symbols).
            sdict[name][loc_type].append((row, col, erow, ecol))

        for type_, sdict in symbols.items():
            symbols[type_] = [(n, d, u) for n, (d, u) in sdict.iteritems()]
        return symbols

    def _get_codelets_from_ids(self, cursor, ids, tree):
        """Yield Codelet objects given a list of codelet IDs."""
        query = """SELECT *
                   FROM codelets
                   INNER JOIN code ON codelet_code_id = code_id
                   INNER JOIN origins ON codelet_origin = origin_id
                   WHERE codelet_id = ?"""

        with self._conn.cursor(oursql.DictCursor) as dict_cursor:
            for codelet_id in ids:
                dict_cursor.execute(query, (codelet_id,))
                row = dict_cursor.fetchall()[0]
                code_id = row["code_id"]
                # Origins with a URL base store only a relative codelet URL.
                if row["origin_url_base"]:
                    url = row["origin_url_base"] + row["codelet_url"]
                else:
                    url = row["codelet_url"]
                origin = (row["origin_name"], row["origin_url"])
                authors = self._get_authors_for_codelet(cursor, codelet_id)
                symbols = self._get_symbols_for_code(cursor, code_id, tree)
                yield Codelet(
                    row["codelet_name"], row["code_code"], None,
                    row["code_lang"], authors, url,
                    row["codelet_date_created"], row["codelet_date_modified"],
                    row["codelet_rank"], symbols, origin)

    def _decompose_url(self, cursor, url):
        """Break up a URL into an origin (with a URL base) and a suffix.

        Falls back to origin ID 1 with the full URL when no known origin's
        URL base matches.
        """
        query = """SELECT origin_id, SUBSTR(?, LENGTH(origin_url_base) + 1)
                   FROM origins
                   WHERE origin_url_base IS NOT NULL
                   AND ? LIKE CONCAT(origin_url_base, "%")"""

        cursor.execute(query, (url, url))
        result = cursor.fetchone()
        return result if result else (1, url)

    def _insert_symbols(self, cursor, code_id, sym_type, symbols):
        """Insert a list of symbols of a given type into the database."""
        query1 = "INSERT INTO symbols VALUES (DEFAULT, ?, ?, ?)"
        query2 = """INSERT INTO symbol_locations VALUES
                    (DEFAULT, ?, ?, ?, ?, ?, ?)"""

        # Build symbol_locations rows: (symbol ID, location kind, *coords).
        build = lambda id, L, typ: [tuple([id, typ] + list(loc)) for loc in L]
        type_id = Symbol.TYPES.index(sym_type)
        for (name, defs, uses) in symbols:
            cursor.execute(query1, (code_id, type_id, name))
            sym_id = cursor.lastrowid
            params = (build(sym_id, defs, Symbol.DEFINE) +
                      build(sym_id, uses, Symbol.USE))
            cursor.executemany(query2, params)

    def close(self):
        """Disconnect from the database."""
        self._conn.close()

    def search(self, tree, page=1):
        """
        Search the database for a query and return the *n*\ th page of results.

        :param tree: The query to search for.
        :type tree: :py:class:`~.query.tree.Tree`
        :param page: The result page to display.
        :type page: int

        :return: The total number of results, and the *n*\ th page of results.
        :rtype: 2-tuple of (long, list of :py:class:`.Codelet`\ s)
        """
        query1 = "SELECT 1 FROM cache WHERE cache_id = ?"
        query2 = """SELECT cdata_codelet, cache_count_mnt, cache_count_exp
                    FROM cache
                    INNER JOIN cache_data ON cache_id = cdata_cache
                    WHERE cache_id = ?
                    ORDER BY cdata_index ASC"""
        query3 = "INSERT INTO cache VALUES (?, ?, ?, DEFAULT)"
        query4 = "INSERT INTO cache_data VALUES (?, ?, ?)"

        # Cache key: 64-bit MurmurHash3 of "<page>:<serialized query>".
        cache_id = mmh3.hash64(str(page) + ":" + tree.serialize())[0]

        with self._conn.cursor() as cursor:
            cursor.execute(query1, (cache_id,))
            cache_hit = cursor.fetchall()
            if cache_hit:
                cursor.execute(query2, (cache_id,))
                rows = cursor.fetchall()
                # The result count is cached as mantissa * 10^exponent.
                num_results = rows[0][1] * (10 ** rows[0][2]) if rows else 0
                ids = [row[0] for row in rows]
            else:
                ids, num_results = self._search_with_query(cursor, tree, page)
                # Round the count to three significant digits and store it
                # as a (mantissa, exponent) pair.
                num_exp = max(len(str(num_results)) - 3, 0)
                num_results = int(round(num_results, -num_exp))
                num_mnt = num_results / (10 ** num_exp)
                cursor.execute(query3, (cache_id, num_mnt, num_exp))
                cdata = [(cache_id, c_id, i) for i, c_id in enumerate(ids)]
                cursor.executemany(query4, cdata)
            codelet_gen = self._get_codelets_from_ids(cursor, ids, tree)
            return (num_results, list(codelet_gen))

    def insert(self, codelet):
        """
        Insert a codelet into the database.

        :param codelet: The codelet to insert.
        :type codelet: :py:class:`.Codelet`
        """
        query1 = """INSERT INTO code VALUES (?, ?, ?)
                    ON DUPLICATE KEY UPDATE code_id=code_id"""
        query2 = """INSERT INTO codelets VALUES
                    (DEFAULT, ?, ?, ?, ?, ?, ?, ?)"""
        query3 = "INSERT INTO authors VALUES (DEFAULT, ?, ?, ?)"

        # Code rows are keyed by a 64-bit hash of "<language>:<code>" so
        # identical code blobs are stored only once.
        hash_key = str(codelet.language) + ":" + codelet.code.encode("utf8")
        code_id = mmh3.hash64(hash_key)[0]

        with self._conn.cursor() as cursor:
            cursor.execute(query1, (code_id, codelet.language, codelet.code))
            # rowcount == 1 only when a new code row was inserted (the
            # ON DUPLICATE clause is a no-op), so symbols are inserted
            # exactly once per unique code blob.
            if cursor.rowcount == 1:
                for sym_type, symbols in codelet.symbols.iteritems():
                    self._insert_symbols(cursor, code_id, sym_type, symbols)
            origin, url = self._decompose_url(cursor, codelet.url)

            cursor.execute(query2, (codelet.name, code_id, origin, url,
                                    codelet.rank, codelet.date_created,
                                    codelet.date_modified))
            codelet_id = cursor.lastrowid
            authors = [(codelet_id, a[0], a[1]) for a in codelet.authors]
            cursor.executemany(query3, authors)
@@ -0,0 +1,147 @@ | |||
""" | |||
Contains information about database schema versions, and SQL queries to update | |||
between them. | |||
""" | |||
# The current (latest) database schema version.
VERSION = 12

# MIGRATIONS[n - 1] is the list of SQL statements migrating the schema from
# version n to version n + 1 (consumed by Database._migrate).
#
# NOTE(review): the 8 -> 9 migration embeds DELIMITER, which is a mysql
# command-line client directive rather than server SQL -- confirm it
# executes correctly through the driver.
MIGRATIONS = [
    # 1 -> 2
    [
        """ALTER TABLE `codelets`
           DROP FOREIGN KEY `codelets_ibfk_1`""",
        """ALTER TABLE `code`
           DROP KEY `code_hash`,
           DROP COLUMN `code_hash`,
           MODIFY COLUMN `code_id` BIGINT NOT NULL""",
        """ALTER TABLE `codelets`
           MODIFY COLUMN `codelet_code_id` BIGINT NOT NULL,
           ADD KEY (`codelet_lang`),
           ADD CONSTRAINT `codelets_ibfk_1` FOREIGN KEY (`codelet_code_id`)
               REFERENCES `code` (`code_id`)
               ON DELETE RESTRICT ON UPDATE CASCADE""",
        """ALTER TABLE `symbols`
           ADD COLUMN `symbol_end_row` INT UNSIGNED NOT NULL,
           ADD COLUMN `symbol_end_col` INT UNSIGNED NOT NULL"""
    ],
    # 2 -> 3
    [
        """ALTER TABLE `symbols`
           DROP FOREIGN KEY `symbols_ibfk_1`,
           CHANGE COLUMN `symbol_codelet` `symbol_code` BIGINT NOT NULL,
           ADD CONSTRAINT `symbols_ibfk_1` FOREIGN KEY (`symbol_code`)
               REFERENCES `code` (`code_id`)
               ON DELETE CASCADE ON UPDATE CASCADE"""
    ],
    # 3 -> 4
    [
        """ALTER TABLE `symbols`
           DROP COLUMN `symbol_row`,
           DROP COLUMN `symbol_col`,
           DROP COLUMN `symbol_end_row`,
           DROP COLUMN `symbol_end_col`""",
        """CREATE TABLE `symbol_locations` (
           `sloc_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
           `sloc_symbol` BIGINT UNSIGNED NOT NULL,
           `sloc_type` TINYINT UNSIGNED NOT NULL,
           `sloc_row` INT UNSIGNED NOT NULL,
           `sloc_col` INT UNSIGNED NOT NULL,
           `sloc_end_row` INT UNSIGNED NOT NULL,
           `sloc_end_col` INT UNSIGNED NOT NULL,
           PRIMARY KEY (`sloc_id`),
           FOREIGN KEY (`sloc_symbol`)
               REFERENCES `symbols` (`symbol_id`)
               ON DELETE CASCADE ON UPDATE CASCADE
        ) ENGINE=InnoDB"""
    ],
    # 4 -> 5
    [
        """ALTER TABLE `origins`
           MODIFY COLUMN `origin_name` VARCHAR(64) DEFAULT NULL,
           MODIFY COLUMN `origin_url` VARCHAR(512) DEFAULT NULL,
           MODIFY COLUMN `origin_url_base` VARCHAR(512) DEFAULT NULL"""
    ],
    # 5 -> 6
    [
        """ALTER TABLE `code`
           ADD COLUMN `code_lang` SMALLINT UNSIGNED DEFAULT NULL
               AFTER `code_id`,
           ADD KEY (`code_lang`)""",
        """ALTER TABLE `codelets`
           DROP KEY `codelet_lang`,
           DROP COLUMN `codelet_lang`""",
        """ALTER TABLE `cache_data`
           DROP FOREIGN KEY `cache_data_ibfk_1`""",
        """ALTER TABLE `cache`
           MODIFY COLUMN `cache_id` BIGINT NOT NULL,
           DROP COLUMN `cache_hash`,
           DROP COLUMN `cache_last_used`,
           MODIFY COLUMN `cache_count_mnt` SMALLINT UNSIGNED NOT NULL""",
        """ALTER TABLE `cache_data`
           MODIFY COLUMN `cdata_cache` BIGINT NOT NULL,
           ADD PRIMARY KEY (`cdata_cache`, `cdata_codelet`),
           ADD CONSTRAINT `cache_data_ibfk_1` FOREIGN KEY (`cdata_codelet`)
               REFERENCES `codelets` (`codelet_id`)
               ON DELETE CASCADE ON UPDATE CASCADE""",
        """CREATE EVENT `flush_cache`
           ON SCHEDULE EVERY 1 HOUR
           DO
               DELETE FROM `cache`
               WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY);"""
    ],
    # 6 -> 7
    [
        """DELETE FROM `cache`""",
        """ALTER TABLE `cache_data`
           ADD COLUMN `cdata_index` TINYINT UNSIGNED NOT NULL
               AFTER `cdata_codelet`"""
    ],
    # 7 -> 8
    [
        """ALTER TABLE `origins`
           DROP COLUMN `origin_image`"""
    ],
    # 8 -> 9
    [
        """DELIMITER //
        CREATE PROCEDURE `empty_database`()
            BEGIN
                DELETE FROM `codelets`;
                DELETE FROM `code`;
                DELETE FROM `cache`;
                ALTER TABLE `codelets` AUTO_INCREMENT = 1;
                ALTER TABLE `authors` AUTO_INCREMENT = 1;
                ALTER TABLE `symbols` AUTO_INCREMENT = 1;
                ALTER TABLE `symbol_locations` AUTO_INCREMENT = 1;
            END//
        DELIMITER ;"""
    ],
    # 9 -> 10
    [
        """ALTER TABLE `symbol_locations`
           MODIFY COLUMN `sloc_col` INT UNSIGNED DEFAULT NULL,
           MODIFY COLUMN `sloc_end_row` INT UNSIGNED DEFAULT NULL,
           MODIFY COLUMN `sloc_end_col` INT UNSIGNED DEFAULT NULL"""
    ],
    # 10 -> 11
    [
        """ALTER DATABASE `bitshift`
           CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci"""
    ],
    # 11 -> 12
    [
        """CREATE TABLE `stopwords` (
           `value` varchar(18) NOT NULL DEFAULT ""
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8""",
        """INSERT INTO `stopwords` VALUES
           ("a"), ("about"), ("an"), ("are"), ("as"), ("at"), ("be"), ("by"),
           ("how"), ("i"), ("it"), ("la"), ("of"), ("on"), ("that"), ("the"),
           ("to"), ("und"), ("was"), ("what"), ("when"), ("where"), ("who"),
           ("will")"""
    ]
]
if __name__ == "__main__":
    # `python -m bitshift.database.migration` migrates the live database to
    # the latest schema version.
    from . import Database
    Database(migrate=True).close()
@@ -0,0 +1,141 @@ | |||
-- Schema version 12

CREATE DATABASE `bitshift`
    DEFAULT CHARACTER SET utf8mb4
    COLLATE utf8mb4_unicode_ci;
USE `bitshift`;

-- Records the schema version of this database; checked against the
-- expected VERSION by bitshift.database.Database on startup.
CREATE TABLE `version` (
    `version` INT UNSIGNED NOT NULL
) ENGINE=InnoDB;
INSERT INTO `version` VALUES (12);
-- Custom full-text stopword list.
--
-- The previous `CREATE TABLE `stopwords` LIKE ... ENGINE=InnoDB` statement
-- was removed: MySQL does not permit table options with CREATE TABLE ...
-- LIKE, and a second CREATE TABLE of the same name would fail anyway. The
-- explicit definition below matches the one used by the 11 -> 12 migration.
CREATE TABLE `stopwords` (
    `value` varchar(18) NOT NULL DEFAULT ""
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
INSERT INTO `stopwords` VALUES
    ("a"), ("about"), ("an"), ("are"), ("as"), ("at"), ("be"), ("by"), ("how"),
    ("i"), ("it"), ("la"), ("of"), ("on"), ("that"), ("the"), ("to"), ("und"),
    ("was"), ("what"), ("when"), ("where"), ("who"), ("will");
-- Known codelet sources (e.g. code hosting sites). Origin 1 is the
-- default "unknown" origin with all fields NULL; origins with a URL base
-- let codelets store only a relative URL.
CREATE TABLE `origins` (
    `origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT,
    `origin_name` VARCHAR(64) DEFAULT NULL,
    `origin_url` VARCHAR(512) DEFAULT NULL,
    `origin_url_base` VARCHAR(512) DEFAULT NULL,
    PRIMARY KEY (`origin_id`)
) ENGINE=InnoDB;
INSERT INTO `origins` VALUES (1, NULL, NULL, NULL);

-- Deduplicated source-code blobs. `code_id` is a client-computed signed
-- 64-bit hash of the language and code (see Database.insert), so identical
-- code is stored once and shared by multiple codelets.
CREATE TABLE `code` (
    `code_id` BIGINT NOT NULL,
    `code_lang` SMALLINT UNSIGNED DEFAULT NULL,
    `code_code` MEDIUMTEXT NOT NULL,
    PRIMARY KEY (`code_id`),
    KEY (`code_lang`),
    FULLTEXT KEY (`code_code`)
) ENGINE=InnoDB;

-- One row per indexed snippet, referencing its code blob and origin.
CREATE TABLE `codelets` (
    `codelet_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    `codelet_name` VARCHAR(300) NOT NULL,
    `codelet_code_id` BIGINT NOT NULL,
    `codelet_origin` TINYINT UNSIGNED NOT NULL,
    `codelet_url` VARCHAR(512) NOT NULL,
    `codelet_rank` FLOAT NOT NULL,
    `codelet_date_created` DATETIME DEFAULT NULL,
    `codelet_date_modified` DATETIME DEFAULT NULL,
    PRIMARY KEY (`codelet_id`),
    FULLTEXT KEY (`codelet_name`),
    KEY (`codelet_rank`),
    KEY (`codelet_date_created`),
    KEY (`codelet_date_modified`),
    FOREIGN KEY (`codelet_code_id`)
        REFERENCES `code` (`code_id`)
        ON DELETE RESTRICT ON UPDATE CASCADE,
    FOREIGN KEY (`codelet_origin`)
        REFERENCES `origins` (`origin_id`)
        ON DELETE RESTRICT ON UPDATE CASCADE
) ENGINE=InnoDB;

-- Codelet authors; rows are removed automatically when their codelet is
-- deleted (ON DELETE CASCADE).
CREATE TABLE `authors` (
    `author_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    `author_codelet` BIGINT UNSIGNED NOT NULL,
    `author_name` VARCHAR(128) NOT NULL,
    `author_url` VARCHAR(512) DEFAULT NULL,
    PRIMARY KEY (`author_id`),
    FULLTEXT KEY (`author_name`),
    FOREIGN KEY (`author_codelet`)
        REFERENCES `codelets` (`codelet_id`)
        ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

-- Named symbols (variables, functions, ...) extracted from a code blob;
-- `symbol_type` is an index into the application's symbol-type list.
CREATE TABLE `symbols` (
    `symbol_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    `symbol_code` BIGINT NOT NULL,
    `symbol_type` TINYINT UNSIGNED NOT NULL,
    `symbol_name` VARCHAR(512) NOT NULL,
    PRIMARY KEY (`symbol_id`),
    KEY (`symbol_type`, `symbol_name`(32)),
    FOREIGN KEY (`symbol_code`)
        REFERENCES `code` (`code_id`)
        ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

-- Source positions where a symbol is defined or used; `sloc_type`
-- distinguishes definitions from uses. Column/end coordinates may be NULL
-- when a parser cannot determine them.
CREATE TABLE `symbol_locations` (
    `sloc_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    `sloc_symbol` BIGINT UNSIGNED NOT NULL,
    `sloc_type` TINYINT UNSIGNED NOT NULL,
    `sloc_row` INT UNSIGNED NOT NULL,
    `sloc_col` INT UNSIGNED DEFAULT NULL,
    `sloc_end_row` INT UNSIGNED DEFAULT NULL,
    `sloc_end_col` INT UNSIGNED DEFAULT NULL,
    PRIMARY KEY (`sloc_id`),
    FOREIGN KEY (`sloc_symbol`)
        REFERENCES `symbols` (`symbol_id`)
        ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

-- Cached search results, keyed by a client-computed query hash. The total
-- result count is stored as mantissa * 10^exponent
-- (cache_count_mnt, cache_count_exp); see Database.search.
CREATE TABLE `cache` (
    `cache_id` BIGINT NOT NULL,
    `cache_count_mnt` SMALLINT UNSIGNED NOT NULL,
    `cache_count_exp` TINYINT UNSIGNED NOT NULL,
    `cache_created` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (`cache_id`)
) ENGINE=InnoDB;

-- The ordered codelet IDs belonging to each cached result page.
CREATE TABLE `cache_data` (
    `cdata_cache` BIGINT NOT NULL,
    `cdata_codelet` BIGINT UNSIGNED NOT NULL,
    `cdata_index` TINYINT UNSIGNED NOT NULL,
    PRIMARY KEY (`cdata_cache`, `cdata_codelet`),
    FOREIGN KEY (`cdata_cache`)
        REFERENCES `cache` (`cache_id`)
        ON DELETE CASCADE ON UPDATE CASCADE,
    FOREIGN KEY (`cdata_codelet`)
        REFERENCES `codelets` (`codelet_id`)
        ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;
-- Wipes all indexed data (authors, symbols, symbol_locations, and
-- cache_data follow via their ON DELETE CASCADE foreign keys) and resets
-- the AUTO_INCREMENT counters.
DELIMITER //
CREATE PROCEDURE `empty_database`()
    BEGIN
        DELETE FROM `codelets`;
        DELETE FROM `code`;
        DELETE FROM `cache`;
        ALTER TABLE `codelets` AUTO_INCREMENT = 1;
        ALTER TABLE `authors` AUTO_INCREMENT = 1;
        ALTER TABLE `symbols` AUTO_INCREMENT = 1;
        ALTER TABLE `symbol_locations` AUTO_INCREMENT = 1;
    END//
DELIMITER ;

-- Hourly sweep that expires cached search results older than one day.
CREATE EVENT `flush_cache`
    ON SCHEDULE EVERY 1 HOUR
    DO
        DELETE FROM `cache`
        WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY);
@@ -0,0 +1,22 @@ | |||
from os import path | |||
import yaml | |||
__all__ = ["LANGS", "LANGS_ALL"] | |||
def _load_langs():
    """
    Load the supported-language list from languages.yml.

    Returns a 2-tuple of (langs, all_langs). *langs* is a list of canonical
    language names (UTF-8 encoded, in file order, so a language's list index
    is its stable numeric ID). *all_langs* maps every recognized name to its
    ID: the name itself for plain entries, or each listed alias for mapping
    entries of the form ``{canonical name: [aliases]}``.
    """
    filename = path.join(path.dirname(__file__), "languages.yml")
    with open(filename) as fp:
        # safe_load: never construct arbitrary Python objects from the
        # file. languages.yml contains only plain lists/dicts/strings, so
        # the parsed data is identical to yaml.load's.
        data = yaml.safe_load(fp)["languages"]

    langs = []
    all_langs = {}
    for i, entry in enumerate(data):
        if isinstance(entry, dict):
            # Mapping entry: the single key is the canonical name; only
            # the aliases listed under it are registered in all_langs.
            name = list(entry)[0]
            for alias in entry[name]:
                all_langs[alias] = i
        else:
            name = entry
            all_langs[entry] = i
        langs.append(name.encode("utf8"))
    return langs, all_langs

LANGS, LANGS_ALL = _load_langs()
@@ -0,0 +1,368 @@ | |||
# A list of programming languages supported by bitshift: | |||
languages: | |||
# With parsers: | |||
- Python: | |||
- Python | |||
- Python 3 | |||
- Python 3.0 Traceback | |||
- Python console session | |||
- Python Traceback | |||
- NumPy | |||
- C | |||
- Java | |||
- Ruby: | |||
- Ruby | |||
- Ruby irb session | |||
# Without parsers: | |||
- ABAP | |||
- APL | |||
- ActionScript: | |||
- ActionScript | |||
- ActionScript 3 | |||
- ANTLR: | |||
- ANTLR | |||
- ANTLR With ActionScript Target | |||
- ANTLR With CPP Target | |||
- "ANTLR With C# Target" | |||
- ANTLR With Java Target | |||
- ANTLR With ObjectiveC Target | |||
- ANTLR With Perl Target | |||
- ANTLR With Python Target | |||
- ANTLR With Ruby Target | |||
- Ada | |||
- Agda: | |||
- Agda | |||
- Literate Agda | |||
- Alloy | |||
- AmbientTalk | |||
- ApacheConf | |||
- AppleScript | |||
- AspectJ | |||
- aspx-cs | |||
- aspx-vb | |||
- Asymptote | |||
- autohotkey | |||
- AutoIt | |||
- Awk | |||
- BBCode | |||
- BUGS | |||
- Bash: | |||
- Bash | |||
- Bash Session | |||
- Batchfile | |||
- Befunge | |||
- BlitzBasic: | |||
- BlitzBasic | |||
- BlitzMax | |||
- Boo | |||
- Brainfuck | |||
- Bro | |||
- "C#" | |||
- C++ | |||
- ca65 | |||
- CBM BASIC V2 | |||
- Ceylon | |||
- CFEngine3 | |||
- cfstatement | |||
- ChaiScript | |||
- Chapel | |||
- Cheetah | |||
- Cirru | |||
- Clay | |||
- Clojure: | |||
- Clojure | |||
- ClojureScript | |||
- CMake | |||
- COBOL: | |||
- COBOL | |||
- COBOLFree | |||
- CoffeeScript | |||
- Coldfusion CFC | |||
- Coldfusion HTML | |||
- Common Lisp | |||
- Coq | |||
- Croc | |||
- Cryptol: | |||
- Cryptol | |||
- Literate Cryptol | |||
- CSS: | |||
- CSS | |||
- CSS+Django/Jinja | |||
- CSS+Genshi Text | |||
- CSS+Lasso | |||
- CSS+Mako | |||
- CSS+Mako | |||
- CSS+Myghty | |||
- CSS+PHP | |||
- CSS+Ruby | |||
- CSS+Smarty | |||
- CUDA | |||
- Cypher | |||
- Cython | |||
- D | |||
- Darcs Patch | |||
- Dart | |||
- Debian Control file | |||
- Debian Sourcelist | |||
- Delphi | |||
- dg | |||
- Diff | |||
- Django/Jinja | |||
- Docker | |||
- DTD | |||
- Duel | |||
- Dylan: | |||
- Dylan | |||
- Dylan session | |||
- DylanLID | |||
- EBNF | |||
- eC | |||
- ECL | |||
- Eiffel | |||
- Elixir: | |||
- Elixir | |||
- Elixir iex session | |||
- Embedded Ragel | |||
- ERB: | |||
- ERB | |||
- RHTML | |||
- Erlang: | |||
- Erlang | |||
- Erlang erl session | |||
- Evoque | |||
- Factor | |||
- Fancy | |||
- Fantom | |||
- Felix | |||
- Fortran | |||
- FoxPro | |||
- FSharp | |||
- GAP | |||
- GAS | |||
- Genshi | |||
- Genshi Text | |||
- Gettext Catalog | |||
- Gherkin | |||
- GLSL | |||
- Gnuplot | |||
- Go | |||
- Golo | |||
- GoodData-CL | |||
- Gosu | |||
- Gosu Template | |||
- Groff | |||
- Groovy | |||
- Haml | |||
- Handlebars | |||
- Haskell: | |||
- Haskell | |||
- Literate Haskell | |||
- Haxe | |||
- HTML: | |||
- HTML | |||
- HTML+Cheetah | |||
- HTML+Django/Jinja | |||
- HTML+Evoque | |||
- HTML+Genshi | |||
- HTML+Lasso | |||
- HTML+Mako | |||
- HTML+Mako | |||
- HTML+Myghty | |||
- HTML+PHP | |||
- HTML+Smarty | |||
- HTML+Velocity | |||
- Hxml | |||
- Hy | |||
- Hybris | |||
- IDL | |||
- Idris: | |||
- Idris | |||
- Literate Idris | |||
- Igor | |||
- Inform 6: | |||
- Inform 6 | |||
- Inform 6 template | |||
- Inform 7 | |||
- INI | |||
- Io | |||
- Ioke | |||
- Jade | |||
- JAGS | |||
- Jasmin | |||
- Java Server Page | |||
- JavaScript: | |||
- JavaScript | |||
- JavaScript+Cheetah | |||
- JavaScript+Django/Jinja | |||
- JavaScript+Genshi Text | |||
- JavaScript+Lasso | |||
    - JavaScript+Mako
- JavaScript+Mako | |||
- JavaScript+Myghty | |||
- JavaScript+PHP | |||
- JavaScript+Ruby | |||
- JavaScript+Smarty | |||
- JSON | |||
- Julia: | |||
- Julia | |||
- Julia console | |||
- Kal | |||
- Kconfig | |||
- Koka | |||
- Kotlin | |||
- Lasso | |||
- Lighttpd configuration file | |||
- Limbo | |||
- LiveScript | |||
- LLVM | |||
- Logos | |||
- Logtalk | |||
- LSL | |||
- Lua | |||
- Makefile | |||
- Makefile | |||
- Base Makefile | |||
- Mako | |||
- MAQL | |||
- Mask | |||
- Mason | |||
- Mathematica | |||
- Matlab: | |||
- Matlab | |||
- Matlab session | |||
- MiniD | |||
- Modelica | |||
- Modula-2 | |||
- Monkey | |||
- MOOCode | |||
- MoonScript | |||
- MQL | |||
- Mscgen | |||
- MuPAD | |||
- MXML | |||
- Myghty | |||
- NASM | |||
- Nemerle | |||
- nesC | |||
- NewLisp | |||
- Newspeak | |||
- Nginx configuration file | |||
- Nimrod | |||
- Nix | |||
- NSIS | |||
- Objective-C | |||
- Objective-C++ | |||
- Objective-J | |||
- OCaml | |||
- Octave | |||
- Ooc | |||
- Opa | |||
- OpenEdge ABL | |||
- Pan | |||
- Pawn | |||
- Perl: | |||
- Perl | |||
- Perl6 | |||
- PHP | |||
- Pig | |||
- Pike | |||
- PostScript | |||
- POVRay | |||
- PowerShell | |||
- Prolog | |||
- Properties | |||
- Protocol Buffer | |||
- Puppet | |||
- PyPy Log | |||
- QBasic | |||
- QML | |||
- Racket | |||
- Ragel: | |||
- Ragel | |||
- Ragel in C Host | |||
- Ragel in CPP Host | |||
- Ragel in D Host | |||
- Ragel in Java Host | |||
- Ragel in Objective C Host | |||
- Ragel in Ruby Host | |||
- RConsole | |||
- Rd | |||
- REBOL | |||
- Red | |||
- Redcode | |||
- reg | |||
- reStructuredText | |||
- Rexx | |||
- RobotFramework | |||
- RPMSpec | |||
- RQL | |||
- RSL | |||
- Rust | |||
- S | |||
- Sass: | |||
- Sass | |||
- SCSS | |||
- Scala | |||
- Scalate Server Page | |||
- Scaml | |||
- Scheme | |||
- Scilab | |||
- Shell Session | |||
- Slim | |||
- Smali | |||
- Smalltalk | |||
- Smarty | |||
- Snobol | |||
- SourcePawn | |||
- SPARQL | |||
- SQL: | |||
- SQL | |||
- MySQL | |||
- PL/pgSQL | |||
- PostgreSQL console (psql) | |||
- PostgreSQL SQL dialect | |||
- sqlite3con | |||
- SquidConf | |||
- Stan | |||
- Standard ML | |||
- SWIG | |||
- systemverilog | |||
- Tcl | |||
- Tcsh | |||
- Tea | |||
- TeX | |||
- Todotxt | |||
- Treetop | |||
- TypeScript | |||
- UrbiScript | |||
- Vala | |||
- VB.net | |||
- VCTreeStatus | |||
- Velocity | |||
- verilog | |||
- VGL | |||
- vhdl | |||
- VimL | |||
- XML: | |||
- XML | |||
- XML+Cheetah | |||
- XML+Django/Jinja | |||
- XML+Evoque | |||
- XML+Lasso | |||
- XML+Mako | |||
- XML+Mako | |||
- XML+Myghty | |||
- XML+PHP | |||
- XML+Ruby | |||
- XML+Smarty | |||
- XML+Velocity | |||
- XQuery | |||
- XSLT | |||
- Xtend | |||
- YAML: | |||
- YAML | |||
- YAML+Jinja | |||
- Zephir |
@@ -0,0 +1,90 @@ | |||
import json | |||
import subprocess | |||
from os import path | |||
from pygments import lexers as pgl, util | |||
from ..languages import LANGS, LANGS_ALL | |||
from .python import parse_py | |||
__all__ = ["parse", "UnsupportedFileError", "start_parse_servers"] | |||
# TODO: Change these
# Shell commands that launch the out-of-process parsers, keyed by language
# name. Each command reads source code on stdin and writes a JSON symbol
# map on stdout (see parse_via_proc).
PARSER_COMMANDS = {
    'Java': ['java', '-cp',
        path.join(path.dirname(__file__), "../../parsers/java/parsing.jar"),
        'com.bitshift.parsing.Parse'],
    'Ruby': ['rake', '-f',
        path.join(path.dirname(__file__), "../../parsers/ruby/Rakefile"),
        'parse']
}
class UnsupportedFileError(Exception):
    """Raised when a codelet's language cannot be identified (see _lang)."""
    pass
def _lang(codelet):
    """
    Private function to identify the language of a codelet.

    Uses Pygments to guess a lexer from the codelet's filename (when one is
    set) or from its code, then maps the lexer's name to bitshift's numeric
    language ID via LANGS_ALL.

    :param codelet: The codelet object to be identified.
    :type codelet: Codelet

    :return: The numeric language ID.
    :rtype: int

    :raises UnsupportedFileError: If Pygments cannot find a matching lexer,
        or the lexer's name is not in LANGS_ALL.

    .. todo::
        Modify function to incorporate tags from stackoverflow.
    """
    try:
        if codelet.filename:
            lex = pgl.guess_lexer_for_filename(codelet.filename, codelet.code)
        else:
            lex = pgl.guess_lexer(codelet.code)
        return LANGS_ALL[lex.name]
    except (util.ClassNotFound, KeyError):
        raise UnsupportedFileError(codelet.filename)
def parse_via_proc(codelet):
    """
    Parse a codelet by piping its code through an external parser process.

    The command for the codelet's language is taken from PARSER_COMMANDS;
    the subprocess reads the code on stdin and prints a JSON symbol map on
    stdout, which is decoded and returned.

    :param codelet: The codelet object to be parsed.
    :type codelet: Codelet
    """
    command = PARSER_COMMANDS[LANGS[codelet.language]]
    parser = subprocess.Popen(command, stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE)
    output = parser.communicate(codelet.code)[0]
    return json.loads(output)
# Language name -> parsing routine. Python is parsed in-process; Java and
# Ruby are delegated to external parser processes via parse_via_proc.
PARSERS = {
    "Python": parse_py,
    "Java": parse_via_proc,
    "Ruby": parse_via_proc,
}
def parse(codelet):
    """
    Dispatches the codelet to the correct parser based on its language.

    Sets the codelet's `language` field to the detected language ID. If a
    parser exists for that language, its raw symbol data is normalized
    into (name, assignment locations, use locations) triples and stored on
    the codelet's `symbols` field; otherwise the codelet is left untouched.

    :param codelet: The codelet object to be parsed.
    :type codelet: Codelet
    """
    language = _lang(codelet)
    lang_name = LANGS[language]
    codelet.language = language

    def _normalize(loc):
        # Parsers emit -1 for coordinates they cannot determine; expose
        # those as None.
        return tuple(None if coord == -1 else coord for coord in loc)

    if lang_name not in PARSERS:
        return
    raw = PARSERS[lang_name](codelet)
    normalized = {}
    for kind, syms in raw.iteritems():
        entries = []
        for name in syms:
            assignments = [_normalize(loc) for loc in syms[name]["assignments"]]
            uses = [_normalize(loc) for loc in syms[name]["uses"]]
            entries.append((name, assignments, uses))
        normalized[kind] = entries
    codelet.symbols = normalized
@@ -0,0 +1,106 @@ | |||
from pycparser import c_parser, c_ast | |||
class _TreeCutter(c_ast.NodeVisitor):
    """
    Local node visitor for c abstract syntax trees.

    Records every declaration in ``accum`` under 'vars', 'functions', or
    'structs', together with its source span.

    :ivar accum: (dict) Information on variables, functions, and structs
        accumulated from an abstract syntax tree.

    :ivar cache: (dict or None) Information stored about parent nodes. Added
        to accum when node reaches the lowest possible level.

    .. todo::
        Add visit function for c_ast.ID to record all uses of a variable.
        Use self.cache to store extra information about variables.
    """

    def __init__(self):
        """
        Create a _TreeCutter instance.
        """
        self.accum = {'vars': {}, 'functions': {}, 'structs': {}}
        self.cache = None

    def start_n_end(self, node):
        # NOTE(review): unused placeholder -- it has no callers in this
        # module; confirm whether it can be removed.
        pass

    def visit_FuncDecl(self, node):
        """
        Visits FuncDecl nodes in a tree. Adds relevant data about them to accum
        after visiting all of its children as well.

        :param node: The current node.
        :type node: c_ast.FuncDecl

        .. todo::
            Add other relevant information about functions like parameters and
            return type.
        """
        # Reclassify the enclosing declaration (cached by visit_Decl) as a
        # function and extend its span to this node's coordinates.
        self.cache['group'] = 'functions'
        self.cache['meta']['end_ln'] = node.coord.line
        self.cache['meta']['end_col'] = node.coord.column

        self.generic_visit(node)

    def visit_Struct(self, node):
        """
        Visits Struct nodes in a tree. Adds relevant data about them to accum
        after visiting all of its children as well.

        :param node: The current node.
        :type node: c_ast.Struct

        .. todo::
            Find other relevant information to add about structs.
        """
        # Reclassify the enclosing declaration (cached by visit_Decl) as a
        # struct and extend its span to this node's coordinates.
        self.cache['group'] = 'structs'
        self.cache['meta']['end_ln'] = node.coord.line
        self.cache['meta']['end_col'] = node.coord.column

        self.generic_visit(node)

    def visit_Decl(self, node):
        """
        Visits Decl nodes in a tree. Adds relevant data about them to accum
        after visiting all of its children as well.

        Seeds ``cache`` with a default 'vars' classification and the node's
        position; visiting the children may reclassify it (see
        visit_FuncDecl / visit_Struct) before it is committed to ``accum``
        under the declaration's name.

        :param node: The current node.
        :type node: c_ast.Decl
        """
        self.cache = {'group': 'vars', 'meta': {}}

        self.cache['meta']['start_ln'] = node.coord.line
        self.cache['meta']['start_col'] = node.coord.column
        self.cache['meta']['end_ln'] = node.coord.line
        self.cache['meta']['end_col'] = node.coord.column

        self.generic_visit(node)

        self.accum[self.cache['group']][node.name] = self.cache['meta']
        self.cache = None
def parse_c(codelet):
    """
    Parse a C codelet and attach the discovered symbols to it.

    Sets ``codelet.symbols`` to a dict mapping the categories 'vars',
    'functions', and 'structs' to position data for each declared name.

    :param codelet: The codelet object to be parsed.
    :type codelet: Codelet

    .. todo::
        Preprocess C code so that no ParseErrors are thrown.
    """
    syntax_tree = c_parser.CParser().parse(codelet.code)
    walker = _TreeCutter()
    walker.visit(syntax_tree)
    codelet.symbols = walker.accum
@@ -0,0 +1,217 @@ | |||
import ast | |||
import re | |||
encoding_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", re.UNICODE) | |||
class _TreeWalker(ast.NodeVisitor):
    """
    Local node visitor for Python abstract syntax trees.

    :ivar symbols: (dict) Maps the categories 'vars', 'functions', and
        'classes' to dicts of symbol name -> {'assignments': [...],
        'uses': [...]}, where each entry is a position list produced by
        :py:meth:`block_position`.
    :ivar cache: (list) Names gathered from child Name/Attribute nodes,
        consumed (and cleared) by the enclosing statement's visitor.

    .. todo::
        Add a visit function for ast.Name to record all uses of a variable.
        Use self.cache to store extra information about nodes.
    """

    def __init__(self):
        """Create a _TreeWalker instance with empty symbol tables."""
        self.symbols = {'vars': {}, 'functions': {}, 'classes': {}}
        self.cache = []

    def clear_cache(self):
        """Discard any names accumulated from child nodes."""
        self.cache = []

    def _ensure(self, table, name):
        """Return the record for *name* in *table*, creating it if absent."""
        # `in` instead of the Python 2-only dict.has_key() (removed in 3.0).
        if name not in self.symbols[table]:
            self.symbols[table][name] = {'assignments': [], 'uses': []}
        return self.symbols[table][name]

    def _flush_cache(self, table, key, pos):
        """Record *pos* under *key* for every cached name, then clear."""
        for name in self.cache:
            self._ensure(table, name)[key].append(pos)
        self.clear_cache()

    def block_position(self, node):
        """
        Helper function to get the start and end positions of an AST node.

        :param node: The node.
        :type node: ast.FunctionDef or ast.ClassDef or ast.Module

        :return: [start_line, start_col, end_line, end_col]; end_col is -1
            when the node starts and ends on the same line.
        """
        start_line, start_col = node.lineno, node.col_offset
        temp_node = node
        # Descend to the last statement of the deepest body to approximate
        # where the block ends.
        while 'body' in temp_node.__dict__:
            temp_node = temp_node.body[-1]
        end_line, end_col = temp_node.lineno, temp_node.col_offset
        if start_line == end_line:
            return [start_line, start_col, end_line, -1]
        return [start_line, start_col, end_line, end_col]

    def visit_Assign(self, node):
        """
        Visits Assign nodes in a tree. Adds relevant data about them to
        symbols: target names as assignments, value names as uses.

        :param node: The current node.
        :type node: ast.Assign

        .. todo::
            Add value and type metadata to symbols.
        """
        pos = self.block_position(node)
        for target in node.targets:
            self.visit(target)
        self._flush_cache('vars', 'assignments', pos)
        self.visit(node.value)
        self._flush_cache('vars', 'uses', pos)

    def visit_FunctionDef(self, node):
        """
        Visits FunctionDef nodes in a tree. Records the definition position
        under the function's name.

        :param node: The current node.
        :type node: ast.FunctionDef

        .. todo::
            Add arguments and decorators metadata to symbols.
        """
        pos = self.block_position(node)
        self._ensure('functions', node.name)['assignments'].append(pos)
        self.generic_visit(node)

    def visit_Call(self, node):
        """
        Visits Call nodes in a tree. Records the called name as a function
        use and any other cached/argument names as variable uses.

        :param node: The current node.
        :type node: ast.Call

        .. todo::
            Add arguments and decorators metadata to symbols.
        """
        pos = self.block_position(node)
        self.visit(node.func)
        if not self.cache:
            return
        # The last cached name is the function itself (e.g. "c" in a.b.c()).
        name = self.cache.pop()
        self._ensure('functions', name)['uses'].append(pos)
        self._flush_cache('vars', 'uses', pos)
        for arg in node.args:
            self.visit(arg)
        self._flush_cache('vars', 'uses', pos)

    def visit_ClassDef(self, node):
        """
        Visits ClassDef nodes in a tree. Records the definition position
        under the class's name.

        :param node: The current node.
        :type node: ast.ClassDef

        .. todo::
            Add arguments, inherits, and decorators metadata to symbols.
        """
        pos = self.block_position(node)
        self._ensure('classes', node.name)['assignments'].append(pos)
        self.generic_visit(node)

    def visit_Name(self, node):
        """Cache the identifier so an enclosing statement can record it."""
        self.cache.append(node.id)

    def visit_Attribute(self, node):
        """Cache the attribute name after visiting the value it hangs off."""
        self.visit(node.value)
        self.cache.append(node.attr)

    def visit_Import(self, node):
        """
        Visits Import nodes in a tree. Currently a stub.

        .. todo::
            Record the imported module names (walk the node.names aliases).
        """
        pass
def parse_py(codelet):
    """
    Parse Python code and return a dictionary of symbol information.

    :param codelet: The codelet object whose ``code`` attribute is parsed.
    :type codelet: Codelet

    :return: A dict mapping the categories 'vars', 'functions', and
        'classes' to symbol position data, or an empty dict if the code
        cannot be parsed.
    :rtype: dict
    """
    def strip_encoding(lines):
        """Blank out an encoding declaration, which breaks the parser.

        Per PEP 263 the declaration may appear on line one or two. The
        matched line is replaced with an empty string rather than dropped,
        so that the line numbers recorded for the remaining code still
        refer to the original source.
        """
        it = iter(lines)
        try:
            for _ in range(2):
                line = next(it)
                yield "" if encoding_re.match(line) else line
        except StopIteration:
            return
        for line in it:
            yield line

    try:
        tree = ast.parse("\n".join(strip_encoding(codelet.code.splitlines())))
    except SyntaxError:
        ## TODO: add some logging here?
        return {}
    walker = _TreeWalker()
    walker.visit(tree)
    return walker.symbols
@@ -0,0 +1,320 @@ | |||
""" | |||
This subpackage contains code to parse search queries received from the | |||
frontend into trees that can be used by the database backend. | |||
""" | |||
from __future__ import unicode_literals | |||
from re import IGNORECASE, search | |||
from sys import maxsize | |||
from dateutil.parser import parse as parse_date | |||
from .nodes import (String, Regex, Text, Language, Author, Date, Symbol, | |||
BinaryOp, UnaryOp) | |||
from .tree import Tree | |||
from ..languages import LANGS | |||
__all__ = ["QueryParseException", "parse_query"] | |||
class QueryParseException(Exception):
    """Signals that parse_query() was given a query it cannot parse."""
class _QueryParser(object): | |||
"""Wrapper class with methods to parse queries. Used as a singleton.""" | |||
def __init__(self): | |||
self._prefixes = { | |||
self._parse_language: ["l", "lang", "language"], | |||
self._parse_author: ["a", "author"], | |||
self._parse_modified: ["m", "mod", "modified", "modify"], | |||
self._parse_created: ["cr", "create", "created"], | |||
self._parse_symbol: ["s", "sym", "symb", "symbol"], | |||
self._parse_function: ["f", "fn", "fun", "func", "function", | |||
"meth", "method"], | |||
self._parse_class: ["cl", "class", "clss"], | |||
self._parse_variable: ["v", "var", "variable"], | |||
self._parse_namespace: ["n", "ns", "namespace", "module"], | |||
self._parse_interface: ["in", "inter", "interface", "implements"], | |||
self._parse_import: ["im", "imp", "import", "include", "require", | |||
"imports", "requires"] | |||
} | |||
def _scan_query(self, query, markers): | |||
"""Scan a query (sub)string for the first occurance of some markers. | |||
Returns a 2-tuple of (first_marker_found, marker_index). | |||
""" | |||
def is_escaped(query, index): | |||
"""Return whether a query marker is backslash-escaped.""" | |||
return (index > 0 and query[index - 1] == "\\" and | |||
(index < 2 or query[index - 2] != "\\")) | |||
best_marker, best_index = None, maxsize | |||
for marker in markers: | |||
index = query.find(marker) | |||
if is_escaped(query, index): | |||
_, new_index = self._scan_query(query[index + 1:], marker) | |||
index += new_index + 1 | |||
if index >= 0 and index < best_index: | |||
best_marker, best_index = marker, index | |||
return best_marker, best_index | |||
def _split_query(self, query, markers, parens=False): | |||
"""Split a query string into a nested list of query terms. | |||
Returns a list of terms and/or nested sublists of terms. Each term and | |||
sublist is guarenteed to be non-empty. | |||
""" | |||
query = query.lstrip() | |||
if not query: | |||
return [] | |||
marker, index = self._scan_query(query, markers) | |||
if not marker: | |||
return [query] | |||
nest = [query[:index]] if index > 0 else [] | |||
after = query[index + 1:] | |||
if marker == " ": | |||
nest += self._split_query(after, markers, parens) | |||
elif marker in ('"', "'"): | |||
close_marker, close_index = self._scan_query(after, marker) | |||
if close_marker: | |||
if close_index > 0: | |||
nest.append(after[:close_index]) | |||
after = after[close_index + 1:] | |||
nest += self._split_query(after, markers, parens) | |||
elif after: | |||
nest.append(after) | |||
elif marker == "(": | |||
inner, after = self._split_query(after, markers, True), [] | |||
if inner and isinstance(inner[-1], tuple): | |||
after = self._split_query(inner.pop()[0], markers, parens) | |||
if inner: | |||
nest.append(inner) | |||
if after: | |||
nest += after | |||
elif marker == ")": | |||
if parens: | |||
nest.append((after,)) | |||
else: | |||
nest += self._split_query(after, markers) | |||
return nest | |||
def _parse_literal(self, literal): | |||
"""Parse part of a search query into a string or regular expression.""" | |||
if literal.startswith(("r:", "re:", "regex:", "regexp:")): | |||
arg = literal.split(":", 1)[1] | |||
if not arg: | |||
err = 'Incomplete query term: "%s"' % literal | |||
raise QueryParseException(err) | |||
return Regex(arg) | |||
return String(literal) | |||
def _parse_language(self, term): | |||
"""Parse part of a query into a language node and return it.""" | |||
term = self._parse_literal(term) | |||
if isinstance(term, Regex): | |||
langs = [i for i, lang in enumerate(LANGS) | |||
if search(term.regex, lang, IGNORECASE)] | |||
if not langs: | |||
err = 'No languages found for regex: "%s"' % term.regex | |||
raise QueryParseException(err) | |||
node = Language(langs.pop()) | |||
while langs: | |||
node = BinaryOp(Language(langs.pop()), BinaryOp.OR, node) | |||
return node | |||
needle = term.string.lower() | |||
for i, lang in enumerate(LANGS): | |||
if lang.lower() == needle: | |||
return Language(i) | |||
for i, lang in enumerate(LANGS): | |||
if lang.lower().startswith(needle): | |||
return Language(i) | |||
err = 'No languages found for string: "%s"' % term.string | |||
raise QueryParseException(err) | |||
def _parse_author(self, term): | |||
"""Parse part of a query into an author node and return it.""" | |||
return Author(self._parse_literal(term)) | |||
def _parse_date(self, term, type_): | |||
"""Parse part of a query into a date node and return it.""" | |||
if ":" not in term: | |||
err = "A date relationship is required " \ | |||
'("before:<date>" or "after:<date>"): "%s"' | |||
raise QueryParseException(err % term) | |||
relstr, dtstr = term.split(":", 1) | |||
if relstr.lower() in ("before", "b"): | |||
relation = Date.BEFORE | |||
elif relstr.lower() in ("after", "a"): | |||
relation = Date.AFTER | |||
else: | |||
err = 'Bad date relationship (should be "before" or "after"): "%s"' | |||
raise QueryParseException(err % relstr) | |||
try: | |||
dt = parse_date(dtstr) | |||
except (TypeError, ValueError): | |||
raise QueryParseException('Bad date/time string: "%s"' % dtstr) | |||
return Date(type_, relation, dt) | |||
def _parse_modified(self, term): | |||
"""Parse part of a query into a date modified node and return it.""" | |||
return self._parse_date(term, Date.MODIFY) | |||
def _parse_created(self, term): | |||
"""Parse part of a query into a date created node and return it.""" | |||
return self._parse_date(term, Date.CREATE) | |||
def _parse_symbol(self, term, stype=Symbol.ALL): | |||
"""Parse part of a query into a symbol node and return it.""" | |||
defines = ("a:", "assign:", "assignment:", "d:", "def:", "definition:", | |||
"decl:", "declare:", "declaration:") | |||
uses = ("u:", "use:", "c:", "call:") | |||
if term.startswith(defines) or term.startswith(uses): | |||
context = Symbol.DEFINE if term.startswith(defines) else Symbol.USE | |||
term_part = term.split(":", 1)[1] | |||
if not term_part: | |||
raise QueryParseException('Incomplete query term: "%s"' % term) | |||
term = term_part | |||
else: | |||
context = Symbol.ALL | |||
literal = self._parse_literal(term) | |||
if isinstance(literal, String): | |||
make_symbol = lambda lit: Symbol(context, stype, String(lit)) | |||
symbols = self._split_query(literal.string, " \"'") | |||
node = make_symbol(symbols.pop()) | |||
while symbols: | |||
node = BinaryOp(make_symbol(symbols.pop()), BinaryOp.OR, node) | |||
return node | |||
return Symbol(context, stype, literal) | |||
def _parse_function(self, term): | |||
"""Parse part of a query into a function node and return it.""" | |||
return self._parse_symbol(term, Symbol.FUNCTION) | |||
def _parse_class(self, term): | |||
"""Parse part of a query into a class node and return it.""" | |||
return self._parse_symbol(term, Symbol.CLASS) | |||
def _parse_variable(self, term): | |||
"""Parse part of a query into a variable node and return it.""" | |||
return self._parse_symbol(term, Symbol.VARIABLE) | |||
def _parse_namespace(self, term): | |||
"""Parse part of a query into a namespace node and return it.""" | |||
return self._parse_symbol(term, Symbol.NAMESPACE) | |||
def _parse_interface(self, term): | |||
"""Parse part of a query into a interface node and return it.""" | |||
return self._parse_symbol(term, Symbol.INTERFACE) | |||
def _parse_import(self, term): | |||
"""Parse part of a query into a import node and return it.""" | |||
return self._parse_symbol(term, Symbol.IMPORT) | |||
def _parse_term(self, term): | |||
"""Parse a query term into a tree node and return it.""" | |||
term = term.replace('\\"', '"').replace("\\\\", "\\") | |||
if ":" in term and not term[0] == ":": | |||
prefix, arg = term.split(":", 1) | |||
invert = prefix.lower() == "not" | |||
if invert: | |||
prefix, arg = arg.split(":", 1) | |||
if not arg: | |||
raise QueryParseException('Incomplete query term: "%s"' % term) | |||
for meth, prefixes in self._prefixes.iteritems(): | |||
if prefix.lower() in prefixes: | |||
if invert: | |||
return UnaryOp(UnaryOp.NOT, meth(arg)) | |||
return meth(arg) | |||
return Text(self._parse_literal(term)) | |||
def _parse_boolean_operators(self, nest): | |||
"""Parse boolean operators in a nested query list.""" | |||
op_lookup = { | |||
"and": BinaryOp.AND, | |||
"or": BinaryOp.OR, | |||
"not": UnaryOp.NOT | |||
} | |||
for i, term in enumerate(nest): | |||
if isinstance(term, list): | |||
self._parse_boolean_operators(term) | |||
else: | |||
nest[i] = op_lookup.get(term.lower(), term) | |||
def _parse_nest(self, nest): | |||
"""Recursively parse a nested list of search query terms.""" | |||
def parse_binary_op(op): | |||
"""Parse a binary operator in a nested query list.""" | |||
index = nest.index(op) | |||
if index == 0 or index == len(nest) - 1: | |||
err = "Invalid query: '%s' given without argument." | |||
raise QueryParseException(err % BinaryOp.OPS[op]) | |||
left = self._parse_nest(nest[:index]) | |||
right = self._parse_nest(nest[index + 1:]) | |||
return BinaryOp(left, op, right) | |||
if not nest: | |||
err = "Error while parsing query: empty nest detected." | |||
raise QueryParseException(err) | |||
elif BinaryOp.OR in nest: | |||
return parse_binary_op(BinaryOp.OR) | |||
elif BinaryOp.AND in nest: | |||
return parse_binary_op(BinaryOp.AND) | |||
elif UnaryOp.NOT in nest: | |||
index = nest.index(UnaryOp.NOT) | |||
if index == len(nest) - 1: | |||
err = "Invalid query: '%s' given without argument." | |||
raise QueryParseException(err % UnaryOp.OPS[UnaryOp.NOT]) | |||
right = UnaryOp(UnaryOp.NOT, self._parse_nest(nest[index + 1:])) | |||
if index > 0: | |||
left = self._parse_nest(nest[:index]) | |||
return BinaryOp(left, BinaryOp.AND, right) | |||
return right | |||
elif len(nest) > 1: | |||
left, right = self._parse_term(nest[0]), self._parse_nest(nest[1:]) | |||
return BinaryOp(left, BinaryOp.AND, right) | |||
elif isinstance(nest[0], list): | |||
return self._parse_nest(nest[0]) | |||
else: | |||
return self._parse_term(nest[0]) | |||
def _balance_tree(self, node): | |||
"""Auto-balance a tree using a string sorting function.""" | |||
if isinstance(node, BinaryOp): | |||
self._balance_tree(node.left) | |||
self._balance_tree(node.right) | |||
if node.right.sortkey() < node.left.sortkey(): | |||
node.left, node.right = node.right, node.left | |||
elif isinstance(node, UnaryOp): | |||
self._balance_tree(node.node) | |||
def parse(self, query): | |||
""" | |||
Parse a search query. | |||
The result is normalized with a sorting function so that | |||
``"foo OR bar"`` and ``"bar OR foo"`` result in the same tree. This is | |||
important for caching purposes. | |||
:param query: The query be converted. | |||
:type query: str | |||
:return: A tree storing the data in the query. | |||
:rtype: :py:class:`~.query.tree.Tree` | |||
:raises: :py:class:`.QueryParseException` | |||
""" | |||
nest = self._split_query(query.rstrip(), " \"'()") | |||
if not nest: | |||
raise QueryParseException('Empty query: "%s"' % query) | |||
self._parse_boolean_operators(nest) | |||
root = self._parse_nest(nest) | |||
self._balance_tree(root) | |||
return Tree(root) | |||
parse_query = _QueryParser().parse |
@@ -0,0 +1,297 @@ | |||
from ..languages import LANGS | |||
__all__ = ["String", "Regex", "Text", "Language", "Author", "Date", "Symbol", | |||
"BinaryOp", "UnaryOp"] | |||
class _Node(object):
    """Base class for all query tree nodes.

    A node represents one constraint applied to the database; for example,
    a :py:class:`~.Language` node restricts results to codelets written in
    a particular language.
    """

    def _null_regex(self, expr):
        """Build a REGEXP condition that treats a NULL column as a miss."""
        template = "IF(ISNULL(%s), 0, %s REGEXP ?)"
        return template % (expr, expr)

    def sortkey(self):
        """Return a string sort key for the node; the base key is empty."""
        return ""

    def parameterize(self, tables):
        """Parameterize the node.

        Returns a 4-tuple of (conditional string, parameter list, rank list,
        should-we-rank boolean). If the rank list is empty, it is assumed
        to contain the conditional string. The base implementation matches
        nothing and never ranks.
        """
        return "", [], [], False
class _Literal(object):
    """Marker base class for literal leaf values of a search query.

    A literal is either a plain string or a regular expression.
    """
class String(_Literal):
    """A plain string literal appearing in a search query."""

    def __init__(self, string):
        """
        :type string: unicode
        """
        self.string = string

    def __repr__(self):
        return "String(%r)" % (self.string,)

    def sortkey(self):
        return self.string
class Regex(_Literal):
    """A regular-expression literal appearing in a search query."""

    def __init__(self, regex):
        """
        :type regex: unicode
        """
        self.regex = regex

    def __repr__(self):
        return "Regex(%r)" % (self.regex,)

    def sortkey(self):
        return self.regex
class Text(_Node):
    """A free-text constraint node.

    Matched against codelet names (full-text search), symbol names
    (equality), and source code (full-text search).
    """

    def __init__(self, text):
        """
        :type text: :py:class:`._Literal`
        """
        self.text = text

    def __repr__(self):
        return "Text({0})".format(self.text)

    def sortkey(self):
        return self.text.sortkey()

    def parameterize(self, tables):
        tables |= {"code", "symbols"}
        if isinstance(self.text, Regex):
            needle = self.text.regex
            ranks = ["(codelet_name REGEXP ?)", "(code_code REGEXP ?)",
                     self._null_regex("symbol_name")]
        else:
            needle = self.text.string
            ranks = ["(MATCH(codelet_name) AGAINST (? IN BOOLEAN MODE))",
                     "(MATCH(code_code) AGAINST (? IN BOOLEAN MODE))",
                     "(symbol_name <=> ?)"]
        # Any one of the three clauses matching is enough; each doubles as
        # a ranking expression.
        cond = "(" + " OR ".join(ranks) + ")"
        return cond, [needle] * 3, ranks, True
class Language(_Node):
    """A language constraint node, matched against the code_lang field."""

    def __init__(self, lang):
        """
        :type lang: int
        """
        self.lang = lang

    def __repr__(self):
        return "Language({0})".format(LANGS[self.lang])

    def sortkey(self):
        return LANGS[self.lang]

    def parameterize(self, tables):
        tables.add("code")
        return "(code_lang <=> ?)", [self.lang], [], False
class Author(_Node):
    """An author constraint node, matched against author_name (full-text)."""

    def __init__(self, name):
        """
        :type name: :py:class:`_Literal`
        """
        self.name = name

    def __repr__(self):
        return "Author({0})".format(self.name)

    def sortkey(self):
        return self.name.sortkey()

    def parameterize(self, tables):
        tables.add("authors")
        if isinstance(self.name, Regex):
            return self._null_regex("author_name"), [self.name.regex], [], False
        cond = "(MATCH(author_name) AGAINST (? IN BOOLEAN MODE))"
        return cond, [self.name.string], [], True
class Date(_Node):
    """A date constraint node.

    Matched against the codelet_date_created or codelet_date_modified field.
    """

    CREATE = 1
    MODIFY = 2
    BEFORE = 1
    AFTER = 2

    def __init__(self, type_, relation, date):
        """
        :type type_: int (``CREATE`` or ``MODIFY``)
        :type relation: int (``BEFORE``, ``AFTER``)
        :type date: datetime.datetime
        """
        self.type = type_
        self.relation = relation
        self.date = date

    def __repr__(self):
        type_names = {self.CREATE: "CREATE", self.MODIFY: "MODIFY"}
        rel_names = {self.BEFORE: "BEFORE", self.AFTER: "AFTER"}
        return "Date({0}, {1}, {2})".format(
            type_names[self.type], rel_names[self.relation], self.date)

    def sortkey(self):
        return self.date.strftime("%Y%m%d%H%M%S")

    def parameterize(self, tables):
        column = {self.CREATE: "codelet_date_created",
                  self.MODIFY: "codelet_date_modified"}[self.type]
        op = {self.BEFORE: "<=", self.AFTER: ">="}[self.relation]
        cond = "IF(ISNULL(%s), 0, %s %s ?)" % (column, column, op)
        return cond, [self.date], [], False
class Symbol(_Node):
    """A symbol constraint node.

    Matched against symbol_type and symbol_name (and, when the context is
    constrained, sloc_type in symbol_locations).
    """

    ALL = -1
    DEFINE = 0
    USE = 1

    FUNCTION = 0
    CLASS = 1
    VARIABLE = 2
    NAMESPACE = 3
    INTERFACE = 4
    IMPORT = 5
    TYPES = ["functions", "classes", "vars", "namespaces", "interfaces",
             "imports"]
    TYPE_REPR = ["FUNCTION", "CLASS", "VARIABLE", "NAMESPACE", "INTERFACE",
                 "IMPORT"]

    def __init__(self, context, type_, name):
        """
        :type context: int (``DEFINE`` or ``USE``)
        :type type_: int (``ALL``, ``FUNCTION``, ``CLASS``, etc.)
        :type name: :py:class:`._Literal`
        """
        self.context = context
        self.type = type_
        self.name = name

    def __repr__(self):
        # context == ALL (-1) indexes the last element, "ALL".
        context = ["DEFINE", "USE", "ALL"][self.context]
        type_ = self.TYPE_REPR[self.type] if self.type >= 0 else "ALL"
        return "Symbol({0}, {1}, {2})".format(context, type_, self.name)

    def sortkey(self):
        return self.name.sortkey()

    def parameterize(self, tables):
        tables |= {"code", "symbols"}
        if isinstance(self.name, Regex):
            cond, name = self._null_regex("symbol_name"), self.name.regex
        else:
            cond, name = "symbol_name <=> ?", self.name.string
        if self.type == self.ALL:
            # range() instead of the Python 2-only xrange() (removed in
            # 3.0); the sequence is tiny, so materializing it is free.
            types = ", ".join(str(typ) for typ in range(len(self.TYPES)))
            part = " AND IF(ISNULL(symbol_type), 0, symbol_type IN (%s))"
            cond += part % types
        else:
            # The two original `if`s were mutually exclusive; an explicit
            # else makes that clear.
            cond += " AND symbol_type <=> %d" % self.type
        if self.context != self.ALL:
            tables |= {"symbol_locations"}
            cond += " AND sloc_type <=> %d" % self.context
        return "(" + cond + ")", [name], [], False
class BinaryOp(_Node):
    """Joins two nodes with a boolean relationship: ``and`` / ``or``."""

    AND = object()
    OR = object()
    OPS = {AND: "AND", OR: "OR"}

    def __init__(self, left, op, right):
        self.left = left
        self.op = op
        self.right = right

    def __repr__(self):
        return "BinaryOp({0}, {1}, {2})".format(
            self.left, self.OPS[self.op], self.right)

    def sortkey(self):
        return self.left.sortkey() + self.right.sortkey()

    def parameterize(self, tables):
        lcond, largs, lranks, lneed = self.left.parameterize(tables)
        rcond, rargs, rranks, rneed = self.right.parameterize(tables)
        # An empty rank list stands for the condition itself.
        if not lranks:
            lranks = [lcond]
        if not rranks:
            rranks = [rcond]
        cond = "(%s %s %s)" % (lcond, self.OPS[self.op], rcond)
        # OR always requires ranking so partial matches can be ordered.
        need_ranks = lneed or rneed or self.op == self.OR
        return cond, largs + rargs, lranks + rranks, need_ranks
class UnaryOp(_Node):
    """Applies a boolean transformation to a single node: ``not``."""

    NOT = object()
    OPS = {NOT: "NOT"}

    def __init__(self, op, node):
        self.op = op
        self.node = node

    def __repr__(self):
        return "UnaryOp({0}, {1})".format(self.OPS[self.op], self.node)

    def sortkey(self):
        return self.node.sortkey()

    def parameterize(self, tables):
        cond, args, ranks, need_ranks = self.node.parameterize(tables)
        wrapped = "(%s %s)" % (self.OPS[self.op], cond)
        # Rank on the un-negated condition when the child supplied no ranks.
        if not ranks:
            ranks = [cond]
        return wrapped, args, ranks, need_ranks
@@ -0,0 +1,84 @@ | |||
from . import nodes | |||
__all__ = ["Tree"] | |||
# SQL skeleton filled in by Tree.build_query(): the %s slots receive the
# ranking expression, the table JOIN clauses, and the WHERE condition; the
# %d slots receive the page size (LIMIT) and the page offset.
QUERY_TEMPLATE = """SELECT codelet_id, MAX(codelet_rank%s) AS score
FROM codelets %s
WHERE %s
GROUP BY codelet_id
ORDER BY score DESC
LIMIT %d OFFSET %d""".replace("\n", " ")
class Tree(object):
    """Represents a parsed search query as a tree of constraint nodes."""

    def __init__(self, root):
        self._root = root

    def __repr__(self):
        return "Tree({0})".format(self._root)

    @property
    def root(self):
        """The root node of the tree."""
        return self._root

    def sortkey(self):
        """Return a string sort key for the query tree."""
        return self._root.sortkey()

    def serialize(self):
        """Create a string representation of the query for caching.

        :return: Query string representation.
        :rtype: str
        """
        return repr(self)

    def walk(self, node_type=None):
        """Iterate over the tree's nodes, yielding those of *node_type*.

        When *node_type* is not given, every node is yielded.
        """
        stack = [self._root]
        while stack:
            current = stack.pop()
            if not node_type or isinstance(current, node_type):
                yield current
            if isinstance(current, nodes.UnaryOp):
                stack.append(current.node)
            elif isinstance(current, nodes.BinaryOp):
                stack.extend([current.left, current.right])

    def build_query(self, page=1, page_size=10):
        """Convert the query tree into a parameterized SQL SELECT statement.

        :param page: The page number to get results for.
        :type page: int
        :param page_size: The number of results per page.
        :type page_size: int

        :return: SQL query data.
        :rtype: 2-tuple of (SQL statement string, query parameter tuple)
        """
        def join_clauses(used):
            """Yield a JOIN clause for every table the query touches."""
            join_specs = [
                ("INNER", "code", "codelet_code_id", "code_id"),
                ("LEFT", "authors", "author_codelet", "codelet_id"),
                ("LEFT", "symbols", "symbol_code", "code_id"),
                ("LEFT", "symbol_locations", "sloc_symbol", "symbol_id")
            ]
            for spec in join_specs:
                if spec[1] in used:
                    yield "%s JOIN %s ON %s = %s" % spec

        used_tables = set()
        cond, arglist, ranks, need_ranks = self._root.parameterize(used_tables)
        # An empty rank list stands for the condition itself.
        if not ranks:
            ranks = [cond]
        score = ""
        if need_ranks:
            score = " + ((%s) / %d)" % (" + ".join(ranks), len(ranks))
        joins = " ".join(join_clauses(used_tables))
        offset = (page - 1) * page_size
        query = QUERY_TEMPLATE % (score, joins, cond, page_size, offset)
        # When ranking, the placeholders appear twice (score and WHERE).
        params = arglist * 2 if need_ranks else arglist
        return query, tuple(params)
@@ -0,0 +1,177 @@ | |||
# Makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
PAPER         =
BUILDDIR      = build

# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif

# Internal variables.
PAPEROPT_a4     = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source

.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext

# Print a summary of the available targets.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  xml        to make Docutils-native XML files"
	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"

# Remove everything previously built.
clean:
	rm -rf $(BUILDDIR)/*

html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/bitshift.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/bitshift.qhc"

devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/bitshift"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/bitshift"
	@echo "# devhelp"

epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
latexpdfja: | |||
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex | |||
@echo "Running LaTeX files through platex and dvipdfmx..." | |||
$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja | |||
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." | |||
text: | |||
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text | |||
@echo | |||
@echo "Build finished. The text files are in $(BUILDDIR)/text." | |||
man: | |||
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man | |||
@echo | |||
@echo "Build finished. The manual pages are in $(BUILDDIR)/man." | |||
texinfo: | |||
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo | |||
@echo | |||
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." | |||
@echo "Run \`make' in that directory to run these through makeinfo" \ | |||
"(use \`make info' here to do that automatically)." | |||
info: | |||
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo | |||
@echo "Running Texinfo files through makeinfo..." | |||
make -C $(BUILDDIR)/texinfo info | |||
@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." | |||
gettext: | |||
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale | |||
@echo | |||
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." | |||
changes: | |||
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes | |||
@echo | |||
@echo "The overview file is in $(BUILDDIR)/changes." | |||
linkcheck: | |||
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck | |||
@echo | |||
@echo "Link check complete; look for any errors in the above output " \ | |||
"or in $(BUILDDIR)/linkcheck/output.txt." | |||
doctest: | |||
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest | |||
@echo "Testing of doctests in the sources finished, look at the " \ | |||
"results in $(BUILDDIR)/doctest/output.txt." | |||
xml: | |||
$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml | |||
@echo | |||
@echo "Build finished. The XML files are in $(BUILDDIR)/xml." | |||
pseudoxml: | |||
$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml | |||
@echo | |||
@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." |
@@ -0,0 +1,27 @@ | |||
crawler Package | |||
=============== | |||
:mod:`crawler` Package | |||
---------------------- | |||
.. automodule:: bitshift.crawler | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`crawler` Module | |||
--------------------- | |||
.. automodule:: bitshift.crawler.crawler | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`indexer` Module | |||
--------------------- | |||
.. automodule:: bitshift.crawler.indexer | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
@@ -0,0 +1,19 @@ | |||
database Package | |||
================ | |||
:mod:`database` Package | |||
----------------------- | |||
.. automodule:: bitshift.database | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`migration` Module | |||
----------------------- | |||
.. automodule:: bitshift.database.migration | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
@@ -0,0 +1,11 @@ | |||
query Package | |||
============= | |||
:mod:`query` Package | |||
-------------------- | |||
.. automodule:: bitshift.query | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
@@ -0,0 +1,45 @@ | |||
bitshift Package | |||
================ | |||
:mod:`bitshift` Package | |||
----------------------- | |||
.. automodule:: bitshift.__init__ | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`assets` Module | |||
-------------------- | |||
.. automodule:: bitshift.assets | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`codelet` Module | |||
--------------------- | |||
.. automodule:: bitshift.codelet | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`config` Module | |||
-------------------- | |||
.. automodule:: bitshift.config | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
Subpackages | |||
----------- | |||
.. toctree:: | |||
bitshift.crawler | |||
bitshift.database | |||
bitshift.parser | |||
bitshift.query | |||
@@ -0,0 +1,7 @@ | |||
bitshift | |||
======== | |||
.. toctree:: | |||
:maxdepth: 4 | |||
bitshift |
@@ -0,0 +1,268 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# bitshift documentation build configuration file, created by | |||
# sphinx-quickstart on Mon Apr 7 21:09:45 2014. | |||
# | |||
# This file is execfile()d with the current directory set to its | |||
# containing dir. | |||
# | |||
# Note that not all possible configuration values are present in this | |||
# autogenerated file. | |||
# | |||
# All configuration values have a default; values that are commented out | |||
# serve to show the default. | |||
import sys | |||
import os | |||
# If extensions (or modules to document with autodoc) are in another directory, | |||
# add these directories to sys.path here. If the directory is relative to the | |||
# documentation root, use os.path.abspath to make it absolute, like shown here. | |||
sys.path.insert(0, os.path.abspath('../..')) | |||
# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.intersphinx',
    'sphinx.ext.coverage',
    'sphinx.ext.mathjax',
    'sphinx.ext.viewcode',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix of source filenames.
source_suffix = '.rst'

# The encoding of source files.
#source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = u'bitshift'
copyright = u'2014, Benjamin Attal, Ben Kurtovic, Severyn Kozak'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '0.1'
# The full version, including alpha/beta/rc tags.
release = '0.1.dev'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = []

# The reST default role (used for this markup: `text`) to use for all
# documents.
#default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []

# If true, keep warnings as "system message" paragraphs in the built documents.
#keep_warnings = False


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
html_theme = 'nature'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []

# The name for this set of Sphinx documents.  If None, it defaults to
# "<project> v<release> documentation".
#html_title = None

# A shorter title for the navigation bar.  Default is the same as html_title.
#html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
#html_extra_path = []

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}

# If false, no module index is generated.
#html_domain_indices = True

# If false, no index is generated.
#html_use_index = True

# If true, the index is split into individual pages for each letter.
#html_split_index = False

# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it.  The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None

# Output file base name for HTML help builder.
htmlhelp_basename = 'bitshiftdoc'


# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
#'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
  ('index', 'bitshift.tex', u'bitshift Documentation',
   u'Benjamin Attal, Ben Kurtovic, Severyn Kozak', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False

# If true, show page references after internal links.
#latex_show_pagerefs = False

# If true, show URL addresses after external links.
#latex_show_urls = False

# Documents to append as an appendix to all manuals.
#latex_appendices = []

# If false, no module index is generated.
#latex_domain_indices = True


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    ('index', 'bitshift', u'bitshift Documentation',
     [u'Benjamin Attal, Ben Kurtovic, Severyn Kozak'], 1)
]

# If true, show URL addresses after external links.
#man_show_urls = False


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
# The description field previously held sphinx-quickstart's placeholder
# text ("One line description of project."); replaced with the real
# project description.
texinfo_documents = [
  ('index', 'bitshift', u'bitshift Documentation',
   u'Benjamin Attal, Ben Kurtovic, Severyn Kozak', 'bitshift',
   'Semantic search engine for source code.',
   'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
#texinfo_appendices = []

# If false, no module index is generated.
#texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'

# If true, do not generate a @detailmenu in the "Top" node's menu.
#texinfo_no_detailmenu = False


# Example configuration for intersphinx: refer to the Python standard library.
# NOTE(review): URL-keyed mapping is the legacy intersphinx form; newer Sphinx
# versions prefer {'python': ('https://docs.python.org/', None)} -- confirm
# the pinned Sphinx version before modernizing.
intersphinx_mapping = {'http://docs.python.org/': None}
@@ -0,0 +1,20 @@ | |||
bitshift | |||
======== | |||
**bitshift** is a semantic search engine for source code. | |||
Contents: | |||
.. toctree:: | |||
:maxdepth: 2 | |||
API Reference <api/modules> | |||
Indices and tables | |||
================== | |||
* :ref:`genindex` | |||
* :ref:`modindex` | |||
* :ref:`search` | |||
@@ -0,0 +1,9 @@ | |||
# Configuration file for Gunicorn
# http://docs.gunicorn.org/en/latest/configure.html

# Listen on a local Unix domain socket; a front-end web server is expected to
# proxy HTTP traffic to it.  NOTE(review): /tmp is world-writable -- a
# dedicated runtime directory would be safer; confirm deployment setup.
bind = ["unix:/tmp/gunicorn.sock"]

# Number of worker processes serving requests.
workers = 4

# Log destinations are relative to the working directory; the "logs/"
# directory must exist before startup (it is git-ignored in this repo).
accesslog = "logs/access.log"
errorlog = "logs/error.log"
loglevel = "info"
@@ -0,0 +1,72 @@ | |||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Java side of bitshift's parsing service: parses Java source into a
       symbol table (entry point: com.bitshift.parsing.Parse). -->
  <groupId>com.bitshift.parsing</groupId>
  <artifactId>parsing</artifactId>
  <packaging>jar</packaging>
  <version>1.0-SNAPSHOT</version>
  <name>parsing</name>
  <url>http://maven.apache.org</url>

  <dependencies>
    <!-- Unit tests only. -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
    </dependency>
    <!-- Eclipse JDT core: builds the Java AST walked by JavaParser. -->
    <dependency>
      <groupId>org.eclipse.jdt</groupId>
      <artifactId>org.eclipse.jdt.core</artifactId>
      <version>3.7.1</version>
    </dependency>
    <!-- Guava: used by the parsers (e.g. Joiner in JavaParser). -->
    <dependency>
      <groupId>com.google.guava</groupId>
      <artifactId>guava</artifactId>
      <version>17.0</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- `mvn exec:java` runs the parser entry point directly. -->
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>exec-maven-plugin</artifactId>
        <version>1.2.1</version>
        <configuration>
          <mainClass>com.bitshift.parsing.Parse</mainClass>
          <arguments>
          </arguments>
        </configuration>
      </plugin>
      <!-- `mvn package` produces a self-contained parsing.jar in the project
           root: dependencies bundled, manifest main class set, and no
           "-jar-with-dependencies" suffix on the file name. -->
      <plugin>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>2.4</version>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
            <configuration>
              <archive>
                <manifest>
                  <addClasspath>true</addClasspath>
                  <mainClass>com.bitshift.parsing.Parse</mainClass>
                </manifest>
              </archive>
              <descriptorRefs>
                <descriptorRef>jar-with-dependencies</descriptorRef>
              </descriptorRefs>
              <outputDirectory>${project.basedir}</outputDirectory>
              <finalName>${project.artifactId}</finalName>
              <appendAssemblyId>false</appendAssemblyId>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>
@@ -0,0 +1,35 @@ | |||
package com.bitshift.parsing; | |||
import java.io.BufferedReader; | |||
import java.io.BufferedWriter; | |||
import java.io.IOException; | |||
import java.io.InputStreamReader; | |||
import java.io.OutputStreamWriter; | |||
import com.bitshift.parsing.parsers.JavaParser; | |||
public class Parse { | |||
public static void main(String[] args) { | |||
try { | |||
BufferedReader br = new BufferedReader( | |||
new InputStreamReader(System.in)); | |||
String str = ""; | |||
StringBuilder source = new StringBuilder(); | |||
while ((str = br.readLine()) != null) { | |||
source.append(str + "\n"); | |||
} | |||
String symbols = (new JavaParser(source.toString())).parse(); | |||
BufferedWriter bw = new BufferedWriter( | |||
new OutputStreamWriter(System.out)); | |||
bw.write(symbols); | |||
bw.flush(); | |||
} catch (IOException e) { | |||
} | |||
} | |||
} |
@@ -0,0 +1,214 @@ | |||
package com.bitshift.parsing.parsers; | |||
import java.util.HashMap; | |||
import java.util.List; | |||
import java.util.ArrayList; | |||
import java.util.Map; | |||
import java.util.Stack; | |||
import java.util.Arrays; | |||
import com.google.common.base.Joiner; | |||
import org.eclipse.jdt.core.JavaCore; | |||
import org.eclipse.jdt.core.dom.AST; | |||
import org.eclipse.jdt.core.dom.ASTNode; | |||
import org.eclipse.jdt.core.dom.ASTParser; | |||
import org.eclipse.jdt.core.dom.ASTVisitor; | |||
import org.eclipse.jdt.core.dom.CompilationUnit; | |||
import org.eclipse.jdt.core.dom.ClassInstanceCreation; | |||
import org.eclipse.jdt.core.dom.ImportDeclaration; | |||
import org.eclipse.jdt.core.dom.MethodDeclaration; | |||
import org.eclipse.jdt.core.dom.MethodInvocation; | |||
import org.eclipse.jdt.core.dom.Name; | |||
import org.eclipse.jdt.core.dom.PackageDeclaration; | |||
import org.eclipse.jdt.core.dom.QualifiedName; | |||
import org.eclipse.jdt.core.dom.SimpleName; | |||
import org.eclipse.jdt.core.dom.Statement; | |||
import org.eclipse.jdt.core.dom.TypeDeclaration; | |||
import org.eclipse.jdt.core.dom.VariableDeclarationFragment; | |||
import com.bitshift.parsing.symbols.Symbols; | |||
import com.bitshift.parsing.symbols.JavaSymbols; | |||
/*TODO: Work on parsing partial java code.*/
/**
 * Parses a complete Java source string with the Eclipse JDT and collects
 * declarations and uses of packages, classes, interfaces, methods,
 * variables, and imports into a {@link JavaSymbols} table.
 */
public class JavaParser {

    // Full text of the compilation unit to parse.
    private String source;

    public JavaParser(String source) {
        this.source = source;
    }

    /**
     * Builds the JDT AST for {@code source} and walks it with a
     * {@link NodeVisitor}, returning the populated symbol table.
     */
    private Symbols genSymbols() {
        ASTParser parser = ASTParser.newParser(AST.JLS3);
        parser.setSource(this.source.toCharArray());

        // Use the JDT default compiler options for the parse.
        Map options = JavaCore.getOptions();
        parser.setCompilerOptions(options);

        CompilationUnit root = (CompilationUnit) parser.createAST(null);

        NodeVisitor visitor = new NodeVisitor(root);
        root.accept(visitor);

        return visitor.symbols;
    }

    /** Parses the source and returns the symbol table's string form. */
    public String parse() {
        JavaSymbols symbols = (JavaSymbols) this.genSymbols();
        return symbols.toString();
    }

    /**
     * AST visitor that records names and source coordinates of interesting
     * nodes.  Protocol: each visit(...) pushes a per-node record onto
     * _cache; the matching endVisit(...) pops it and files it in symbols.
     * The SimpleName/QualifiedName visits fill the "name" slot of whatever
     * record is currently on top of the stack (first name seen wins), which
     * is how e.g. PackageDeclaration and VariableDeclarationFragment get
     * their names.  Relies on the JDT's visit/endVisit nesting order.
     */
    class NodeVisitor extends ASTVisitor {

        protected CompilationUnit root;
        protected JavaSymbols symbols;
        // Stack of partially-built node records, one per in-progress visit.
        private Stack<HashMap<String, Object>> _cache;

        public NodeVisitor(CompilationUnit root) {
            this.root = root;
            this.symbols = new JavaSymbols();
            this._cache = new Stack<HashMap<String, Object>>();
        }

        /**
         * Returns the node's coordinates as [start-line, start-col,
         * end-line, end-col]; columns are converted to 1-based.
         */
        public ArrayList<Integer> blockPosition(ASTNode node) {
            int sl = this.root.getLineNumber(node.getStartPosition());
            int sc = this.root.getColumnNumber(node.getStartPosition()) + 1;
            int el = this.root.getLineNumber(node.getStartPosition()
                    + node.getLength() - 1);
            int ec = this.root.getColumnNumber(node.getStartPosition()
                    + node.getLength() - 1) + 1;

            return Symbols.createCoord(sl, sc, el, ec);
        }

        public boolean visit(MethodDeclaration node) {
            HashMap<String, Object> data = new HashMap<String, Object>();
            Name nameObj = node.getName();
            // Method declarations carry their own name node, so the name is
            // resolved here rather than via the SimpleName visit.
            String name = nameObj.isQualifiedName() ?
                ((QualifiedName) nameObj).getFullyQualifiedName() :
                ((SimpleName) nameObj).getIdentifier();

            data.put("coord", this.blockPosition(node));
            data.put("name", name);
            this._cache.push(data);
            return true;
        }

        public void endVisit(MethodDeclaration node) {
            HashMap<String, Object> data = this._cache.pop();
            String name = (String)data.remove("name");
            // Names are quoted here so JavaSymbols.toString() can emit them
            // verbatim.
            this.symbols.insertMethodDeclaration("\"" + name + "\"", data);
        }

        public boolean visit(MethodInvocation node) {
            HashMap<String, Object> data = new HashMap<String, Object>();
            Name nameObj = node.getName();
            String name = nameObj.isQualifiedName() ?
                ((QualifiedName) nameObj).getFullyQualifiedName() :
                ((SimpleName) nameObj).getIdentifier();

            data.put("coord", this.blockPosition(node));
            data.put("name", name);
            this._cache.push(data);
            return true;
        }

        public void endVisit(MethodInvocation node) {
            HashMap<String, Object> data = this._cache.pop();
            String name = (String)data.remove("name");
            this.symbols.insertMethodInvocation("\"" + name + "\"", data);
        }

        public boolean visit(PackageDeclaration node) {
            // Name is filled in later by the QualifiedName/SimpleName visit.
            HashMap<String, Object> data = new HashMap<String, Object>();
            this._cache.push(data);
            return true;
        }

        public void endVisit(PackageDeclaration node) {
            HashMap<String, Object> data = this._cache.pop();
            String name = (String)data.remove("name");
            this.symbols.setPackage(name);
        }

        public boolean visit(TypeDeclaration node) {
            HashMap<String, Object> data = new HashMap<String, Object>();
            data.put("coord", this.blockPosition(node));
            this._cache.push(data);
            return true;
        }

        public void endVisit(TypeDeclaration node) {
            HashMap<String, Object> data = this._cache.pop();
            String name = (String)data.remove("name");
            // TypeDeclaration covers both classes and interfaces; they are
            // filed in separate tables.
            if (node.isInterface()) {
                this.symbols.insertInterfaceDeclaration("\"" + name + "\"", data);
            } else {
                this.symbols.insertClassDeclaration("\"" + name + "\"", data);
            }
        }

        public boolean visit(VariableDeclarationFragment node) {
            HashMap<String, Object> data = new HashMap<String, Object>();
            data.put("coord", this.blockPosition(node));
            this._cache.push(data);
            return true;
        }

        public void endVisit(VariableDeclarationFragment node) {
            HashMap<String, Object> data = this._cache.pop();
            String name = (String)data.remove("name");
            this.symbols.insertVariableDeclaration("\"" + name + "\"", data);
        }

        public boolean visit(QualifiedName node) {
            // Fill the pending record's name slot, but only if no name has
            // been claimed yet (first name encountered wins).
            if (!this._cache.empty()) {
                HashMap<String, Object> data = this._cache.pop();

                if(!data.containsKey("name")) {
                    String name = node.getFullyQualifiedName();
                    data.put("name", name);
                }

                this._cache.push(data);
            }
            return true;
        }

        public boolean visit(SimpleName node) {
            if (!this._cache.empty()) {
                HashMap<String, Object> data = this._cache.pop();

                if(!data.containsKey("name")) {
                    String name = node.getIdentifier();
                    data.put("name", name);
                }

                this._cache.push(data);
            }
            return true;
        }

        public boolean visit(ImportDeclaration node) {
            HashMap<String, Object> data = new HashMap<String, Object>();
            data.put("coord", this.blockPosition(node));
            this._cache.push(data);
            return true;
        }

        public void endVisit(ImportDeclaration node) {
            HashMap<String, Object> data = this._cache.pop();
            String name = (String)data.remove("name");
            String[] parts = name.split("\\.");

            // Record every package prefix of the import (e.g. a.b.C yields
            // a.b.C and a.b) so searches on parent packages also match.
            for(int i = parts.length; i > 1; i--) {
                String pkg = Joiner.on(".").join(Arrays.copyOfRange(parts, 0, i));
                this.symbols.insertImportStatement("\"" + pkg + "\"", data);
            }
        }

    }
}
@@ -0,0 +1,71 @@ | |||
package com.bitshift.parsing.parsers; | |||
import java.util.Formatter; | |||
import java.io.BufferedReader; | |||
import java.io.BufferedWriter; | |||
import java.io.InputStreamReader; | |||
import java.io.OutputStreamWriter; | |||
import java.io.IOException; | |||
import java.nio.ByteBuffer; | |||
import java.net.Socket; | |||
import com.bitshift.parsing.symbols.Symbols; | |||
public abstract class Parser implements Runnable { | |||
protected Socket clientSocket; | |||
private String eos; | |||
public Parser(Socket clientSocket) { | |||
this.clientSocket = clientSocket; | |||
} | |||
protected String readFromClient() { | |||
String fromClient = ""; | |||
try { | |||
BufferedReader clientReader = new BufferedReader( | |||
new InputStreamReader(this.clientSocket.getInputStream())); | |||
int bytes = Integer.parseInt(clientReader.readLine()); | |||
this.eos = clientReader.readLine(); | |||
StringBuilder builder = new StringBuilder(); | |||
int i = 0; | |||
while(i < bytes) { | |||
char aux = (char)clientReader.read(); | |||
builder.append(aux); | |||
i++; | |||
} | |||
fromClient = builder.toString(); | |||
} catch (IOException ex) { | |||
} | |||
return fromClient; | |||
} | |||
protected void writeToClient(String toClient) { | |||
try { | |||
BufferedWriter clientWriter = new BufferedWriter( | |||
new OutputStreamWriter(this.clientSocket.getOutputStream())); | |||
clientWriter.write(toClient); | |||
clientWriter.write(eos); | |||
clientWriter.flush(); | |||
this.clientSocket.close(); | |||
} catch (IOException ex) { | |||
} | |||
} | |||
protected abstract Symbols genSymbols(); | |||
public abstract void run(); | |||
} | |||
@@ -0,0 +1,177 @@ | |||
package com.bitshift.parsing.symbols; | |||
import java.util.HashMap; | |||
import java.util.ArrayList; | |||
import com.bitshift.parsing.symbols.Symbols; | |||
/*TODO: Overwrite toString.*/
/**
 * Accumulates the symbols extracted from one Java compilation unit.  Each
 * table maps a (pre-quoted) symbol name to a record holding two coordinate
 * lists keyed by assignKey/useKey: where the symbol is declared and where
 * it is used.
 */
public class JavaSymbols extends Symbols {

    // Declared package of the parsed compilation unit (null until set).
    private String _packageName;
    private HashMap<String, HashMap<String, Object>> _classes;
    private HashMap<String, HashMap<String, Object>> _interfaces;
    private HashMap<String, HashMap<String, Object>> _methods;
    private HashMap<String, HashMap<String, Object>> _vars;
    private HashMap<String, HashMap<String, Object>> _imports;

    // Keys are stored pre-quoted so toString() can emit them verbatim in
    // its JSON-like output without further escaping.
    private final String assignKey = "\"assignments\"";
    private final String useKey = "\"uses\"";
public JavaSymbols() { | |||
_packageName = null; | |||
_classes = new HashMap<String, HashMap<String, Object>>(); | |||
_interfaces = new HashMap<String, HashMap<String, Object>>(); | |||
_methods = new HashMap<String, HashMap<String, Object>>(); | |||
_vars = new HashMap<String, HashMap<String, Object>>(); | |||
_imports = new HashMap<String, HashMap<String, Object>>(); | |||
} | |||
public boolean setPackage(String name) { | |||
_packageName = name; | |||
return true; | |||
} | |||
public boolean insertClassDeclaration(String name, HashMap<String, Object> data) { | |||
ArrayList<Object> assignments = new ArrayList<Object>(10); | |||
ArrayList<Object> uses = new ArrayList<Object>(10); | |||
HashMap<String, Object> klass = new HashMap<String, Object>(); | |||
assignments.add(data.get("coord")); | |||
klass.put(assignKey, assignments); | |||
klass.put(useKey, uses); | |||
this._classes.put(name, klass); | |||
return true; | |||
} | |||
public boolean insertInterfaceDeclaration(String name, HashMap<String, Object> data) { | |||
ArrayList<Object> assignments = new ArrayList<Object>(10); | |||
ArrayList<Object> uses = new ArrayList<Object>(10); | |||
HashMap<String, Object> klass = new HashMap<String, Object>(); | |||
assignments.add(data.get("coord")); | |||
klass.put(assignKey, assignments); | |||
klass.put(useKey, uses); | |||
this._interfaces.put(name, klass); | |||
return true; | |||
} | |||
public boolean insertMethodDeclaration(String name, HashMap<String, Object> data) { | |||
HashMap<String, Object> method = this._methods.get(name); | |||
if (method == null) { | |||
method = new HashMap<String, Object>(); | |||
ArrayList<Object> assignments = new ArrayList<Object>(10); | |||
ArrayList<Object> uses = new ArrayList<Object>(10); | |||
assignments.add(data.get("coord")); | |||
method.put(assignKey, assignments); | |||
method.put(useKey, uses); | |||
} else { | |||
ArrayList<Object> assignments = (ArrayList<Object>)method.get(assignKey); | |||
assignments.add(data.get("coord")); | |||
method.put(assignKey, assignments); | |||
} | |||
this._methods.put(name, method); | |||
return true; | |||
} | |||
public boolean insertMethodInvocation(String name, HashMap<String, Object> data) { | |||
HashMap<String, Object> method = this._methods.get(name); | |||
if (method == null) { | |||
method = new HashMap<String, Object>(); | |||
ArrayList<Object> assignments = new ArrayList<Object>(10); | |||
ArrayList<Object> uses = new ArrayList<Object>(10); | |||
uses.add(data.get("coord")); | |||
method.put(assignKey, assignments); | |||
method.put(useKey, uses); | |||
} else { | |||
ArrayList<Object> uses = (ArrayList<Object>)method.get(useKey); | |||
uses.add(data.get("coord")); | |||
method.put(useKey, uses); | |||
} | |||
this._methods.put(name, method); | |||
return true; | |||
} | |||
public boolean insertVariableDeclaration(String name, HashMap<String, Object> data) { | |||
HashMap<String, Object> var = this._vars.get(name); | |||
if (var == null) { | |||
var = new HashMap<String, Object>(); | |||
ArrayList<Object> assignments = new ArrayList<Object>(10); | |||
ArrayList<Object> uses = new ArrayList<Object>(10); | |||
assignments.add(data.get("coord")); | |||
var.put(assignKey, assignments); | |||
var.put(useKey, uses); | |||
} else { | |||
ArrayList<Object> assignments = (ArrayList<Object>)var.get(assignKey); | |||
assignments.add(data.get("coord")); | |||
var.put(assignKey, assignments); | |||
} | |||
this._vars.put(name, var); | |||
return true; | |||
} | |||
public boolean insertVariableAccess(String name, HashMap<String, Object> data) { | |||
HashMap<String, Object> var = this._vars.get(name); | |||
if (var == null) { | |||
var = new HashMap<String, Object>(); | |||
ArrayList<Object> assignments = new ArrayList<Object>(10); | |||
ArrayList<Object> uses = new ArrayList<Object>(10); | |||
uses.add(data.get("coord")); | |||
var.put(assignKey, assignments); | |||
var.put(useKey, uses); | |||
} else { | |||
ArrayList<Object> uses = (ArrayList<Object>)var.get(useKey); | |||
uses.add(data.get("coord")); | |||
var.put(useKey, uses); | |||
} | |||
this._vars.put(name, var); | |||
return true; | |||
} | |||
public boolean insertImportStatement(String name, HashMap<String, Object> data) { | |||
HashMap<String, Object> lib = this._imports.get(name); | |||
if (lib == null) { | |||
lib = new HashMap<String, Object>(); | |||
ArrayList<Object> assignments = new ArrayList<Object>(10); | |||
ArrayList<Object> uses = new ArrayList<Object>(10); | |||
uses.add(data.get("coord")); | |||
lib.put(assignKey, assignments); | |||
lib.put(useKey, uses); | |||
} else { | |||
ArrayList<Object> uses = (ArrayList<Object>)lib.get(useKey); | |||
uses.add(data.get("coord")); | |||
lib.put(useKey, uses); | |||
} | |||
this._imports.put(name, lib); | |||
return true; | |||
} | |||
/**
 * Serialize every collected symbol table as a JSON-like object string.
 *
 * HashMap#toString renders entries as `key=value`, so every `=` is
 * rewritten to `:` to approximate JSON syntax.
 * NOTE(review): replaceAll rewrites ALL `=` characters, including any
 * that occur inside symbol names or values — confirm inputs can never
 * contain one.
 *
 * @return a `{...}`-wrapped string with classes, interfaces, functions,
 *         vars and imports keys.
 */
public String toString() {
    StringBuilder builder = new StringBuilder();
    builder.append("\"classes\":" + this._classes + ",");
    builder.append("\"interfaces\":" + this._interfaces + ",");
    builder.append("\"functions\":" + this._methods + ",");
    builder.append("\"vars\":" + this._vars + ",");
    builder.append("\"imports\":" + this._imports + ",");
    // Convert HashMap's `=` separators into JSON's `:`.
    String s = builder.toString().replaceAll("=", ":");
    // Drop the trailing comma left by the final append above.
    s = s.substring(0, s.length() - 1);
    return "{" + s + "}";
}
} | |||
@@ -0,0 +1,17 @@ | |||
package com.bitshift.parsing.symbols; | |||
import java.util.ArrayList; | |||
public abstract class Symbols { | |||
public Symbols() { | |||
} | |||
public static ArrayList<Integer> createCoord(Integer startLine, Integer startCol, Integer endLine, Integer endCol) { | |||
ArrayList<Integer> coord = new ArrayList<Integer>(4); | |||
coord.add(startLine); coord.add(startCol); coord.add(endLine); coord.add(endCol); | |||
return coord; | |||
} | |||
} |
@@ -0,0 +1,4 @@ | |||
# Gem dependencies for bitshift's Ruby parser.
source 'https://rubygems.org'

# NOTE(review): lib/parser.rb uses Ripper from the standard library;
# confirm both of these sexp-toolchain gems are still required.
gem 'ruby_parser'
gem 'sexp_processor'
@@ -0,0 +1,6 @@ | |||
require 'pp'
# Load the Ruby symbol extractor living next to this Rakefile.
require File.expand_path('../lib/parser.rb', __FILE__)

# Parse Ruby source fed on STDIN and print its symbol table to STDOUT
# (see `parse` in lib/parser.rb).
task :parse do |t|
  parse
end
@@ -0,0 +1,137 @@ | |||
require 'ripper'

# Read Ruby source from STDIN, extract its symbols with TreeWalker, and
# print the resulting JSON-like symbol table to STDOUT.
def parse
  source = STDIN.read
  walker = TreeWalker.new(source)
  walker.parse
  puts walker.to_s
end
# Walks the s-expression tree Ripper builds for a Ruby source file,
# recording where namespaces, classes, functions, and variables are
# assigned and used. Results accumulate in `symbols`.
class TreeWalker < Ripper::SexpBuilder
  attr_accessor :symbols

  def initialize(source)
    # Auto-vivifying table: first access of a name creates an empty
    # {:assignments => [], :uses => []} record for it.
    ns_hash = Hash.new {
      |hash, key|
      hash[key] = {
        :assignments => [], :uses => []
      }
    }
    # `clone` copies the (still empty) hash and shares its default proc,
    # giving each category an independent auto-vivifying table.
    class_hash = ns_hash.clone
    function_hash = ns_hash.clone
    var_hash = ns_hash.clone

    @symbols = {
      :namespaces => ns_hash,
      :classes => class_hash,
      :functions => function_hash,
      :vars => var_hash
    }

    super(source)
  end

  # Derive a [start_line, start_col, end_line, end_col] coordinate for a
  # handler's argument list by drilling into nested arrays until a
  # positioned leaf is reached.
  #
  # NOTE(review): both descents below start from node[0] and are
  # identical, so `sp` and `ep` appear to always end up equal and the
  # `sp + [sp[0], -1]` branch always fires; the second loop was
  # presumably meant to locate the *last* leaf — confirm intent.
  def block_position(node)
    last_node = node[0]
    while last_node.is_a? Array
      sp = last_node
      # Step past trailing nils / empty tails to the last real element.
      while not (last_el = last_node[last_node.count - 1]) or
        (last_el.is_a? Array and last_el[last_el.count - 1].nil?)
        last_node = last_node[0..last_node.count - 2]
      end
      last_node = last_el
    end

    last_node = node[0]
    while last_node.is_a? Array
      ep = last_node
      while not (last_el = last_node[last_node.count - 1]) or
        (last_el.is_a? Array and last_el[last_el.count - 1].nil?)
        last_node = last_node[0..last_node.count - 2]
      end
      last_node = last_el
    end

    if sp == ep
      # Single position: pad with [start_line, -1] as the end marker.
      return sp + [sp[0], -1]
    end
    return sp + ep
  end

  # `module Foo` — record a namespace assignment.
  def on_module(*node)
    pos = block_position(node)
    name = node[0][1][1]
    symbols[:namespaces][name][:assignments] << pos
    return node
  end

  # `class Foo` — record a class assignment.
  def on_class(*node)
    pos = block_position(node)
    name = node[0][1][1]
    symbols[:classes][name][:assignments] << pos
    return node
  end

  # `def foo` — record a function assignment.
  def on_def(*node)
    pos = block_position(node)
    name = node[0][1]
    symbols[:functions][name][:assignments] << pos
    return node
  end

  # `recv.foo(...)` — record a function use.
  def on_call(*node)
    pos = block_position(node)
    name = node[node.count - 1][1]
    symbols[:functions][name][:uses] << pos
    return node
  end

  # Bare `foo` call with no receiver/parens — record a function use.
  def on_vcall(*node)
    pos = block_position(node)
    name = node[0][1]
    symbols[:functions][name][:uses] << pos
    return node
  end

  # `x = ...` — record a variable assignment; skip targets that are not
  # simple positioned nodes.
  def on_assign(*node)
    pos = block_position(node)
    return node if not node[0][0].is_a? Array
    name = node[0][0][1]
    symbols[:vars][name][:assignments] << pos
    return node
  end

  # Variable appearing as an assignment target field.
  # NOTE(review): recorded under :uses, not :assignments — confirm this
  # is intentional.
  def on_var_field(*node)
    pos = block_position(node)
    name = node[0][1]
    symbols[:vars][name][:uses] << pos
    return node
  end

  # Variable read — record a use.
  def on_var_ref(*node)
    pos = block_position(node)
    name = node[0][1]
    symbols[:vars][name][:uses] << pos
    return node
  end

  def on_command(*node)
    # catch require statements
    # TODO(review): handler is currently a stub; returns nil.
  end

  # Render the symbol table as a JSON-like string: keys are stringified
  # and Ruby's `=>` separators become `:`.
  # NOTE(review): the gsub also rewrites any "=>" occurring inside
  # recorded names — confirm names can never contain one.
  def to_s
    new_symbols = Hash.new {|hash, key| hash[key] = Hash.new}

    symbols.each do |type, sym_list|
      sym_list.each do |name, sym|
        new_symbols[type.to_s][name.to_s] = {
          "assignments" => sym[:assignments],
          "uses" => sym[:uses]}
      end
    end

    str = new_symbols.to_s
    str = str.gsub(/=>/, ":")
    return str
  end
end
@@ -0,0 +1,14 @@ | |||
# setuptools packaging configuration for bitshift.
from setuptools import setup, find_packages

setup(
    name="bitshift",
    version="0.1.dev",
    packages=find_packages(),
    install_requires=[
        "Flask>=0.10.1", "gunicorn>=18.0", "pygments>=1.6", "requests>=2.2.0",
        "GitPython>=0.3.2.RC1", "beautifulsoup4>=3.2.1", "oursql>=0.9.3.1",
        "mmh3>=2.3", "PyYAML>=3.11", "python-dateutil>=2.2", "cchardet>=0.3.5",
    ],
    author="Benjamin Attal, Ben Kurtovic, Severyn Kozak",
    license="MIT",
    url="https://github.com/earwig/bitshift",
)
@@ -0,0 +1,65 @@ | |||
/*
 * Pygments syntax-highlighting theme ("github" style) for rendered code
 * snippets. The short class names are Pygments token codes (see each
 * rule's trailing comment).
 */
td.linenos { background: rgba(65,131,196,0.05); padding-right: 10px; border-right: 1px solid #bbb; }
span.lineno { background: rgba(65,131,196,0.05); padding: 0 5px 0 5px; }
pre { line-height: 125% }
.highlighttable { background-color: #fff; padding-left: 10px; width: inherit; height: inherit; }
.hll { display: block }
.c { color: #999988; font-style: italic } /* Comment */
.err { color: #a61717; background-color: #e3d2d2 } /* Error */
.k { color: #000000; font-weight: bold } /* Keyword */
.o { color: #000000; font-weight: bold } /* Operator */
.cm { color: #999988; font-style: italic } /* Comment.Multiline */
.cp { color: #999999; font-weight: bold; font-style: italic } /* Comment.Preproc */
.c1 { color: #999988; font-style: italic } /* Comment.Single */
.cs { color: #999999; font-weight: bold; font-style: italic } /* Comment.Special */
.gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.ge { color: #000000; font-style: italic } /* Generic.Emph */
.gr { color: #aa0000 } /* Generic.Error */
.gh { color: #999999 } /* Generic.Heading */
.gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.go { color: #888888 } /* Generic.Output */
.gp { color: #555555 } /* Generic.Prompt */
.gs { font-weight: bold } /* Generic.Strong */
.gu { color: #aaaaaa } /* Generic.Subheading */
.gt { color: #aa0000 } /* Generic.Traceback */
.kc { color: #000000; font-weight: bold } /* Keyword.Constant */
.kd { color: #000000; font-weight: bold } /* Keyword.Declaration */
.kn { color: #000000; font-weight: bold } /* Keyword.Namespace */
.kp { color: #000000; font-weight: bold } /* Keyword.Pseudo */
.kr { color: #000000; font-weight: bold } /* Keyword.Reserved */
.kt { color: #445588; font-weight: bold } /* Keyword.Type */
.m { color: #009999 } /* Literal.Number */
.s { color: #d01040 } /* Literal.String */
.na { color: #008080 } /* Name.Attribute */
.nb { color: #0086B3 } /* Name.Builtin */
.nc { color: #445588; font-weight: bold } /* Name.Class */
.no { color: #008080 } /* Name.Constant */
.nd { color: #3c5d5d; font-weight: bold } /* Name.Decorator */
.ni { color: #800080 } /* Name.Entity */
.ne { color: #990000; font-weight: bold } /* Name.Exception */
.nf { color: #990000; font-weight: bold } /* Name.Function */
.nl { color: #990000; font-weight: bold } /* Name.Label */
.nn { color: #555555 } /* Name.Namespace */
.nt { color: #000080 } /* Name.Tag */
.nv { color: #008080 } /* Name.Variable */
.ow { color: #000000; font-weight: bold } /* Operator.Word */
.w { color: #bbbbbb } /* Text.Whitespace */
.mf { color: #009999 } /* Literal.Number.Float */
.mh { color: #009999 } /* Literal.Number.Hex */
.mi { color: #009999 } /* Literal.Number.Integer */
.mo { color: #009999 } /* Literal.Number.Oct */
.sb { color: #d01040 } /* Literal.String.Backtick */
.sc { color: #d01040 } /* Literal.String.Char */
.sd { color: #d01040 } /* Literal.String.Doc */
.s2 { color: #d01040 } /* Literal.String.Double */
.se { color: #d01040 } /* Literal.String.Escape */
.sh { color: #d01040 } /* Literal.String.Heredoc */
.si { color: #d01040 } /* Literal.String.Interpol */
.sx { color: #d01040 } /* Literal.String.Other */
.sr { color: #009926 } /* Literal.String.Regex */
.s1 { color: #d01040 } /* Literal.String.Single */
.ss { color: #990073 } /* Literal.String.Symbol */
.bp { color: #999999 } /* Name.Builtin.Pseudo */
.vc { color: #008080 } /* Name.Variable.Class */
.vg { color: #008080 } /* Name.Variable.Global */
.vi { color: #008080 } /* Name.Variable.Instance */
.il { color: #009999 } /* Literal.Number.Integer.Long */
@@ -0,0 +1,64 @@ | |||
/*
 * Pygments syntax-highlighting theme ("monokai" style) for rendered
 * code snippets. The short class names are Pygments token codes.
 */
td.linenos { background-color: #f0f0f0; padding-right: 10px; }
span.lineno { background-color: #f0f0f0; padding: 0 5px 0 5px; }
pre { line-height: 125% }
.highlighttable { background-color: #49483e; width: inherit; height: inherit; }
.hll { display: block }
/* NOTE(review): the next rule has no selector, so it is invalid CSS and
 * browsers will drop it; Pygments normally emits this as the top-level
 * `.highlight` rule — confirm the missing selector. */
{ background: #272822; color: #f8f8f2 }
.c { color: #75715e } /* Comment */
.err { color: #960050; background-color: #1e0010 } /* Error */
.k { color: #66d9ef } /* Keyword */
.l { color: #ae81ff } /* Literal */
.n { color: #f8f8f2 } /* Name */
.o { color: #f92672 } /* Operator */
.p { color: #f8f8f2 } /* Punctuation */
.cm { color: #75715e } /* Comment.Multiline */
.cp { color: #75715e } /* Comment.Preproc */
.c1 { color: #75715e } /* Comment.Single */
.cs { color: #75715e } /* Comment.Special */
.ge { font-style: italic } /* Generic.Emph */
.gs { font-weight: bold } /* Generic.Strong */
.kc { color: #66d9ef } /* Keyword.Constant */
.kd { color: #66d9ef } /* Keyword.Declaration */
.kn { color: #f92672 } /* Keyword.Namespace */
.kp { color: #66d9ef } /* Keyword.Pseudo */
.kr { color: #66d9ef } /* Keyword.Reserved */
.kt { color: #66d9ef } /* Keyword.Type */
.ld { color: #e6db74 } /* Literal.Date */
.m { color: #ae81ff } /* Literal.Number */
.s { color: #e6db74 } /* Literal.String */
.na { color: #a6e22e } /* Name.Attribute */
.nb { color: #f8f8f2 } /* Name.Builtin */
.nc { color: #a6e22e } /* Name.Class */
.no { color: #66d9ef } /* Name.Constant */
.nd { color: #a6e22e } /* Name.Decorator */
.ni { color: #f8f8f2 } /* Name.Entity */
.ne { color: #a6e22e } /* Name.Exception */
.nf { color: #a6e22e } /* Name.Function */
.nl { color: #f8f8f2 } /* Name.Label */
.nn { color: #f8f8f2 } /* Name.Namespace */
.nx { color: #a6e22e } /* Name.Other */
.py { color: #f8f8f2 } /* Name.Property */
.nt { color: #f92672 } /* Name.Tag */
.nv { color: #f8f8f2 } /* Name.Variable */
.ow { color: #f92672 } /* Operator.Word */
.w { color: #f8f8f2 } /* Text.Whitespace */
.mf { color: #ae81ff } /* Literal.Number.Float */
.mh { color: #ae81ff } /* Literal.Number.Hex */
.mi { color: #ae81ff } /* Literal.Number.Integer */
.mo { color: #ae81ff } /* Literal.Number.Oct */
.sb { color: #e6db74 } /* Literal.String.Backtick */
.sc { color: #e6db74 } /* Literal.String.Char */
.sd { color: #e6db74 } /* Literal.String.Doc */
.s2 { color: #e6db74 } /* Literal.String.Double */
.se { color: #ae81ff } /* Literal.String.Escape */
.sh { color: #e6db74 } /* Literal.String.Heredoc */
.si { color: #e6db74 } /* Literal.String.Interpol */
.sx { color: #e6db74 } /* Literal.String.Other */
.sr { color: #e6db74 } /* Literal.String.Regex */
.s1 { color: #e6db74 } /* Literal.String.Single */
.ss { color: #e6db74 } /* Literal.String.Symbol */
.bp { color: #f8f8f2 } /* Name.Builtin.Pseudo */
.vc { color: #f8f8f2 } /* Name.Variable.Class */
.vg { color: #f8f8f2 } /* Name.Variable.Global */
.vi { color: #f8f8f2 } /* Name.Variable.Instance */
.il { color: #ae81ff } /* Literal.Number.Integer.Long */
@@ -1,4 +0,0 @@ | |||
/* Global project stylesheet. | |||
*/ | |||
p { | |||
font-size: 1.5em; } |
@@ -0,0 +1 @@ | |||
google-site-verification: google10335120a3066831.html |
@@ -0,0 +1,19 @@ | |||
/*
 * @file Implements a parallax effect on the about page.
 */

// Last observed scroll offset, used to compute per-event deltas.
var lastVertPos = $(window).scrollTop();

/*
 * Scroll `div#img-[1-4]` at a greater speed than the text, producing a
 * parallax effect. Each `.bg` element's `speed` attribute scales how far
 * it moves per scrolled pixel.
 */
$(window).scroll(function(){
    var current = $(window).scrollTop();
    var shift = current - lastVertPos;
    lastVertPos = current;

    $(".bg").each(function(){
        var background = $(this);
        var newTop = parseFloat(background.css("top")) -
                     shift * background.attr("speed");
        background.css("top", newTop + "px");
    });
});
@@ -0,0 +1,175 @@ | |||
/*
 * @file Manages all advanced search form logic.
 */

// Container holding every `.search-group` div.
var searchGroups = $("div#search-groups");

/*
 * (Re)initialize the jQuery UI widgets inside the search groups:
 * datepickers for the date fields and a case-insensitive autocomplete
 * over AUTOCOMPLETE_LANGUAGES for the language field.
 */
function loadInputFieldWidgets(){
    $(".search-group input#date-last-modified").datepicker();
    $(".search-group input#date-created").datepicker();
    $(".search-group input#autocomplete").autocomplete({
        source: function(request, response){
            var pattern = new RegExp(
                $.ui.autocomplete.escapeRegex(request.term), "i");
            var matches = $.grep(AUTOCOMPLETE_LANGUAGES, function(language){
                return pattern.test(language);
            });
            response(matches);
        }
    });
}

loadInputFieldWidgets();
/*
 * Set all advanced search form button callbacks.
 */
(function setSearchFormCallbacks(){
    // Create a new search group, and update the `#sidebar` checklist.
    $("button#add-group").click(function(){
        // Deselect the current group and reset the sidebar checkboxes.
        $("div#sidebar input[type=checkbox]").prop("checked", false);
        searchGroups.children("#selected").removeAttr("id");

        // New groups start with just a language field.
        var searchGroup = $("<div/>", {
            class : "search-group",
            id : "selected"
        });
        searchGroups.append(
            searchGroup.append(createSearchGroupInput("language", "languages")));
        loadInputFieldWidgets();
        $("div#sidebar input[type=checkbox]#language").prop("checked", true);

        // Keep the newest group in view.
        searchGroups[0].scrollTop = searchGroups[0].scrollHeight;
    });

    // Remove the currently selected group if it's not the only one, and mark
    // one of its siblings as selected.
    $("button#remove-group").click(function(){
        var currentGroup = $("div.search-group#selected");
        if($("div.search-group").length == 1)
            return;
        else {
            // Prefer the previous sibling; fall back to the next one.
            var nextGroup = currentGroup.prev();
            if(nextGroup.size() == 0)
                nextGroup = currentGroup.next();
        }

        currentGroup.remove();
        // Re-select via click so the sidebar checklist is synced too.
        nextGroup.click();
    });

    // Select a search group, and update the `#sidebar` checklist accordingly.
    $(document).on("click", "div.search-group", function(){
        searchGroups.children("#selected").removeAttr("id");
        $(this).attr("id", "selected");

        // Check exactly the boxes matching this group's input fields
        // (each text input's first class is its field id).
        $("div#sidebar input[type=checkbox]").prop("checked", false);
        $(this).find("input[type=text]").each(function(){
            var checkBoxSelector = "div#sidebar input[type=checkbox]";
            $(checkBoxSelector + "#" + $(this).attr("class").split(" ")[0]).
                    prop("checked", true);
        })
    });

    // Toggle the presence of an input field.
    $("div#sidebar input[type=checkbox]").click(function(){
        var fieldId = $(this).prop("id");

        if($(this).is(":checked")){
            // Add the field to the selected group, using the checkbox
            // label's text as its display name.
            $("div.search-group#selected").append(
                    $.parseHTML(createSearchGroupInput(
                        fieldId, $(this).next("label").children("div").
                        text())));
            loadInputFieldWidgets();
            // Date fields need their datepicker attached explicitly.
            if(fieldId.slice(0, 4) == "date")
                $(".search-group#selected ." + fieldId).datepicker();
        }
        else {
            // Never remove a group's last remaining field.
            if($(".search-group#selected").children("div").length > 1)
                $(".search-group#selected #" + fieldId).remove()
            else
                $(this).prop("checked", true);
        }

        searchGroups[0].scrollTop = searchGroups[0].scrollHeight;
    });

    // Run the assembled query and close the advanced-search form.
    $("div#advanced-search button#submit").click(function(){
        $("div#advanced-search").hide();
        advancedSearchButton.removeClass("clicked");
        assembleQuery();
        populateResults();
    })

    // Mirror the assembled advanced query into the main search bar,
    // polling ~15 times per second and only writing on change.
    var previousAdvancedQuery = "";
    var searchBar = $("form#search-bar input[name=query]");

    window.setInterval(function(){
        var currentQuery = assembleQuery();
        if(currentQuery != previousAdvancedQuery){
            previousAdvancedQuery = currentQuery;
            searchBar.val(assembleQuery());
        }
    }, 1e3 / 15);
}());
/*
 * Return an HTML string representing a new input field div in a search group.
 *
 * @param fieldId The id of the input field div, and its child elements.
 * @param name The name to display next to the input field.
 *
 * @return {String} The concatenated HTML for the field div.
 */
function createSearchGroupInput(fieldId, name){
    var fieldHTML = [
        "<div id='" + fieldId + "'>",
        "<div class='name'>" + name + "</div>",
        // Bug fix: a space now separates the attributes; previously they
        // fused into `name='...'type='text'`, producing invalid HTML.
        "<input class='" + fieldId + "' name='" + fieldId + "' type='text'>",
        "<input type='checkbox' name='regex'>",
        "<span class='regex'>Regex</span>",
        "</div>"
    ];

    // The language field gets the autocomplete widget instead of a plain
    // text input (see loadInputFieldWidgets()).
    if(fieldId == "language")
        fieldHTML[2] = [
            "<input id='autocomplete' class='language'",
            "name='language' type='text'>"
        ].join(" ");

    return fieldHTML.join("");
}
/*
 * Create a query from advanced-search groups.
 *
 * Each group's non-empty fields are ANDed together; the groups
 * themselves are ORed.
 *
 * @return {String} The assembled query string.
 */
function assembleQuery(){
    var groupQueries = [];

    searchGroups.children(".search-group").each(function(){
        var textFields = this.querySelectorAll("input[type=text]");
        var regexBoxes = this.querySelectorAll("input[name=regex]");
        var terms = [];

        for(var i = 0; i < textFields.length; i++){
            if(textFields[i].value.length > 0)
                terms.push(genFieldQueryString(
                        textFields[i], regexBoxes[i].checked));
        }

        if(terms.length > 0)
            groupQueries.push(terms.join(" AND "));
    });

    return groupQueries.join(" OR ");
}
/*
 * Generate a processed query string for an input field's value.
 *
 * Backslashes and double quotes in the value are escaped, and the whole
 * `name:value` (optionally `name:re:value`) term is wrapped in quotes.
 *
 * @param field (DOM element) An `input[type=text]` element.
 * @param hasRegex (boolean) Whether or not the field's value has regex.
 *
 * @return The processed query.
 */
function genFieldQueryString(field, hasRegex){
    var escaped = field.value.replace(/\\/g, "\\\\").replace(/\"/g, "\\\"");
    var prefix = hasRegex ? "re:" : "";
    return '"' + field.getAttribute("name") + ":" + prefix + escaped + '"';
}
@@ -0,0 +1,447 @@ | |||
/*
 * @file Manages all library initialization, jQuery callbacks, query entry
 * callbacks, server querying, and results display for `index.html`.
 */

var advancedSearchDiv = $("div#advanced-search");
var advancedSearchButton = $("button#advanced-search");

// Milliseconds of keyboard inactivity after which the query is sent.
// Bug fix: `var` added — the original assignment created an implicit
// global (and would throw in strict mode).
var FINISH_TYPING_INTERVAL = 650;

var searchBar = $("form#search-bar input[type='text']")[0];
var resultsDiv = $("div#results")[0];

// `typingTimer` doubles as the pending keystroke-timeout id.
var typingTimer, scrollTimer, lastValue;

// Current page of results fetched from the server (1-based).
var searchResultsPage = 1;
/*
 * Set all page callbacks.
 * NOTE(review): the IIFE's name `setHomePageCallbabacks` is typo'd;
 * harmless since it is invoked immediately and never referenced again.
 */
(function setHomePageCallbabacks(){
    var results = $('#results').get(0);

    // Enable infinite scrolling down the results page.
    $(window).scroll(function(){
        // At the very bottom of the page with results present, fetch the
        // next page of codelets.
        if($(window).scrollTop() + $(window).height() == $(document).height() &&
            resultsDiv.querySelectorAll(".result").length > 0)
            loadMoreResults();

        // Suppress hover styling while scrolling; restore it 200ms after
        // scrolling stops.
        clearTimeout(scrollTimer);
        if (!results.classList.contains('disable-hover'))
            results.classList.add('disable-hover')
        scrollTimer = setTimeout(function(){
            if (results.classList.contains('disable-hover'))
                results.classList.remove('disable-hover');
        }, 200);
    });

    // Toggle the advanced-search form's visibility.
    advancedSearchButton.click(function(){
        var searchField = $("div#search-field");
        if(!advancedSearchDiv.hasClass("visible")){
            searchField.addClass("partly-visible");
            advancedSearchDiv.fadeIn(500).addClass("visible");
            advancedSearchButton.addClass("clicked");
        }
        else {
            advancedSearchDiv.hide().removeClass("visible");
            advancedSearchButton.removeClass("clicked");
            // Collapse the search field when no results are displayed.
            if($("div#results .result").length == 0)
                searchField.removeClass("partly-visible");
            clearResults();
        }
    });

    // Enable capturing the `enter` key.
    $("form#search-bar").submit(function(event){
        event.preventDefault();
        return false;
    });

    // `typingTimer` is a hoisted function declaration at this point, so
    // this installs the keystroke handler; the same variable is later
    // reused to hold setTimeout ids without detaching this handler.
    searchBar.onkeyup = typingTimer;
}());
/*
 * Set keyboard shortcut mappings (vim-style: j/k move between results,
 * h/l cycle symbol matches inside the focused result, ? shows help).
 */
(function resultsHotkeys(){
    /*
     * If the currently viewed result is not the first, scroll to the previous
     * result.
     */
    var previousResult = function(){
        var currResult = $(".display-all");
        if(currResult.length) {
            currResult.removeClass("display-all");
            currResult = currResult.closest(".result").prev(".result");
        } else {
            // No focused result yet: start from the first one.
            currResult = $(document.querySelectorAll(".result")[0]);
        }

        currResult.addClass("display-all");
        // Center the newly focused result in the viewport.
        currResult.each(function(){
            $('html,body').stop().animate({
                scrollTop: $(this).offset().top - (
                    $(window).height() - $(this).outerHeight(true)) / 2
            }, 140);
        });
    };

    /*
     * If the currently viewed result is not the last, scroll to the next
     * result.
     */
    var nextResult = function(){
        var currResult = $(".display-all");
        if(currResult.length) {
            currResult.removeClass("display-all");
            currResult = currResult.closest(".result").next(".result");
        } else {
            // No focused result yet: start from the first one.
            currResult = $(document.querySelectorAll(".result")[0]);
        }

        currResult.addClass('display-all');
        // Center the newly focused result in the viewport.
        currResult.each(function(){
            $('html,body').stop().animate({
                scrollTop: $(this).offset().top - (
                    $(window).height() - $(this).outerHeight(true)) / 2
            }, 140);
        });
    };

    // Toggle the hotkey cheat-sheet overlay and dim the page body.
    var displayHotkeyHelp = function(){
        var help = $("div#hotkey-help");
        if(help.hasClass("hidden"))
            help.fadeIn(420);
        else
            help.fadeOut(420);

        $("div#body").toggleClass("faded");
        help.toggleClass("hidden");
    }

    var hotkeyActions = {
        "k" : previousResult,
        "j" : nextResult,
        "h" : previousSymbolMatch,
        "l" : nextSymbolMatch,
        "?" : displayHotkeyHelp
    };

    $(window).keypress(function(key){
        for(var hotkey in hotkeyActions){
            var keyChar = String.fromCharCode(key.keyCode);
            // Ignore keystrokes aimed at text-entry elements.
            if(keyChar == hotkey &&
                !($(key.target).is("textarea") || $(key.target).is("input")))
                hotkeyActions[keyChar]();
        }
    });
}());
// Enable infinite scrolling down the results page.
// NOTE(review): this duplicates the scroll handler registered inside the
// page-callbacks IIFE above (with a slightly different guard), so
// hitting the bottom of the page can trigger loadMoreResults() twice —
// confirm whether one of the two registrations should be removed.
$(window).scroll(function() {
    var searchField = $("div#search-field");
    if($(window).scrollTop() + $(window).height() == $(document).height() &&
            searchField.hasClass('partly-visible')){
        loadMoreResults();
    }
});
/*
 * Clear the existing timer and set a new one when the user types text into
 * the search bar.
 *
 * Note: the shared `typingTimer` variable is reassigned below to the
 * pending setTimeout id, shadowing this hoisted function; the onkeyup
 * handler keeps its original reference to the function, and the first
 * clearTimeout call (receiving the function rather than an id) is
 * silently ignored by the browser, so both uses coexist.
 */
function typingTimer(event){
    clearTimeout(typingTimer);

    var enterKeyCode = 13;
    if(event.keyCode != enterKeyCode){
        // Only restart the countdown when the text actually changed.
        if(lastValue != searchBar.value)
            typingTimer = setTimeout(finishedTyping, FINISH_TYPING_INTERVAL);
    }
    else {
        // Enter: search immediately, suppressing form submission.
        event.preventDefault();
        finishedTyping();
        return false;
    }
};
/*
 * Callback which queries the server whenever the user stops typing.
 *
 * Whenever the user doesn't type for a `FINISH_TYPING_INTERVAL` after having
 * entered new text in the search bar, send the current query request to the
 * server.
 */
function finishedTyping(){
    lastValue = searchBar.value;
    var searchField = $("div#search-field");

    clearResults();
    if(searchBar.value){
        // Non-empty query: reveal the results area and fetch page one.
        searchField.addClass("partly-visible");
        populateResults();
    }
    else {
        // Empty query: collapse the UI back to its initial state.
        searchField.removeClass("partly-visible");
        $("div#advanced-search").fadeOut(50);
        advancedSearchButton.removeClass("clicked");
        // NOTE(review): clearResults() already ran above; this second
        // call is redundant but harmless.
        clearResults();
    }
}
/*
 * Removes any child elements of `div#results`.
 */
function clearResults(){
    var child;
    while((child = resultsDiv.firstChild) !== null)
        resultsDiv.removeChild(child);
}
/*
 * Create a result element based upon a codelet instance.
 *
 * @param codelet Object with `url`, `name`, `origin`, `lang`, `created`,
 *      `modified`, `authors` and (pre-highlighted) `code` fields, as
 *      returned by the search endpoint.
 * @return {Element} The result element.
 */
function createResult(codelet) {
    // Truncation length applied to the created date below.
    var maxAttributeLength = 20;

    //Level 1
    var newDiv = document.createElement("div"),
        table = document.createElement("table"),
        row = document.createElement("tr");

    //Level 2
    var displayInfo = document.createElement("div"),
        codeElt = document.createElement("td"),
        hiddenInfoContainer = document.createElement("td"),
        hiddenInfo = document.createElement("div"),
        cycle = document.createElement("div");

    //Level 3
    var title = document.createElement("span"),
        site = document.createElement("span"),
        nextMatch = document.createElement("a"),
        prevMatch = document.createElement("a"),
        dateModified = document.createElement("div"),
        language = document.createElement("div"),
        dateCreated = document.createElement("div"),
        authors = document.createElement("div");

    //Classes and ID's
    newDiv.classList.add('result');
    displayInfo.id = 'display-info';
    codeElt.id = 'code';
    hiddenInfo.id = 'hidden-info';
    cycle.id = 'cycle-matches'
    title.id = 'title';
    site.id = 'site';
    nextMatch.id = 'next-match';
    nextMatch.href = '#';
    prevMatch.id = 'prev-match';
    prevMatch.href = '#';
    dateModified.id = 'date-modified';
    language.id = 'language';
    dateCreated.id = 'date-created';
    authors.id = 'authors';

    //Add the bulk of the html
    title.innerHTML = ' &raquo; <a href="' + codelet.url + '">'
        + codelet.name + '</a>';
    site.innerHTML = '<a href="' + codelet.origin[1] + '">' +
        codelet.origin[0] +'</a>';
    nextMatch.innerHTML = 'next match';
    prevMatch.innerHTML = 'prev match';
    language.innerHTML = 'Language: <span>' + codelet.lang + '</span>';
    dateModified.innerHTML = 'Last modified: <span>' + codelet.modified +
        '</span>';
    // Needs to be changed from int to string on the server
    dateCreated.innerHTML = 'Created: <span>' +
        codelet.created.substring(0, maxAttributeLength) + '</span>';

    // Build the author list, truncating with "..." once the combined
    // name length passes the threshold.
    // NOTE(review): `authorsHtml` is assembled but never used, and the
    // threshold is a magic `6` rather than `maxAttributeLength` —
    // confirm intended limit.
    var authorsHtml = 'Authors: <span>';
    var currLength = 0;
    var authorsList = [];
    for(var auth = 0; auth < codelet.authors.length; auth++){
        currLength += codelet.authors[auth].length;

        if(6 < currLength){
            authorsList.push("...");
            break;
        }
        else
            authorsList.push('<a href=#>' + codelet.authors[auth] + '</a>');
    }
    authors.innerHTML = "Authors: <span>" + authorsList.join(" ") + "</span>";

    // Needs to be processed on the server
    codeElt.innerHTML = '<div id=tablecontainer>' + codelet.code + '</div>';

    //Event binding
    // Follow the mouse with the "focused result" marker, unless hover is
    // temporarily disabled while the page scrolls (.disable-hover).
    $(newDiv).on('mousemove', function(e) {
        var holdCondition = $('.disable-hover');

        if(holdCondition.length == 0) {
            $(this).siblings().removeClass('display-all');
            $(this).addClass('display-all');
        }
    });

    $(newDiv).on('mouseleave', function(e) {
        var holdCondition = $('.disable-hover');

        if(holdCondition.length == 0)
            $(this).removeClass('display-all');
    });

    $(nextMatch).click(function(e) {
        e.stopPropagation();
        e.preventDefault();
        nextSymbolMatch();
    });

    $(prevMatch).click(function(e) {
        e.stopPropagation();
        e.preventDefault();
        previousSymbolMatch();
    });

    //Finish and append elements to parent elements
    hiddenInfo.appendChild(dateCreated);
    hiddenInfo.appendChild(dateModified);
    hiddenInfo.appendChild(language);
    hiddenInfo.appendChild(authors);

    hiddenInfoContainer.appendChild(hiddenInfo);

    row.appendChild(codeElt);
    row.appendChild(hiddenInfoContainer);
    table.appendChild(row);

    displayInfo.appendChild(site);
    displayInfo.appendChild(title);

    // NOTE(review): `cycle` (holding the prev/next match links) is never
    // appended to `newDiv`, so these links don't reach the DOM —
    // confirm whether a `newDiv.appendChild(cycle)` is missing.
    cycle.appendChild(prevMatch);
    cycle.appendChild(nextMatch);

    newDiv.appendChild(displayInfo);
    newDiv.appendChild(table);

    return newDiv;
}
/*
 * Move the highlighted-symbol cursor in the focused result to the
 * previous `.hll` match, scrolling the code pane so it is centered.
 *
 * Bug fixes vs. the original (compare nextSymbolMatch()):
 *  - `matches[0]` is wrapped in `$()` before jQuery methods (`.get`) are
 *    called on it, avoiding a TypeError on the first invocation.
 *  - the wrap-around test is `< 0` instead of `<= 0`, so stepping back
 *    from the second match lands on the first instead of the last.
 */
function previousSymbolMatch() {
    var currResult = $(".display-all"),
        currMatch = currResult.find(".hll.current"),
        matches = currResult.find(".hll"),
        scrollDiv = currResult.find("#tablecontainer");

    if (currMatch.length == 0)
        currMatch = $(matches[0]);
    else
        currMatch.removeClass('current');

    // Wrap around to the last match when stepping back from the first.
    var index = matches.index(currMatch.get(0)) - 1;
    index = index < 0 ? matches.length - 1 : index;
    var newMatch = $(matches[index]);

    // Center the new match vertically within the scrolling container.
    scrollDiv.scrollTop(scrollDiv.scrollTop()
            - scrollDiv.height() / 2
            + newMatch.position().top + newMatch.height() / 2);
    newMatch.effect("highlight", {color: '#FFF'}, 750)
    newMatch.addClass('current');
};
/*
 * Move the highlighted-symbol cursor in the focused result to the next
 * `.hll` match, scrolling the code pane so it is centered.
 */
function nextSymbolMatch() {
    var currResult = $(".display-all"),
        currMatch = currResult.find(".hll.current"),
        matches = currResult.find(".hll"),
        scrollDiv = currResult.find("#tablecontainer");

    if (currMatch.length == 0)
        currMatch = $(matches[0]);
    else
        currMatch.removeClass("current");

    // Wrap around to the first match after the last one.
    var index = matches.index(currMatch.get(0)) + 1;
    index = index >= matches.length ? 0 : index;
    var newMatch = $(matches[index]);

    // Center the new match vertically within the scrolling container.
    scrollDiv.scrollTop(scrollDiv.scrollTop()
            - scrollDiv.height() / 2
            + newMatch.position().top + newMatch.height() / 2);
    newMatch.effect("highlight", {color: "#FFF"}, 750)
    newMatch.addClass("current");
};
/*
 * AJAX the current query string to the server, and return its response.
 *
 * Sends `q` (the search bar text), `p` (the next results page — the page
 * counter is incremented as a side effect), and `hl=1` (request
 * highlighted HTML) to `search.json`.
 *
 * @return {Deferred} Resolved with an array of `div.result` DOM elements
 *      to fill `div#results`; resolved with an empty array when the
 *      server reports an error or the first page has no results (an
 *      error message is inserted into the UI in those cases).
 */
function queryServer(){
    var params = {
        "q" : searchBar.value,
        "p" : searchResultsPage++,
        "hl": 1
    };
    var queryUrl = document.URL + "search.json?" + $.param(params);
    var deferred = $.Deferred();

    $.getJSON(queryUrl, function(response){
        var resultDivs = [];
        if("error" in response){
            insertErrorMessage(response["error"]);
        } else if(response["results"].length == 0 && searchResultsPage == 2){
            // The first page came back empty: the query matched nothing.
            insertErrorMessage("No search results.");
        } else {
            $.each(response["results"], function(i, codelet){
                resultDivs.push(createResult(codelet));
            });
        }
        deferred.resolve(resultDivs);
    });
    return deferred;
}
/*
 * Query the server with the current search string, and populate `div#results`
 * with its response.
 *
 * Resets the page counter to 1 so the fetch starts from the first page of
 * results, then delegates the actual request to `loadMoreResults()`.
 */
function populateResults(){
    searchResultsPage = 1;
    loadMoreResults();
}
/*
 * Query the server for the next results page, and add its codelets to
 * `div#results`.
 *
 * Each result div is appended immediately, but its "cascade" class (which
 * triggers the fade-in transition) is applied on a staggered timer of
 * 20ms per result, so the results animate in one after another.
 */
function loadMoreResults(){
    queryServer().done(function(resultElements){
        resultElements.forEach(function(element, position){
            resultsDiv.appendChild(element);
            // `forEach` runs the callback in its own closure, so
            // `element` is bound per-iteration for the delayed toggle.
            setTimeout(function(){
                element.classList.add("cascade");
            }, position * 20);
        });
    });
}
/*
 * Displays a warning message in the UI.
 *
 * Builds a `div#error` banner ("Error » <msg>") and appends it to the
 * results container.
 *
 * @param msg (str) The message string.
 */
function insertErrorMessage(msg){
    var bannerHtml =
        "<div id='error'><span id='s1'>Error</span> " +
        "<span id='s2'>»</span> </div>";
    var banner = $(bannerHtml);
    banner.append(msg);
    resultsDiv.appendChild(banner[0]);
}
@@ -0,0 +1,6 @@ | |||
// Standard Google Analytics async bootstrap snippet (minified, as
// distributed by Google — left byte-for-byte intact): defines the `ga()`
// command queue, injects analytics.js before the first <script> tag, then
// registers the tracking property and records a pageview.
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-51910807-1', 'bitshift.it');
ga('send', 'pageview');
@@ -0,0 +1,3 @@ | |||
User-agent: * | |||
Disallow: /search.json | |||
Sitemap: http://www.bitshift.it/sitemap.xml |
@@ -0,0 +1,18 @@ | |||
a#logo | |||
letter-spacing: 0.3em | |||
text-decoration: none | |||
div#logo | |||
font-size: 400% | |||
padding-bottom: 0.2em | |||
text-align: center | |||
#logo-bit | |||
color: $baseColor1 | |||
#logo-angle | |||
color: $baseColor3 | |||
#logo-shift | |||
color: $baseColor2 | |||
font-style: italic |
@@ -1,11 +1,29 @@ | |||
/* | |||
Partial to contain all globally-applicable mixins | |||
Partial to contain all globally-applicable mixins. | |||
*/ | |||
// add vendor prefixes for the property $property with value $value | |||
// Add vendor prefixes for the property $property with value $value. | |||
@mixin vendor($property, $value) | |||
-webkit-#{$property}: $value | |||
-moz-#{$property}: $value | |||
-ms-#{$property}: $value | |||
-o-#{$property}: $value | |||
#{$property}: $value | |||
// Add portable opacity style. | |||
@mixin opaque($opacity) | |||
@include vendor(opacity, $opacity) | |||
filter: alpha(opacity=$opacity) | |||
@mixin delay($time) | |||
transition-delay: $time | |||
-webkit-transition-delay: $time | |||
.t1 | |||
@include vendor(transition, all 0.1s ease-out) | |||
.t2 | |||
@include vendor(transition, all 0.2s ease-out) | |||
.t3 | |||
@include vendor(transition, all 0.3s ease-out) |
@@ -0,0 +1,12 @@ | |||
/* | |||
Partial to contain all globally-applicable variables. | |||
*/ | |||
$baseColor1: #A31F34 | |||
$baseColor2: #8A8B8C | |||
$baseColor3: #C2C0BF | |||
$lightGray: #F1F1F1 | |||
$lightBlue: #67A0FD | |||
$blue: #3177EB |
@@ -0,0 +1,139 @@ | |||
/* | |||
Stylesheet for `templates/about.html` | |||
*/ | |||
@import mixins | |||
@import variables | |||
$centered-section-min-width: 500px | |||
div.bg | |||
$img-height: 650px | |||
position: fixed | |||
width: 100% | |||
left: 0 | |||
z-index: -1 | |||
&#img-1 | |||
background: url(../img/about/bg1.png) no-repeat | |||
background-size: cover | |||
height: 600px | |||
top: -300px | |||
&#img-2 | |||
background: url(../img/about/bg2.png) no-repeat | |||
background-size: cover | |||
height: $img-height + 300 | |||
top: 1150px | |||
&#img-3 | |||
background: url(../img/about/bg3.png) no-repeat | |||
background-size: cover | |||
height: $img-height + 300 | |||
top: 2050px | |||
&#img-4 | |||
background: url(../img/about/bg4.png) no-repeat | |||
background-size: cover | |||
height: $img-height + 400 | |||
top: 3200px | |||
div.section | |||
background-color: white | |||
border: 1px solid $baseColor2 | |||
margin-bottom: 200px | |||
margin-top: 300px | |||
padding-bottom: 80px | |||
padding-top: 20px | |||
overflow: hidden | |||
&#top | |||
margin-top: 0px | |||
div#wrap | |||
width: 100% | |||
position: relative | |||
padding-top: 56.782% // aspect ration, 9 / 16 | |||
iframe#vimeo | |||
border: 1px solid $baseColor3 | |||
margin-top: 40px | |||
position: absolute | |||
top: 0 | |||
left: 0 | |||
height: none | |||
width: none | |||
div.centered | |||
font-size: 110% | |||
line-height: 150% | |||
margin-left: auto | |||
margin-right: auto | |||
min-width: 500px | |||
width: 65% | |||
&#how | |||
b | |||
font-family: monospace | |||
font-size: 110% | |||
ul | |||
list-style: none | |||
span | |||
color: $baseColor1 | |||
font-weight: bold | |||
h1 | |||
color: $baseColor1 | |||
span | |||
color: $baseColor2 | |||
a | |||
color: #727070 | |||
font-style: italic | |||
text-decoration: none | |||
&:hover | |||
@extend .t3 | |||
color: #575757 | |||
span#title | |||
color: $baseColor1 | |||
font-weight: bold | |||
div.person | |||
font-size: 80% | |||
overflow: hidden | |||
&#top | |||
margin-top: 40px | |||
>div | |||
$image-min-width: 100px | |||
display: inline-block | |||
height: 100% | |||
margin-bottom: 40px | |||
vertical-align: top | |||
&.photo | |||
margin-right: 40px | |||
width: $image-min-width | |||
img | |||
display: block | |||
height: $image-min-width | |||
width: $image-min-width | |||
&.bio | |||
min-width: $centered-section-min-width - $image-min-width - 50px | |||
width: 70% | |||
h1 | |||
font-size: 130% | |||
margin: 0 |
@@ -0,0 +1,63 @@ | |||
@import mixins | |||
@import variables | |||
@import logo | |||
h1 | |||
color: $baseColor1 | |||
span | |||
color: $baseColor2 | |||
h2, h3 | |||
color: $baseColor2 * 0.8 | |||
p | |||
line-height: 1.8em | |||
ul | |||
list-style: none | |||
margin-bottom: 2% | |||
li | |||
margin-bottom: 2% | |||
a | |||
color: #727070 | |||
font-style: italic | |||
text-decoration: none | |||
&:hover | |||
@extend .t3 | |||
color: #575757 | |||
span | |||
&.code | |||
background-color: $baseColor3 * 1.2 | |||
font-family: monospace | |||
padding: 5px | |||
&.string | |||
color: $baseColor1 | |||
font-family: monospace | |||
font-size: 1.1em | |||
&.title | |||
color: $baseColor1 | |||
font-weight: bold | |||
table.code-example | |||
border-collapse: collapse | |||
width: 100% !important | |||
td.linenos | |||
border: 1px solid $baseColor2 | |||
padding-left: 10px | |||
width: 20px | |||
td.code | |||
padding-left: 10px | |||
li#sec3 span#gasp | |||
color: $baseColor2 * 0.8 | |||
font-style: italic |
@@ -0,0 +1,19 @@ | |||
@import variables | |||
div#message | |||
color: $baseColor1 | |||
font-family: monospace | |||
font-size: 700% | |||
font-weight: normal | |||
margin-top: 8% | |||
text-align: center | |||
span | |||
&.light | |||
color: $baseColor3 | |||
&.dark | |||
color: $baseColor2 | |||
&.red | |||
color: $baseColor1 |
@@ -0,0 +1,443 @@ | |||
/* | |||
Stylesheet for `templates/index.html`. | |||
*/ | |||
@import mixins | |||
@import variables | |||
@import logo | |||
$minSearchFieldsWidth: 490px | |||
$codeWidth: 700px | |||
$hiddenInfoWidth: 300px | |||
.ui-datepicker | |||
font-size: 70% | |||
.ui-autocomplete | |||
max-height: 30% | |||
overflow-x: hidden | |||
overflow-y: scroll | |||
padding: 0px | |||
>li.ui-menu-item a.ui-state-focus | |||
@include vendor(transition, background-color 0.3s ease-out) | |||
div#body | |||
@extend .t3 | |||
&.faded | |||
@include opaque(0.8) | |||
div#hotkey-help | |||
$width: 40% | |||
background-color: white | |||
border: 1px solid $baseColor3 | |||
left: 50% - $width / 2 | |||
min-width: 400px | |||
padding: 35px | |||
position: fixed | |||
top: 30% | |||
width: $width | |||
z-index: 200 | |||
&.hidden | |||
display: none | |||
div | |||
border-bottom: 1px solid $baseColor2 | |||
color: $baseColor1 | |||
font-size: 130% | |||
padding-bottom: 8px | |||
text-align: center | |||
ul | |||
list-style: none | |||
margin-left: auto | |||
margin-right: auto | |||
position: relative | |||
width: 300px | |||
li | |||
margin-bottom: 4px | |||
span.hotkey | |||
color: $baseColor1 | |||
font-family: monospace | |||
font-size: 130% | |||
font-weight: bold | |||
span.seperator | |||
color: $baseColor2 | |||
div#search-field | |||
@extend .t2 | |||
bottom: 0 | |||
height: 50% | |||
left: 0 | |||
margin: auto | |||
margin-top: 15% | |||
max-height: 100px | |||
right: 0 | |||
position: absolute | |||
z-index: 2 | |||
top: 0 | |||
width: 40% | |||
form#search-bar | |||
min-width: $minSearchFieldsWidth | |||
input[type="text"], button | |||
@extend .t3 | |||
@include vendor(box-sizing, border-box) | |||
border: 1px solid $baseColor2 | |||
font-size: 110% | |||
margin-bottom: 0px | |||
padding: 6px | |||
input[type="text"]#query | |||
width: 100% | |||
&:hover | |||
border-color: $baseColor1 | |||
button#advanced-search | |||
background-color: white | |||
border: none | |||
color: $baseColor2 | |||
font-size: 1.1em | |||
font-style: italic | |||
&:hover | |||
color: $baseColor1 | |||
cursor: pointer | |||
&.clicked | |||
color: $baseColor1 | |||
&:focus | |||
outline: 0 | |||
&.partly-visible | |||
margin-top: 0% | |||
position: absolute | |||
width: 100% | |||
#logo | |||
position: absolute | |||
top: -1% | |||
left: 1% | |||
span | |||
font-size: 50% | |||
form#search-bar | |||
padding-top: 3% | |||
margin-left: auto | |||
margin-right: auto | |||
min-width: 800px | |||
width: 60% | |||
input | |||
@extend .t3 | |||
&#query | |||
width: 80% | |||
&:hover | |||
border: 1px solid $baseColor1 | |||
button#advanced-search | |||
margin-left: 30px | |||
div#advanced-search | |||
background-color: white | |||
border: 1px solid $baseColor3 | |||
display: none | |||
font-size: 96% | |||
height: 400px | |||
min-width: $minSearchFieldsWidth | |||
padding-top: 0px | |||
overflow-x: auto | |||
overflow-y: hidden | |||
#heading | |||
color: $baseColor2 | |||
display: block | |||
font-size: 120% | |||
padding-left: 1% | |||
padding-top: 1% | |||
width: 100% | |||
div | |||
display: inline-block | |||
font-size: 110% | |||
&#col1 | |||
width: 25% | |||
&#col2 | |||
width: 75% | |||
button | |||
border: none | |||
color: white | |||
float: right | |||
font-size: 80% | |||
font-weight: bold | |||
margin-right: 1% | |||
padding-left: 4% | |||
padding-right: 4% | |||
&:hover | |||
cursor: pointer | |||
&#add-group | |||
background-color: #7FAFFC | |||
&:hover | |||
background-color: #609AF8 | |||
&#remove-group | |||
background-color: #E74C4C | |||
&:hover | |||
background-color: #D63636 | |||
&#submit | |||
background-color: #4ee76c | |||
&:hover | |||
background-color: #0FDD38 | |||
>div | |||
@include vendor(box-sizing, border-box) | |||
display: inline-block | |||
float: left | |||
#sidebar | |||
padding-left: 1% | |||
width: 25% | |||
>ul | |||
list-style: none | |||
padding-left: 0 | |||
margin-bottom: 8% | |||
margin-top: 2% | |||
li | |||
margin-bottom: 2% | |||
label | |||
user-select: none | |||
div | |||
@extend .t3 | |||
background-color: $lightGray | |||
border: none | |||
padding: 3% | |||
width: 85% | |||
&:hover, &.selectedInputField | |||
@extend .t3 | |||
background-color: $baseColor2 | |||
color: white | |||
cursor: pointer | |||
width: 90% | |||
input[type="checkbox"] | |||
display: none | |||
&:checked + label > div | |||
@extend .selectedInputField | |||
background-color: $baseColor1 | |||
color: white | |||
width: 90% | |||
#search-groups | |||
margin-top: 1% | |||
max-height: 87% | |||
overflow-y: auto | |||
width: 75% | |||
.search-group | |||
@include vendor(transition, all 0.6s ease-out) | |||
background-color: $lightGray | |||
padding: 1% | |||
margin-bottom: 2% | |||
width: 97% | |||
>div | |||
margin-bottom: 0.7% | |||
>div.name | |||
display: inline-block | |||
font-size: 90% | |||
width: 20% | |||
>input[type=text] | |||
display: inline-block | |||
padding: 2px | |||
width: 60% | |||
>input[type=checkbox] | |||
margin-left: 2% | |||
&:checked + span | |||
@extend .t2 | |||
color: green | |||
font-weight: bold | |||
&:hover | |||
cursor: checkbox | |||
span.regex | |||
font-size: 80% | |||
&:hover | |||
cursor: pointer | |||
background-color: #d6d6d6 | |||
&#selected | |||
background-color: #CACACA | |||
div#results | |||
margin: 3% auto 0 auto | |||
margin-left: auto | |||
margin-right: auto | |||
width: 80% | |||
a | |||
@extend .t3 | |||
text-decoration: none | |||
&:hover | |||
color: $baseColor1 | |||
div#error | |||
font-size: 170% | |||
margin-top: 22% | |||
text-align: center | |||
span | |||
margin-right: 10px | |||
font-size: 150% | |||
&#s1 | |||
color: $baseColor1 | |||
&#s2 | |||
color: $baseColor2 | |||
&.disable-hover | |||
pointer-events: none | |||
div.result | |||
@extend .t3 | |||
height: 200px | |||
margin-bottom: 100% | |||
pointer-events: auto | |||
table | |||
border-collapse: collapse | |||
height: inherit | |||
tr | |||
@extend .t3 | |||
@include opaque(0.8) | |||
height: inherit | |||
&.cascade | |||
@extend .t1 | |||
margin-bottom: 15% | |||
&.display-all | |||
table tr | |||
@include opaque(1.0) | |||
#tablecontainer | |||
max-width: 70% | |||
overflow: auto !important | |||
div#display-info | |||
font-size: 1.3em | |||
padding: 5px 0px 5px 5px | |||
width: 100% | |||
#title | |||
margin-right: 10px | |||
#site | |||
text-transform: capitalize | |||
td#code | |||
@include vendor(transition, width 0.2s ease-in-out) | |||
width: $codeWidth | |||
max-width: $codeWidth | |||
height: inherit | |||
padding: 0px | |||
#tablecontainer | |||
width: 100% | |||
height: inherit | |||
overflow: hidden | |||
background-color: #49483e | |||
position: relative | |||
z-index: 1 | |||
table | |||
border-collapse: collapse | |||
font-family: monospace | |||
.linenos | |||
padding-left: 8px | |||
pre | |||
margin-top: 5px | |||
.code pre | |||
margin-top: 5px | |||
.hll | |||
background: #5B5A51 | |||
div#hidden-info | |||
width: $hiddenInfoWidth | |||
margin-left: -$hiddenInfoWidth | |||
height: 100% | |||
padding-top: 40px | |||
font-size: 1.2em | |||
line-height: 1.5em | |||
position: relative | |||
z-index: 0 | |||
@include vendor(transition, margin-left 0.2s ease-in-out) | |||
.display-all & | |||
margin-left: -$hiddenInfoWidth / 1.5 | |||
padding-left: 20px | |||
span | |||
color: $baseColor1 | |||
font-family: monospace | |||
font-size: 1.1em | |||
// float: right | |||
div | |||
display: block | |||
#authors | |||
a | |||
font-family: monospace |
@@ -2,6 +2,53 @@ | |||
Global project stylesheet. | |||
*/ | |||
// placeholder | |||
p | |||
font-size: 1.5em | |||
@import mixins | |||
@import variables | |||
html, body | |||
height: 100% | |||
margin: 0 | |||
padding: 0 | |||
font-family: sans-serif | |||
div#container | |||
min-height: 100% | |||
position: relative | |||
div#header | |||
padding: 10px | |||
div#body | |||
height: 100% | |||
padding-bottom: 110px | |||
padding-top: 4% | |||
div#center | |||
margin-left: auto | |||
margin-right: auto | |||
width: 75% | |||
div#footer | |||
background-color: $baseColor1 | |||
bottom: 0 | |||
height: 30px | |||
padding-bottom: 5px | |||
padding-top: 15px | |||
position: fixed | |||
text-align: center | |||
width: 100% | |||
z-index: 100 | |||
* | |||
color: white | |||
a | |||
@extend .t3 | |||
font-size: 1.2em | |||
margin-left: 5% | |||
margin-right: 5% | |||
text-decoration: none | |||
&:hover | |||
text-decoration: underline |
@@ -0,0 +1,16 @@ | |||
<?xml version="1.0" encoding="UTF-8"?> | |||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> | |||
<url> | |||
<loc>http://bitshift.it/</loc> | |||
<changefreq>monthly</changefreq> | |||
</url> | |||
<url> | |||
<loc>http://bitshift.it/about</loc> | |||
<changefreq>monthly</changefreq> | |||
</url> | |||
<url> | |||
<loc>http://bitshift.it/docs</loc> | |||
<changefreq>monthly</changefreq> | |||
</url> | |||
</urlset> |
@@ -0,0 +1,92 @@ | |||
= extends "layout.html" | |||
= block title | |||
about | |||
= endblock | |||
= block head | |||
{{ assets.tag("lib/jquery.min.js") }} | |||
{{ assets.tag("main.css") }} | |||
{{ assets.tag("about.css") }} | |||
= endblock | |||
= block after_body | |||
<div id="img-1" class="bg" speed="-1.25"></div> | |||
<div id="img-2" class="bg" speed="1.4"></div> | |||
<div id="img-3" class="bg" speed="1.4"></div> | |||
<div id="img-4" class="bg" speed="1.4"></div> | |||
<div id="top" class="section"> | |||
<div class="centered"> | |||
<h1><span>»</span> What</h1> | |||
<span id="title">bitshift</span> is an <a href="https://github.com/earwig/bitshift">open-source</a> | |||
online source-code search engine, developed by programmers, for programmers. The engine currently aggregates | |||
publicly-available code from two online code-hosting services – <a
href="https://bitbucket.org/">Bitbucket</a> – but has the necessary infrastructure to quickly incorporate | |||
others, like <a href="http://stackoverflow.com/">StackOverflow</a> and | |||
<a href="https://gitorious.org/">Gitorious</a>. <span id="title">bitshift</span> supports a robust query | |||
language, which allows users to search for specific languages, files, dates of creation and last modification,
and symbols (function, class, and variable names), amongst other attributes. | |||
Watch our introductory video: | |||
<div id="wrap"> | |||
<iframe id="vimeo" src="//player.vimeo.com/video/98697078" width="100%" height="100%" frameborder="0" | |||
webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe> | |||
</div> | |||
</div> | |||
</div> | |||
<div class="section"> | |||
<div id="how" class="centered"> | |||
<h1><span>»</span> How</h1> | |||
<span id="title">bitshift</span> has an extensive back-end, roughly divided into three sections: | |||
<ul> | |||
<li><span>indexer</span> : finds and downloads code from online frameworks</li> | |||
<li><span>parser</span> : parses newly crawled code, identifying its symbols</li> | |||
<li><span>database</span> : interprets and compiles user searches into database queries</li> | |||
</ul> | |||
The engine was developed over the span of four months, and is primarily implemented in <b>Python</b>, but has | |||
parsers in <b>Ruby</b>, <b>Java</b>, and a number of other languages. | |||
</div> | |||
</div> | |||
<div class="section"> | |||
<div class="centered"> | |||
<h1><span>»</span> Who</h1> | |||
<span id="title">bitshift</span> was developed by three seniors from New York City's Stuyvesant High School. | |||
<div id="top" class="person"> | |||
<div class="photo"> | |||
<a href="https://github.com/breuckelen"><img src="img/about/bio1.jpg" alt="Benjamin Attal's photo."></a> | |||
</div> | |||
<div class="bio"> | |||
<h1><a href="https://github.com/breuckelen">Benjamin Attal</a></h1> | |||
Benjamin Attal hacked together <span id="title">bitshift</span>'s parsers and is working on | |||
data-visualization for bitshift's statistics page. He is a software developer and entrepreneur who enjoys | |||
listening to and playing country music, as well as working with smart people. | |||
</div> | |||
</div> | |||
<div class="person"> | |||
<div class="photo"> | |||
<a href="https://github.com/earwig"><img src="img/about/bio2.jpg" alt="Ben Kurtovic's photo."></a> | |||
</div> | |||
<div class="bio"> | |||
<h1><a href="https://github.com/earwig">Ben Kurtovic</a></h1> Ben Kurtovic designed <span | |||
id="title">bitshift</span>’s database and acts as its server admin. In his free time, he edits Wikipedia | |||
and invents new ways of confusing the hell out of people through source code obfuscation. | |||
</div> | |||
</div> | |||
<div class="person"> | |||
<div class="photo"> | |||
<a href="https://github.com/sevko"><img src="img/about/bio3.jpg" alt="Severyn Kozak's photo."></a> | |||
</div> | |||
<div class="bio"> | |||
<h1><a href="https://github.com/sevko">Severyn Kozak</a></h1> | |||
Severyn developed <span id="title">bitshift</span>'s crawlers and its front-end. He loves skiing, mathematics | |||
that he doesn't understand, and the art of good software development. | |||
</div> | |||
</div> | |||
</div> | |||
</div> | |||
{{ assets.tag("about.js") }} | |||
= endblock |
@@ -0,0 +1,282 @@ | |||
= extends "layout.html" | |||
= block title | |||
docs | |||
= endblock | |||
= block head | |||
{{ assets.tag("lib/highlight.css") }} | |||
{{ assets.tag("docs.css") }} | |||
= endblock | |||
= block body | |||
<a id="logo" href="/"> | |||
<div id="logo"> | |||
<span id="logo-bit">bit</span | |||
><span id="logo-angle">«</span | |||
><span id="logo-shift">shift</span> | |||
</div> | |||
</a> | |||
<ul> | |||
<li> | |||
<h1><span>»</span> Usage</h1> | |||
<p> | |||
<span class="title">bitshift</span> is a search-engine optimized for | |||
source code: beyond supporting searches with the full range of ASCII | |||
symbols, the engine <em>understands</em> code, allowing users to query | |||
for metadata, like time of creation/last modification, programming | |||
language, and even symbols like function names and variables. Basic use | |||
boils down to general and advanced searches. | |||
<ul> | |||
<li> | |||
<h2>general search</h2> | |||
<p> | |||
To perform a "general search," simply place your cursor in the | |||
search bar on our home page and begin entering text; when you | |||
stop typing for a short period of time, we'll automatically | |||
execute the query for you. As you scroll down the page, new | |||
codelets, or results, will be seamlessly downloaded from our | |||
server and appended to the end. | |||
</p> | |||
</li> | |||
<li> | |||
<h2>advanced search</h2> | |||
<p> | |||
General searches, though, are limited. To allow users to make the | |||
best of our engine, we created an advanced search form that | |||
allows the creation of complex queries with the following | |||
specifiers: | |||
<ul> | |||
<li> | |||
<h3>search fields</h3> | |||
<ul id="search-fields"> | |||
<li> | |||
<span class="code">languages</span> : The programming | |||
languages to search for. | |||
</li> | |||
<li> | |||
<span class="code">authors</span> : Search for code | |||
written/modified by a specific person. | |||
</li> | |||
<li> | |||
<span class="code">date last modified</span> : Search for | |||
code last modified on a specific date | |||
(<span class="code">mm/dd/yy</span> format). | |||
</li> | |||
<li> | |||
<span class="code">date created</span> : Search for code | |||
created on a specific date | |||
(<span class="code">mm/dd/yy</span> format). | |||
</li> | |||
<li> | |||
<span class="code">symbols</span> : Search for specific | |||
symbols. | |||
</li> | |||
<li> | |||
<span class="code">functions</span> : Search for | |||
functions with specific names. | |||
</li> | |||
<li> | |||
<span class="code">classes</span> : Search for classes | |||
with specific names. | |||
</li> | |||
<li> | |||
<span class="code">variables</span> : Search for | |||
variables with specific names. | |||
</li> | |||
</ul> | |||
<p> | |||
Each of the search fields allows for numerous values; just | |||
separate them with spaces. If you'd like to search for a | |||
multi-word, space-delimited string, on the other hand, | |||
enclose it in double quotes. | |||
A query for <span class="code">foo bar</span> will search | |||
for occurrences of both <span class="string">"foo"</span> and
<span class="string">"bar"</span>, while | |||
<span class="code">"foo bar"</span> will search for | |||
occurrences of <span class="string">"foo bar"</span>. | |||
</p> | |||
</li> | |||
<li> | |||
<h3>search groups</h3> | |||
<p> | |||
Search groups facilitate even more robust queries: they're | |||
like a bunch of individual searches grouped into one. A | |||
user searching for occurrences of symbol
<span class="string">"curses"</span> in the language | |||
<span class="string">"Python"</span>, and | |||
<span class="string">"ncurses"</span> in | |||
<span class="string">"C"</span>, won't get away with:
<span class="code">"symbols:curses ncurses"</span> and | |||
<span class="code">"languages:Python C"</span>. The engine | |||
might return results <span class="string">"curses"</span> in
<span class="string">"C"</span> and | |||
<span class="string">"ncurses"</span> in | |||
<span class="string">"Python"</span>! | |||
To work around that, you can use two search groups: one for | |||
<span class="string">"curses"</span> in | |||
<span class="string">"Python"</span>, and another for | |||
<span class="string">"curses"</span> in | |||
<span class="string">"C"</span>.
<span class="title">bitshift</span> will return the union | |||
of both sets of search results. | |||
</p> | |||
</li> | |||
</ul> | |||
</p> | |||
</li> | |||
</ul> | |||
</p> | |||
</li> | |||
<li> | |||
<h1><span>»</span> API</h1> | |||
<p> | |||
<span class="title">bitshift</span> provides an API through GET | |||
requests to | |||
<a href="http://bitshift.it/search.json"><span class="code">/search.json</span></a>. | |||
</p> | |||
<h2>parameters</h2> | |||
<ul> | |||
<li> | |||
<span class="code">q</span> : The search query, as entered into the | |||
search bar. | |||
</li> | |||
<li> | |||
<span class="code">p</span> : The result page to return. Defaults to | |||
<span class="code">1</span>. Each page contains ten results, so this | |||
effectively offsets the search by | |||
<span class="code">10 * (p - 1)</span> codelets. | |||
</li> | |||
<li> | |||
<span class="code">hl</span> : Whether to return code as | |||
<a href="http://pygments.org/">pygments</a>-highlighted HTML or as | |||
plain source. Defaults to <span class="code">false</span>. | |||
</li> | |||
</ul> | |||
<h2>output</h2> | |||
<p> | |||
<span class="code">/search.json</span> returns a JSON-encoded | |||
dictionary. If there was an error, it will contain a single key, | |||
<span class="code">"error"</span>, whose value will contain a | |||
human-readable description of the error. Otherwise, there will be two | |||
keys: <span class="code">"count"</span>, storing the number of results, | |||
and <span class="code">"results"</span>, storing a list of codelets. | |||
Each codelet is a dictionary with the following key–value pairs: | |||
</p> | |||
<ul> | |||
<li> | |||
<span class="code">name</span> : The name of the codelet. | |||
</li> | |||
<li> | |||
<span class="code">code</span> : The actual source code if | |||
<span class="code">hl</span> was not given or was | |||
<span class="code">false</span>; HTML code otherwise. | |||
</li> | |||
<li> | |||
<span class="code">lang</span> : The language of the code. | |||
</li> | |||
<li> | |||
<span class="code">authors</span> : A list of authors. Each author is | |||
a list of two items: their name, and URL (or | |||
<span class="code">null</span> if none is known). | |||
</li> | |||
<li> | |||
<span class="code">url</span> : The URL of the page where the code | |||
was crawled from. | |||
</li> | |||
<li> | |||
<span class="code">created</span> : The date the code was created, as | |||
a | |||
<a href="https://en.wikipedia.org/wiki/ISO_8601">ISO 8601</a>-formatted | |||
string (e.g. <span class="code">"2014-06-01T12:41:28"</span>). | |||
</li> | |||
<li> | |||
<span class="code">modified</span> : The date the code was last | |||
modified, as a | |||
<a href="https://en.wikipedia.org/wiki/ISO_8601">ISO 8601</a>-formatted | |||
string (e.g. <span class="code">"2014-06-01T12:41:28"</span>). | |||
</li> | |||
<li> | |||
<span class="code">origin</span> : A list of two items: the | |||
originating site's name (e.g. <span class="code">"GitHub"</span>) and | |||
URL (e.g. <span class="code">"https://github.com"</span>). | |||
</li> | |||
</ul> | |||
<h2>example</h2> | |||
<p> | |||
The following example Python 2 code searches for a given Python | |||
function definition and prints the URL of the first result: | |||
</p> | |||
<table class="highlighttable code-example"> | |||
<tr> | |||
<td class="linenos"> | |||
<div class="linenodiv"> | |||
<pre> 1 | |||
2 | |||
3 | |||
4 | |||
5 | |||
6 | |||
7 | |||
8 | |||
9 | |||
10 | |||
11 | |||
12 | |||
13 | |||
14 | |||
15 | |||
16 | |||
17 | |||
18 | |||
19</pre> | |||
</div> | |||
</td> | |||
<td class="code"> | |||
<div class="highlight"> | |||
<pre><span class="c">#!/usr/bin/env python</span> | |||
<span class="kn">from</span> <span class="nn">json</span> <span class="kn">import</span> <span class="n">loads</span> | |||
<span class="kn">from</span> <span class="nn">sys</span> <span class="kn">import</span> <span class="n">argv</span> | |||
<span class="kn">from</span> <span class="nn">urllib</span> <span class="kn">import</span> <span class="n">urlencode</span> | |||
<span class="kn">from</span> <span class="nn">urllib2</span> <span class="kn">import</span> <span class="n">urlopen</span> | |||
<span class="k">def</span> <span class="nf">get_function</span><span class="p">(</span><span class="n">name</span><span class="p">):</span> | |||
<span class="n">params</span> <span class="o">=</span> <span class="p">{</span><span class="s">"q"</span><span class="p">:</span> <span class="s">"lang:python and func:def:</span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">name</span><span class="p">}</span> | |||
<span class="n">request</span> <span class="o">=</span> <span class="n">urlopen</span><span class="p">(</span><span class="s">"http://bitshift.it/search.json?"</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span><span class="n">params</span><span class="p">))</span> | |||
<span class="n">res</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">request</span><span class="o">.</span><span class="n">read</span><span class="p">())[</span><span class="s">"results"</span><span class="p">]</span> | |||
<span class="k">if</span> <span class="n">res</span><span class="p">:</span> | |||
<span class="k">print</span> <span class="s">"</span><span class="si">%s</span><span class="s">: </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">res</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="s">"url"</span><span class="p">])</span> | |||
<span class="k">else</span><span class="p">:</span> | |||
<span class="k">print</span> <span class="s">"</span><span class="si">%s</span><span class="s"> not found."</span> <span class="o">%</span> <span class="n">name</span> | |||
<span class="k">if</span> <span class="n">__name__</span> <span class="o">==</span> <span class="s">"__main__"</span><span class="p">:</span> | |||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">argv</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span> | |||
<span class="n">get_function</span><span class="p">(</span><span class="n">argv</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span></pre> | |||
</div> | |||
</td> | |||
</tr> | |||
</table> | |||
</li> | |||
<li id="sec3"> | |||
<h1><span>»</span> Get Involved</h1> | |||
<p> | |||
<span class="title">bitshift</span> is <span id="gasp">(gasp)</span> | |||
open-source! The project is hosted on | |||
<a href="https://github.com/earwig/bitshift">GitHub</a>; feel free to | |||
file an issue or submit a pull request. | |||
</p> | |||
</li> | |||
</ul> | |||
= endblock |
@@ -0,0 +1,26 @@ | |||
= extends "layout.html" | |||
= block title | |||
404 | |||
= endblock | |||
= block head | |||
{{ assets.tag("error404.css") }} | |||
= endblock | |||
= block body | |||
<div id="message"> | |||
{{ assets.syntax_highlight([ | |||
'puts("404");', | |||
        'printf("%d\\n", 404);',
'puts 404', | |||
'System.out.println("404")', | |||
'print 404', | |||
'console.log("404")', | |||
'echo 404', | |||
'std::cout << "404\\n"', | |||
'(println "404")', | |||
'say "404!";' | |||
] | random) | safe }} | |||
</div> | |||
= endblock |
@@ -1,9 +1,121 @@ | |||
= extends "layout.html" | |||
= block title | |||
Home | |||
home | |||
= endblock | |||
= block head | |||
{{ assets.tag("lib/jqueryui.custom.min.css") }} | |||
{{ assets.tag("lib/jquery.min.js") }} | |||
{{ assets.tag("lib/jquery-ui.min.js") }} | |||
{{ assets.tag("lib/highlight.css") }} | |||
{{ assets.tag("index.css") }} | |||
<script> | |||
AUTOCOMPLETE_LANGUAGES = {{ autocomplete_languages | safe }}; | |||
</script> | |||
= endblock | |||
= block body | |||
<p>Hello, world.</p> | |||
<div id="search-field"> | |||
<a id="logo" href="/"> | |||
<div id="logo"> | |||
<span id="logo-bit">bit</span | |||
><span id="logo-angle">«</span | |||
><span id="logo-shift">shift</span> | |||
</div> | |||
</a> | |||
<form id="search-bar"> | |||
<input id="query" type="text" name="query" | |||
><button id="advanced-search" title="advanced search" type="button"> | |||
Adv. Search | |||
</button> | |||
<div id="advanced-search"> | |||
<div id="heading"> | |||
<div id="col1">Fields</div | |||
><div id="col2"> | |||
Search groups | |||
<button id="submit"> | |||
<div>Search</div> | |||
</button> | |||
<button id="add-group"> | |||
<div><span>+</span> Add</div> | |||
</button> | |||
<button id="remove-group"> | |||
<div><span>-</span> Remove</div> | |||
</button> | |||
</div> | |||
</div> | |||
<div id="sidebar"> | |||
<ul> | |||
<li> | |||
<input type="checkbox" id="language" checked="true"> | |||
<label for="language"><div>languages</div></label> | |||
</li> | |||
<li> | |||
<input type="checkbox" id="author"> | |||
<label for="author"><div>authors</div></label> | |||
</li> | |||
<li> | |||
<input type="checkbox" id="date-last-modified"> | |||
<label for="date-last-modified"><div>date last modified</div></label> | |||
</li> | |||
<li> | |||
<input type="checkbox" id="date-created"> | |||
<label for="date-created"><div>date created</div></label> | |||
</li> | |||
<li> | |||
<input type="checkbox" id="symbol"> | |||
<label for="symbol"><div>symbols</div></label> | |||
</li> | |||
<li> | |||
<input type="checkbox" id="function"> | |||
<label for="function"><div>functions</div></label> | |||
</li> | |||
<li> | |||
<input type="checkbox" id="class"> | |||
<label for="class"><div>classes</div></label> | |||
</li> | |||
<li> | |||
<input type="checkbox" id="variable"> | |||
<label for="variable"><div>variables</div></label> | |||
</li> | |||
</ul> | |||
</div> | |||
<div id="search-groups"> | |||
<div class="search-group" id="selected"> | |||
<div id="language"> | |||
<div class="name">languages</div | |||
><input id="autocomplete" class="language" name="language" type="text" | |||
><input type="checkbox" name="regex" | |||
><span class="regex">Regex</span> | |||
</div> | |||
</div> | |||
</div> | |||
</div> | |||
</form> | |||
</div> | |||
<div id="results"></div> | |||
{{ assets.tag("index.js") }} | |||
{{ assets.tag("index.advanced-search-form.js") }} | |||
= endblock | |||
= block after_body | |||
<div id="hotkey-help" class="hidden"> | |||
<div>Hotkeys</div> | |||
<ul> | |||
<li><span class="hotkey">k</span> <span class="seperator">:</span> move window up to the previous result</li> | |||
<li><span class="hotkey">j</span> <span class="seperator">:</span> move window down to the next result</li> | |||
<li><span class="hotkey">h</span> <span class="seperator">:</span> move to the previous symbol match</li> | |||
<li><span class="hotkey">l</span> <span class="seperator">:</span> move to the next symbol match</li> | |||
<li><span class="hotkey">?</span> <span class="seperator">:</span> toggle help</li> | |||
</ul> | |||
</div> | |||
= endblock |
@@ -4,24 +4,44 @@ | |||
<html> | |||
<head> | |||
<title> | |||
= block title | |||
= endblock | |||
bitshift « | |||
= filter lower | |||
= block title | |||
= endblock | |||
= endfilter | |||
</title> | |||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"/> | |||
<meta name="description" content="bitshift is an online code snippet | |||
exchange."/> | |||
<meta name="keywords" content="code snippet exchange golf programming | |||
software community"/> | |||
<meta name="description" content="bitshift is a source code search engine."/> | |||
<meta name="keywords" content="source code language search engine"/> | |||
<meta name="author" content="Benjamin Attal Ben Kurtovic Severyn Kozak"/> | |||
{{ assets.tag("main.css") }} | |||
{{ assets.tag("main.js") }} | |||
= block head | |||
= endblock | |||
</head> | |||
<body> | |||
= block body | |||
= endblock | |||
<div id="container"> | |||
<div id="header"> | |||
</div> | |||
<div id="body"> | |||
<div id="center"> | |||
= block body | |||
= endblock | |||
</div> | |||
</div> | |||
= block after_body | |||
= endblock | |||
<div id="footer"> | |||
<a href="/">home</a> | |||
<a href="/about">about</a> | |||
<a href="/docs">docs</a> | |||
</div> | |||
</div> | |||
</body> | |||
</html> |
@@ -0,0 +1,19 @@ | |||
#!/usr/bin/env python
"""Python 2 demo client: look up a Python function on bitshift.it by name."""
from json import loads
from sys import argv
from urllib import urlencode
from urllib2 import urlopen


def get_function(name):
    """Search bitshift for a Python ``def <name>`` and print the top hit.

    Prints "<name>: <url>" for the first result, or "<name> not found."
    when the API returns no results.
    """
    # lang/func filters restrict the search to Python function definitions.
    params = {"q": "lang:python and func:def:%s" % name}
    request = urlopen("http://bitshift.it/search.json?" + urlencode(params))
    res = loads(request.read())["results"]
    if res:
        print "%s: %s" % (name, res[0]["url"])
    else:
        print "%s not found." % name

if __name__ == "__main__":
    # Expect exactly one argument (the function name); do nothing otherwise.
    if len(argv) == 2:
        get_function(argv[1])
@@ -0,0 +1,56 @@ | |||
# Python 2 smoke-test client for the per-language parser servers: sends a
# sample source file over a TCP socket and prints the parser's reply.
import socket, sys, struct

# Defaults target the C parser; overridden below by the command-line choice.
file_name = 'resources/<name>.c'
server_socket_number = 5001
# Initial receive chunk size; replaced by the announced payload size below.
recv_size = 8192

if __name__ == '__main__':
    if len(sys.argv) == 1:
        print "Please input a parser to test."
    elif len(sys.argv) > 2:
        print "Too many arguments."
    else:
        if sys.argv[1] == 'c':
            # The defaults above already point at the C parser.
            pass
        elif sys.argv[1] == 'java':
            file_name = "resources/Matrix.java"
            server_socket_number = 5002
        elif sys.argv[1] == 'ruby':
            file_name = "resources/parser.rb"
            server_socket_number = 5065

        server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        server_socket.connect(("localhost", server_socket_number))
        with open(file_name, "r") as source_file:
            source = source_file.read()
            # Request wire format: "<length>\n<source>".
            server_socket.send("%d\n%s" % (len(source), source));

        # Response wire format: 4-byte big-endian length prefix, then payload.
        total_data = []; size_data = cur_data = ''
        total_size = 0; size = sys.maxint
        while total_size < size:
            cur_data = server_socket.recv(recv_size)
            if not total_data:
                # Still accumulating the 4-byte length prefix.
                # NOTE(review): the `> 4` test means a prefix that arrives as
                # exactly 4 bytes is not unpacked until more data follows --
                # confirm the servers always send prefix and payload together.
                if len(size_data) > 4:
                    size_data += cur_data
                    size = struct.unpack('>i', size_data[:4])[0]
                    recv_size = size
                    if recv_size > sys.maxint: recv_size = sys.maxint
                    # Everything after the prefix is payload.
                    total_data.append(size_data[4:])
                else:
                    size_data += cur_data
            else:
                total_data.append(cur_data)
            total_size = sum([len(s) for s in total_data])
        server_socket.close()
        print ''.join(total_data);
@@ -0,0 +1,218 @@ | |||
package battlechap; | |||
import java.io.PrintStream; | |||
/**
 * A square matrix of arbitrary objects supporting element access,
 * row/column get/set/swap operations, and in-place transposition.
 *
 * <p>Cells may be null ("empty" -- see {@link #isEmpty(int, int)}).</p>
 */
public class Matrix {
    /** Width of one printed cell, in characters (content plus padding). */
    private static final int CELL_WIDTH = 12;

    /** Backing square array of cells; null means "empty". */
    private Object[][] _datmatrix;

    /**
     * Creates an empty size-by-size matrix.
     *
     * @param size number of rows (and columns)
     */
    public Matrix(int size) {
        this._datmatrix = new Object[size][size];
    }

    /** @return the dimension of this square matrix */
    public int size() {
        return this._datmatrix.length;
    }

    /**
     * @param row row index (0-based)
     * @param col column index (0-based)
     * @return the cell's contents, or null if the cell is empty
     */
    public Object get(int row, int col) {
        return this._datmatrix[row][col];
    }

    /**
     * @param row row index (0-based)
     * @param col column index (0-based)
     * @return true if the cell holds no object
     */
    public boolean isEmpty(int row, int col) {
        return this._datmatrix[row][col] == null;
    }

    /**
     * Element-wise equality with another matrix.
     *
     * <p>Fix: the original called {@code get(i, j).equals(...)} directly and
     * threw a NullPointerException whenever a cell was empty; empty cells
     * are now compared null-safely (two empty cells are equal).</p>
     *
     * @param obj the object to compare against
     * @return true if obj is a Matrix of the same size with equal cells
     */
    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof Matrix))
            return false;
        Matrix other = (Matrix) obj;
        if (other.size() != size())
            return false;
        for (int i = 0; i < size(); i++) {
            for (int j = 0; j < size(); j++) {
                Object mine = get(i, j);
                Object theirs = other.get(i, j);
                // Null-safe: two empty cells compare equal.
                if (mine == null ? theirs != null : !mine.equals(theirs))
                    return false;
            }
        }
        return true;
    }

    /**
     * Added to honor the equals/hashCode contract (the original overrode
     * equals() only, which breaks hash-based collections).
     *
     * @return a hash derived from every cell in row-major order
     */
    @Override
    public int hashCode() {
        int hash = 1;
        for (int i = 0; i < size(); i++) {
            for (int j = 0; j < size(); j++) {
                Object cell = this._datmatrix[i][j];
                hash = 31 * hash + (cell == null ? 0 : cell.hashCode());
            }
        }
        return hash;
    }

    /**
     * Stores an object in a cell.
     *
     * @return the cell's previous contents (possibly null)
     */
    public Object set(int row, int col, Object value) {
        Object previous = this._datmatrix[row][col];
        this._datmatrix[row][col] = value;
        return previous;
    }

    /** Transposes this matrix in place (swaps cells across the diagonal). */
    public void transpose() {
        // Only walk the upper triangle; set() returns the old value, so the
        // nested set() call swaps each symmetric pair exactly once.
        for (int row = 0; row < size(); row++) {
            for (int col = row; col < size(); col++) {
                set(row, col, set(col, row, get(row, col)));
            }
        }
    }

    /** Swaps two rows of a raw 2-D array in place. */
    public static void swapRows(int rowA, int rowB, Object[][] data) {
        for (int i = 0; i < data[rowA].length; i++) {
            Object tmp = data[rowA][i];
            data[rowA][i] = data[rowB][i];
            data[rowB][i] = tmp;
        }
    }

    /** Swaps two columns of a raw 2-D array in place. */
    public static void swapCols(int colA, int colB, Object[][] data) {
        for (int i = 0; i < data.length; i++) {
            Object tmp = data[i][colA];
            data[i][colA] = data[i][colB];
            data[i][colB] = tmp;
        }
    }

    /** @return a copy of the given row */
    public Object[] getRow(int row) {
        Object[] out = new Object[this._datmatrix[row].length];
        for (int i = 0; i < out.length; i++) {
            out[i] = this._datmatrix[row][i];
        }
        return out;
    }

    /** @return a copy of the given column */
    public Object[] getCol(int col) {
        // Column length is the number of rows (equal for a square matrix).
        Object[] out = new Object[this._datmatrix.length];
        for (int i = 0; i < out.length; i++) {
            out[i] = this._datmatrix[i][col];
        }
        return out;
    }

    /**
     * Replaces a row with the given values.
     *
     * @return the row's previous contents
     */
    public Object[] setRow(int row, Object[] values) {
        Object[] previous = getRow(row);
        for (int i = 0; i < size(); i++) {
            set(row, i, values[i]);
        }
        return previous;
    }

    /**
     * Replaces a column with the given values.
     *
     * @return the column's previous contents
     */
    public Object[] setCol(int col, Object[] values) {
        Object[] previous = getCol(col);
        for (int i = 0; i < size(); i++) {
            set(i, col, values[i]);
        }
        return previous;
    }

    /**
     * Pads a cell's string form out to CELL_WIDTH with trailing spaces,
     * always emitting at least one space.
     *
     * <p>Fix: the original computed padding as {@code "...".substring(len)}
     * on a fixed space literal, which threw StringIndexOutOfBoundsException
     * for any cell text wider than the literal.</p>
     */
    private static String padding(Object cell) {
        int len = String.valueOf(cell).length();
        StringBuilder pad = new StringBuilder(" ");
        while (len + pad.length() < CELL_WIDTH) {
            pad.append(' ');
        }
        return pad.toString();
    }

    /**
     * @return a multi-line rendering with 1-based row labels and
     *         space-padded cells
     */
    @Override
    public String toString() {
        StringBuilder out = new StringBuilder();
        for (int i = 0; i < this._datmatrix.length; i++) {
            // Single-digit labels get an extra space so columns line up.
            out.append(i + 1).append(i < 9 ? ": " : ":");
            for (int j = 0; j < this._datmatrix[i].length; j++) {
                Object cell = this._datmatrix[i][j];
                out.append(cell).append(padding(cell));
            }
            out.append("\n");
        }
        return out.toString();
    }

    /** Prints a raw 2-D array with the same cell padding as toString(). */
    public static void print(Object[][] data) {
        for (int i = 0; i < data.length; i++) {
            for (int j = 0; j < data[i].length; j++) {
                System.out.print(data[i][j] + padding(data[i][j]));
            }
            System.out.print("\n");
        }
    }

    /** Prints a 1-D array on one line with the same cell padding. */
    public static void printArray(Object[] data) {
        for (int i = 0; i < data.length; i++) {
            System.out.print(data[i] + padding(data[i]));
        }
        System.out.print("\n");
    }

    /** Demo of the Matrix operations on random integer contents. */
    public static void main(String[] args) {
        Matrix blop = new Matrix(5);
        Matrix copy = new Matrix(5);
        for (int i = 0; i < blop.size(); i++) {
            for (int j = 0; j < blop.size(); j++) {
                // Integer.valueOf over the deprecated new Integer(...).
                Integer value = Integer.valueOf((int) (Math.random() * 20.0D));
                blop.set(i, j, value);
                copy.set(i, j, value);
            }
        }
        System.out.println("\nDemonstrating equals method (should be true)\t" + copy.equals(blop) + "\n");
        System.out.println("Demonstrating get method\n" + blop.get(0, 0) + "\n");
        System.out.println("Demonstrating is empty method\n" + blop.isEmpty(1, 0) + "\n");
        System.out.println("Demonstrating size method \n" + blop.size() + "\n");
        System.out.println("Demonstrating toString method\n" + blop + "\n");
        blop.transpose();
        System.out.println("Blop has been transposed\n" + blop + "\n");
        Object[][] grid = new Object[4][4];
        for (int j = 0; j < grid.length; j++) {
            for (int k = 0; k < grid[j].length; k++) {
                grid[j][k] = Integer.valueOf((int) (Math.random() * 20.0D));
            }
        }
        System.out.println("\n\n**Swapping Rows Demo**");
        print(grid);
        System.out.println("\nRows 1 and 2 have been Swapped \n");
        swapRows(1, 2, grid);
        print(grid);
        System.out.println("\n**Swapping Columns Demo**");
        print(grid);
        System.out.println("\n\nColumns 1 and 2 have been Swapped \n");
        swapCols(1, 2, grid);
        print(grid);
        System.out.println("\n**Getting rows demo (from blop)**");
        System.out.println(blop);
        System.out.println("\nGetting row 1\n");
        printArray(blop.getRow(1));
        System.out.println("\n**Getting cols demo (from blop)**");
        System.out.println(blop);
        System.out.println("\nGetting col 1\n");
        printArray(blop.getCol(1));
        System.out.println("\n**Demonstrating set row method**");
        System.out.println(blop);
        System.out.println("\nSwitching row 1 of blop to 1st column of blop\n");
        blop.setRow(1, blop.getCol(1));
        System.out.println(blop + "\n");
        System.out.println("\n**Demonstrating set col method**");
        System.out.println(blop);
        System.out.println("\nSwitching col 1 of blop to 2nd row of blop\n");
        blop.setCol(1, blop.getRow(2));
        System.out.println(blop + "\n");
    }
}
@@ -0,0 +1,40 @@ | |||
""" | |||
Module to contain all the project's Flask server plumbing. | |||
""" | |||
from flask import Flask | |||
from flask import render_template, session | |||
from bitshift import assets | |||
# from bitshift.database import Database | |||
# from bitshift.query import parse_query | |||
app = Flask(__name__) | |||
app.config.from_object("bitshift.config") | |||
app_env = app.jinja_env | |||
app_env.line_statement_prefix = "=" | |||
app_env.globals.update(assets=assets) | |||
# database = Database() | |||
@app.route("/") | |||
def index(): | |||
return render_template("index.html") | |||
@app.route("/search/<query>")
def search(query):
    """Placeholder endpoint for server-side search.

    NOTE(review): currently a stub -- the parse/search pipeline is commented
    out and the view returns None (Flask errors on a None response if this
    route is hit). Confirm before exposing this endpoint.
    """
    # tree = parse_query(query)
    # database.search(tree)
    pass
@app.route("/about") | |||
def about(): | |||
return render_template("about.html") | |||
@app.route("/developers") | |||
def developers(): | |||
return render_template("developers.html") | |||
if __name__ == "__main__":
    # Development entry point only; debug=True enables the interactive
    # debugger and must not be used in production.
    app.run(debug=True)
@@ -0,0 +1,126 @@ | |||
require 'socket' | |||
require 'ruby_parser' | |||
require 'sexp_processor' | |||
module Bitshift
    # Parses Ruby source into a table of symbols (modules, classes,
    # functions, variables) with their line positions.
    class Parser
        def initialize(source)
            @source = source
        end

        # Parses @source with RubyParser and walks the resulting
        # s-expression tree, returning the visitor's symbol hash.
        def parse
            parser = RubyParser.new
            tree = parser.parse(@source)
            # Fix: removed leftover debug output (`puts tree.inspect`) --
            # callers consume the returned hash, and stray prints pollute
            # stdout for any process capturing this one's output.

            # Normalize line numbers so the tree's first line becomes 1.
            offset = tree.line - 1
            processor = NodeVisitor.new offset
            processor.process tree
            return processor.symbols
        end
    end

    # SexpProcessor that records the position of every module, class,
    # method definition, method call, and assignment it visits.
    class NodeVisitor < SexpProcessor
        attr_accessor :symbols
        attr_accessor :offset

        def initialize(offset)
            super()
            @require_empty = false
            @offset = offset

            # Default-constructing hashes so visitors can append without
            # checking for key existence first.
            module_hash = Hash.new {|hash, key| hash[key] = Hash.new}
            class_hash = module_hash.clone
            function_hash = Hash.new {|hash, key| hash[key] = { calls: [] } }
            var_hash = Hash.new {|hash, key| hash[key] = [] }

            @symbols = {
                modules: module_hash,
                classes: class_hash,
                functions: function_hash,
                vars: var_hash
            }
        end

        # Position of a multi-line block: from the node's own line down to
        # the line of its deepest trailing child.
        def block_position(exp)
            pos = Hash.new
            end_ln = (start_ln = exp.line - offset)

            cur_exp = exp
            while cur_exp.is_a? Sexp
                end_ln = cur_exp.line - offset
                cur_exp = cur_exp.last
                break if cur_exp == nil
            end

            pos[:coord] = {
                start_ln: start_ln,
                end_ln: end_ln }
            return pos
        end

        # Position of a single-line statement (start and end coincide).
        def statement_position(exp)
            pos = Hash.new
            end_ln = start_ln = exp.line - offset

            pos[:coord] = {
                start_ln: start_ln,
                end_ln: end_ln }
            return pos
        end

        # s(:module, name, body...) -- record the module, then recurse.
        def process_module(exp)
            pos = block_position exp
            exp.shift
            name = exp.shift
            symbols[:modules][name] = pos
            exp.each_sexp {|s| process(s)}
            return exp.clear
        end

        # s(:class, name, superclass, body...) -- record the class.
        def process_class(exp)
            pos = block_position exp
            exp.shift
            name = exp.shift
            symbols[:classes][name] = pos
            exp.each_sexp {|s| process(s)}
            return exp.clear
        end

        # s(:defn, name, args, body...) -- record a method definition.
        def process_defn(exp)
            pos = block_position exp
            exp.shift
            name = exp.shift
            symbols[:functions][name][:declaration] = pos
            exp.each_sexp {|s| process(s)}
            return exp.clear
        end

        # s(:call, receiver, name, args...) -- record a method call site.
        def process_call(exp)
            pos = statement_position exp
            exp.shift
            exp.shift
            name = exp.shift
            symbols[:functions][name][:calls] << pos
            exp.each_sexp {|s| process(s)}
            return exp.clear
        end

        # s(:iasgn, name, value) -- instance-variable assignment.
        def process_iasgn(exp)
            pos = statement_position exp
            exp.shift
            name = exp.shift
            symbols[:vars][name] << pos
            exp.each_sexp {|s| process(s)}
            return exp.clear
        end

        # s(:lasgn, name, value) -- local-variable assignment.
        def process_lasgn(exp)
            pos = statement_position exp
            exp.shift
            name = exp.shift
            symbols[:vars][name] << pos
            exp.each_sexp {|s| process(s)}
            return exp.clear
        end
    end
end
@@ -0,0 +1,76 @@ | |||
# -*- coding: utf-8 -*- | |||
from __future__ import unicode_literals | |||
import unittest | |||
from bitshift.query import parse_query | |||
# Each entry is (query string, expected serialization of the parsed tree);
# TestQueryParser feeds these straight through parse_query().serialize().
TESTS = [
    # Text
    ("test", "Tree(Text(String(u'test')))"),
    ("re:test", "Tree(Text(Regex(u'test')))"),

    # Language
    ("language:python", "Tree(Language(Python))"),
    ("language:py", "Tree(Language(Python))"),
    ("l:r:r..y", "Tree(Language(Ruby))"),
    (r'"lang:re:python|^c$"',
     "Tree(BinaryOp(Language(C), OR, Language(Python)))"),

    # Author
    ('"author:Ben Kurtovic"', "Tree(Author(String(u'Ben Kurtovic')))"),
    (r"'a:re:b.*?\sk.*?'", r"Tree(Author(Regex(u'b.*?\\sk.*?')))"),

    # Date
    ("'create:before:Jan 1, 2014'",
     "Tree(Date(CREATE, BEFORE, 2014-01-01 00:00:00))"),
    ("'modify:after:2010-05-09 10:11:12'",
     "Tree(Date(MODIFY, AFTER, 2010-05-09 10:11:12))"),

    # Symbol
    ("sym:foobar", "Tree(Symbol(ALL, ALL, String(u'foobar')))"),
    ("func:foo_bar", "Tree(Symbol(ALL, FUNCTION, String(u'foo_bar')))"),
    ("func:foo_bar()", "Tree(Symbol(ALL, FUNCTION, String(u'foo_bar')))"),
    ("class:FooBar", "Tree(Symbol(ALL, CLASS, String(u'FooBar')))"),
    ("var:foobar", "Tree(Symbol(ALL, VARIABLE, String(u'foobar')))"),
    ("var:r:foobar", "Tree(Symbol(ALL, VARIABLE, Regex(u'foobar')))"),

    # Composition
    ("(a and b) or (c and d)", ", ".join([
        "Tree(BinaryOp(BinaryOp(Text(String(u'a'))", "AND",
        "Text(String(u'b')))", "OR", "BinaryOp(Text(String(u'c'))", "AND",
        "Text(String(u'd')))))"])),
    ("a and b or c and d", ", ".join([
        "Tree(BinaryOp(BinaryOp(Text(String(u'a'))", "AND",
        "Text(String(u'b')))", "OR", "BinaryOp(Text(String(u'c'))", "AND",
        "Text(String(u'd')))))"])),
    ("a and b or c or d", ", ".join([
        "Tree(BinaryOp(BinaryOp(Text(String(u'a'))", "AND",
        "Text(String(u'b')))", "OR", "BinaryOp(Text(String(u'c'))", "OR",
        "Text(String(u'd')))))"])),
    ("a and (b or c or d)", ", ".join([
        "Tree(BinaryOp(Text(String(u'a'))", "AND",
        "BinaryOp(Text(String(u'b'))", "OR", "BinaryOp(Text(String(u'c'))", "OR",
        "Text(String(u'd'))))))"])),
    ("a not b", ", ".join([
        "Tree(BinaryOp(Text(String(u'a'))", "AND", "UnaryOp(NOT",
        "Text(String(u'b')))))"])),

    # Unicode, Escaping
    (r'lang:py "author:fo\\o \"bar\" baz\\"', ", ".join([
        "Tree(BinaryOp(Language(Python)", "AND",
        "Author(String(u'fo\\\\o \"bar\" baz\\\\'))))"])),
    ('"author:Ben Kurtović"', "Tree(Author(String(u'Ben Kurtovi\\u0107')))")
]
class TestQueryParser(unittest.TestCase):
    """Unit tests for the query parser in :py:mod:`bitshift.query`."""

    def test_parse(self):
        """test full query parsing"""
        # Walk the table of (query, expected serialization) pairs and check
        # each round trip through the parser.
        for query, expected_tree in TESTS:
            serialized = parse_query(query).serialize()
            self.assertEqual(expected_tree, serialized)
if __name__ == "__main__":
    # verbosity=2 prints each test's short description as it runs.
    unittest.main(verbosity=2)