
Merge branch 'develop'. Version 1.0.

Merge the latest version of `develop`: bitshift v1.0 (beta).
tags/v1.0^0
Severyn Kozak committed 10 years ago
commit 061454a9c4
100 changed files with 6957 additions and 38 deletions
  1. +18 -1  .gitignore
  2. +2 -2  LICENSE
  3. +41 -1  README.md
  4. +53 -7  app.py
  5. +8 -1  bitshift/__init__.py
  6. +31 -7  bitshift/assets.py
  7. +103 -0  bitshift/codelet.py
  8. +0 -0
  9. +94 -0  bitshift/crawler/crawl.py
  10. +243 -0  bitshift/crawler/crawler.py
  11. +348 -0  bitshift/crawler/indexer.py
  12. +239 -0  bitshift/database/__init__.py
  13. +147 -0  bitshift/database/migration.py
  14. +141 -0  bitshift/database/schema.sql
  15. +22 -0  bitshift/languages.py
  16. +368 -0  bitshift/languages.yml
  17. +90 -0  bitshift/parser/__init__.py
  18. +106 -0  bitshift/parser/c.py
  19. +217 -0  bitshift/parser/python.py
  20. +320 -0  bitshift/query/__init__.py
  21. +297 -0  bitshift/query/nodes.py
  22. +84 -0  bitshift/query/tree.py
  23. +177 -0  docs/Makefile
  24. +27 -0  docs/source/api/bitshift.crawler.rst
  25. +19 -0  docs/source/api/bitshift.database.rst
  26. +11 -0  docs/source/api/bitshift.query.rst
  27. +45 -0  docs/source/api/bitshift.rst
  28. +7 -0  docs/source/api/modules.rst
  29. +268 -0  docs/source/conf.py
  30. +20 -0  docs/source/index.rst
  31. +9 -0  gunicorn.cfg
  32. +72 -0  parsers/java/pom.xml
  33. +35 -0  parsers/java/src/main/java/com/bitshift/parsing/Parse.java
  34. +214 -0  parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java
  35. +71 -0  parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java
  36. +177 -0  parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java
  37. +17 -0  parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java
  38. +4 -0  parsers/ruby/Gemfile
  39. +6 -0  parsers/ruby/Rakefile
  40. +137 -0  parsers/ruby/lib/parser.rb
  41. +14 -0  setup.py
  42. +65 -0  static/css/lib/github.css
  43. +64 -0  static/css/lib/highlight.css
  44. BIN
  45. BIN
  46. BIN
  47. BIN
  48. BIN
  49. BIN
  50. BIN
  51. BIN
  52. BIN
  53. BIN
  54. BIN
  55. BIN
  56. BIN
  57. BIN
  58. BIN
  59. BIN
  60. BIN
  61. +7 -0  static/css/lib/jqueryui.custom.min.css
  62. +0 -4  static/css/main.css
  63. BIN
  64. BIN
  65. +1 -0  static/google10335120a3066831.html
  66. BIN
  67. BIN
  68. BIN
  69. BIN
  70. BIN
  71. BIN
  72. BIN
  73. +19 -0  static/js/about.js
  74. +175 -0  static/js/index.advanced-search-form.js
  75. +447 -0  static/js/index.js
  76. +7 -0  static/js/lib/jquery-ui.min.js
  77. +4 -0  static/js/lib/jquery.min.js
  78. +6 -0  static/js/main.js
  79. +3 -0  static/robots.txt
  80. +18 -0  static/sass/_logo.sass
  81. +20 -2  static/sass/_mixins.sass
  82. +12 -0  static/sass/_variables.sass
  83. +139 -0  static/sass/about.sass
  84. +63 -0  static/sass/docs.sass
  85. +19 -0  static/sass/error404.sass
  86. +443 -0  static/sass/index.sass
  87. +50 -3  static/sass/main.sass
  88. +16 -0  static/sitemap.xml
  89. +92 -0  templates/about.html
  90. +282 -0  templates/docs.html
  91. +26 -0  templates/error404.html
  92. +114 -2  templates/index.html
  93. +28 -8  templates/layout.html
  94. +0 -0
  95. +19 -0  test/find_function_def.py
  96. +56 -0  test/parser_test.py
  97. +218 -0  test/resources/Matrix.java
  98. +40 -0  test/resources/app.py
  99. +126 -0  test/resources/parser.rb
  100. +76 -0  test/test_query_parser.py

.gitignore (+18 -1)

@@ -1,4 +1,10 @@
static/css/*
!lib

*.swp
.sass-cache
.DS_Store
.my.cnf

# github premade rules
*.py[cod]
@@ -18,7 +24,6 @@ var
sdist
develop-eggs
.installed.cfg
lib
lib64
__pycache__

@@ -37,3 +42,15 @@ nosetests.xml
.mr.developer.cfg
.project
.pydevproject

# Maven
target

# Ruby
!parsers/ruby/lib

# Ctags
*/tags
logs
Gemfile.lock
parsing.jar

LICENSE (+2 -2)

@@ -1,6 +1,6 @@
The MIT License (MIT)

Copyright (c) 2014 Ben Kurtovic
Copyright (c) 2014 Benjamin Attal, Ben Kurtovic, Severyn Kozak

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
SOFTWARE.

README.md (+41 -1)

@@ -1,4 +1,44 @@
bitshift
========

bitshift is an online code snippet exchange.
bitshift is a semantic search engine for source code developed by Benjamin
Attal, Ben Kurtovic, and Severyn Kozak. This README is intended for developers
only. For a user overview of the project:

* read our [about page](http://bitshift.it/)
* watch our [demo video](https://vimeo.com/98697078)

Branches
--------

- `master`: working, tested, version-numbered code - no direct commits; should
only accept merges from `develop` when ready to release
- `develop`: integration branch with unreleased but mostly functional code -
direct commits allowed but should be minor
- `feature/*`: individual components of the project with untested, likely
horribly broken code - branch off from and merge into `develop` when done

Style
-----
bitshift uses [SASS][SASS] for styling; compile the stylesheets to CSS with
`sass --watch static/sass/:static/css`.

Documentation
-------------

To build documentation, run `make html` from the `docs` subdirectory. You can
then browse from `docs/build/html/index.html`.

To automatically update the API documentation structure (necessary when adding
new modules or packages, but *not* when adding functions or changing
docstrings), run `sphinx-apidoc -fo docs/source/api bitshift` from the project
root. Note that this will revert any custom changes made to the files in
`docs/source/api`, so you might want to update them by hand instead.

[SASS]: http://sass-lang.com/guide

Releasing
---------

- Update `__version__` in `bitshift/__init__.py`, `version` in `setup.py`, and
`version` and `release` in `docs/conf.py`.

app.py (+53 -7)

@@ -2,21 +2,67 @@
Module to contain all the project's Flask server plumbing.
"""

from flask import Flask
from flask import render_template, session
from json import dumps

from bitshift import *
from flask import Flask, make_response, render_template, request

app = Flask(__name__)
from bitshift import assets
from bitshift.database import Database
from bitshift.languages import LANGS
from bitshift.query import parse_query, QueryParseException

app = Flask(__name__, static_folder="static", static_url_path="")
app.config.from_object("bitshift.config")

app_env = app.jinja_env
app_env.line_statement_prefix = "="
app_env.globals.update(assets = assets)
app_env.globals.update(assets=assets)

database = Database()

@app.route("/")
def index():
return render_template("index.html")
return render_template("index.html", autocomplete_languages=LANGS)

@app.route("/search.json")
def search():
def reply(json):
resp = make_response(dumps(json))
resp.mimetype = "application/json"
return resp

query = request.args.get("q")
if not query:
return reply({"error": "No query given"})
try:
tree = parse_query(query)
except QueryParseException as exc:
return reply({"error": exc.args[0]})

page = request.args.get("p", 1)
try:
page = int(page)
except ValueError:
return reply({"error": u"Invalid page number: %s" % page})

highlight = request.args.get("hl", "0")
highlight = highlight.lower() not in ["0", "false", "no"]

count, codelets = database.search(tree, page)
results = [clt.serialize(highlight) for clt in codelets]
return reply({"count": count, "results": results})

@app.route("/about")
def about():
return render_template("about.html")

@app.route("/docs")
def docs():
return render_template("docs.html")

@app.errorhandler(404)
def error404(error):
return render_template("error404.html"), 404

if __name__ == "__main__":
app.run()
app.run(debug=True)
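As a quick illustration of the `/search.json` endpoint added above: it takes a query string `q`, an optional page number `p`, and an optional highlight flag `hl`, and returns either an error object or a result count plus serialized codelets. A minimal client sketch, assuming the development server started by `app.run(debug=True)` is listening on the default `http://127.0.0.1:5000`; the query string itself is just an example:

    import requests

    resp = requests.get("http://127.0.0.1:5000/search.json",
                        params={"q": "quicksort", "p": 1, "hl": 1})
    data = resp.json()
    if "error" in data:
        print(data["error"])
    else:
        print("%d results" % data["count"])
        for result in data["results"]:
            # each result is Codelet.serialize() output
            print("%s <%s>" % (result["name"], result["url"]))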

bitshift/__init__.py (+8 -1)

@@ -1 +1,8 @@
__all__ = ["config", "assets"]
# -*- coding: utf-8 -*-

__author__ = "Benjamin Attal, Ben Kurtovic, Severyn Kozak"
__copyright__ = "Copyright (c) 2014 Benjamin Attal, Ben Kurtovic, Severyn Kozak"
__license__ = "MIT License"
__version__ = "0.1.dev"

from . import assets, codelet, config, crawler, database, parser, query

bitshift/assets.py (+31 -7)

@@ -1,22 +1,46 @@
"""
Module contains helper functions to be used inside the project's Jinja
templates.
:synopsis: Helper functions for use inside the project's Jinja templates.
"""

import re

from flask import Markup

ASSET_HTML_TEMPLATES = {
'css': "<link rel='stylesheet' type='text/css' href='/static/css/%s'>",
'js': "<script src='/static/js/%s'></script>"
'css': "<link rel='stylesheet' type='text/css' href='/css/%s'>",
'js': "<script src='/js/%s'></script>"
}

def tag(filename):
"""
Return HTML tag for asset named filename.
Generate an HTML tag for a CSS/JS asset, based on its file extension.

:param filename: The filename of the asset to create a tag for.

:type filename: str

Return either a <script> or <link> tag to the file named filename,
based on its extension.
:return: A string containing a `<script>` tag for JS files, and a `<link>`
for CSS files.
:rtype: str
"""

file_ext = filename.split(".")[-1]
return Markup(ASSET_HTML_TEMPLATES[file_ext] % filename)

def syntax_highlight(msg):
"""
Inserts HTML `<span>` elements into a string, for symbol/word styling.

Args:
msg : (str) A message.
"""

msg.replace("<", "&;lt")
msg.replace(">", "&;gt")

font_size = 16.0 / len(msg)
msg = re.sub('([!()"%])', '<span class="dark">\\1</span>', msg)
msg = re.sub('([:.;,])', '<span class="red">\\1</span>', msg)
msg = msg.replace("404", '<span class="red">404</span>')
return "<span class='light' style='font-size: %fem'>%s</span>" % (
font_size, msg)
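For reference, a short sketch of what `assets.tag()` produces with the templates above (in the real app it is called through the `assets` global registered in `app.py`); the filenames are only examples, and a configured checkout is assumed so `bitshift` imports cleanly:

    from bitshift import assets

    print(assets.tag("main.css"))
    # <link rel='stylesheet' type='text/css' href='/css/main.css'>
    print(assets.tag("main.js"))
    # <script src='/js/main.js'></script>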

bitshift/codelet.py (+103 -0)

@@ -0,0 +1,103 @@
from operator import concat

from pygments import highlight
from pygments.lexers import find_lexer_class, get_lexer_by_name
from pygments.formatters.html import HtmlFormatter

from .languages import LANGS

__all__ = ["Codelet"]

class Codelet(object):
"""
A source-code object with code metadata and composition analysis.

:ivar name: (str) A suitable name for the codelet.
:ivar code: (str) A string containing the raw source code.
:ivar filename: (str, or None) The filename of the snippet.
:ivar language: (int, or None) The inferred language of `code`.
:ivar authors: (array of tuples (str, str or None)) An array of tuples
containing an author's name and profile URL (on the service the code
was pulled from).
:ivar url: (str) The url of the (page containing the) source code.
:ivar date_created: (:class:`datetime.datetime`, or None) The date the code
was published.
:ivar date_modified: (:class:`datetime.datetime`, or None) The date the
code was last modified.
:ivar rank: (float) A quantification of the source code's quality, as
per available ratings (stars, forks, upvotes, etc.).
:ivar symbols: (dict) Dictionary containing dictionaries of functions,
classes, variable definitions, etc.
:ivar origin: (tuple) 2-tuple of (site_name, site_url), as added by the
database.
"""

def __init__(self, name, code, filename, language, authors, url,
date_created, date_modified, rank, symbols=None, origin=None):
"""
Create a Codelet instance.

:param name: see :attr:`self.name`
:param code: see :attr:`self.code`
:param filename: see :attr:`self.filename`
:param language: see :attr:`self.language`
:param authors: see :attr:`self.authors`
:param url: see :attr:`self.url`
:param date_created: see :attr:`self.date_created`
:param date_modified: see :attr:`self.date_modified`
:param rank: see :attr:`self.rank`
:param symbols: see :attr:`self.symbols`
:param origin: see :attr:`self.origin`

:type name: see :attr:`self.name`
:type code: see :attr:`self.code`
:type filename: see :attr:`self.filename`
:type language: see :attr:`self.language`
:type authors: see :attr:`self.authors`
:type url: see :attr:`self.url`
:type date_created: see :attr:`self.date_created`
:type date_modified: see :attr:`self.date_modified`
:type rank: see :attr:`self.rank`
:type symbols: see :attr:`self.symbols`
:type origin: see :attr:`self.origin`
"""

self.name = name
self.code = code
self.filename = filename
self.language = language
self.authors = authors
self.url = url
self.date_created = date_created
self.date_modified = date_modified
self.rank = rank
self.symbols = symbols or {}
self.origin = origin or (None, None)

def serialize(self, highlight_code=False):
"""
Convert the codelet into a dictionary that can be sent as JSON.

:param highlight_code: Whether to return code as pygments-highlighted
HTML or as plain source.
:type highlight_code: bool

:return: The codelet as a dictionary.
:rtype: dict
"""
lang = LANGS[self.language]
code = self.code
if highlight_code:
lexer = find_lexer_class(lang)() or get_lexer_by_name("text")
symbols = reduce(concat, self.symbols.values(), [])
lines = reduce(concat, [[loc[0] for loc in sym[1] + sym[2]]
for sym in symbols], [])
formatter = HtmlFormatter(linenos=True, hl_lines=lines)
code = highlight(code, lexer, formatter)

return {
"name": self.name, "code": code, "lang": lang,
"authors": self.authors, "url": self.url,
"created": self.date_created.isoformat(),
"modified": self.date_modified.isoformat(), "origin": self.origin
}
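A minimal sketch of constructing and serializing a `Codelet`, assuming a configured checkout so that `bitshift` and its dependencies import cleanly; all of the metadata values below are hypothetical, and `language` is an index into `LANGS` as the rest of the codebase expects:

    from datetime import datetime
    from bitshift.codelet import Codelet
    from bitshift.languages import LANGS

    codelet = Codelet(
        name="user/repo: app.py",              # hypothetical repository/file
        code="def main():\n    pass\n",
        filename="app.py",
        language=LANGS.index("Python"),
        authors=[("Jane Doe", None)],
        url="https://github.com/user/repo/blob/master/app.py",
        date_created=datetime(2014, 1, 1),
        date_modified=datetime(2014, 6, 1),
        rank=0.5)

    print(codelet.serialize())   # pass highlight_code=True for Pygments HTML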

bitshift/crawler/crawl.py (+94 -0)

@@ -0,0 +1,94 @@
"""
:synopsis: Parent crawler module, which supervises all crawlers.

Contains functions for initializing all subsidiary, threaded crawlers.
"""

import logging
import logging.handlers
import os
import Queue
import sys
import time
from threading import Event

from .crawler import GitHubCrawler, BitbucketCrawler
from .indexer import GitIndexer, GitRepository

__all__ = ["crawl"]

MAX_URL_QUEUE_SIZE = 5e3

def crawl():
"""
Initialize all crawlers (and indexers).

Start the:
1. GitHub crawler, :class:`crawler.GitHubCrawler`.
2. Bitbucket crawler, :class:`crawler.BitbucketCrawler`.
3. Git indexer, :class:`bitshift.crawler.indexer.GitIndexer`.
"""

_configure_logging()
time.sleep(5)

repo_clone_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE)
run_event = Event()
run_event.set()
threads = [GitIndexer(repo_clone_queue, run_event)]

if sys.argv[1:]:
names = sys.argv[1:]
ranks = GitHubCrawler.get_ranks(names)
for name in names:
repo = GitRepository("https://github.com/" + name, name, "GitHub",
ranks[name])
repo_clone_queue.put(repo)
else:
threads += [GitHubCrawler(repo_clone_queue, run_event),
BitbucketCrawler(repo_clone_queue, run_event)]

for thread in threads:
thread.start()

try:
while 1:
time.sleep(0.1)
except KeyboardInterrupt:
run_event.clear()
with repo_clone_queue.mutex:
repo_clone_queue.queue.clear()
for thread in threads:
thread.join()

def _configure_logging():
# This isn't ideal, since it means the bitshift python package must be kept
# inside the app, but it works for now:
root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
log_dir = os.path.join(root, "logs")

if not os.path.exists(log_dir):
os.mkdir(log_dir)

logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)

formatter = logging.Formatter(
fmt=("%(asctime)s %(levelname)s %(name)s %(message)s"),
datefmt="%y-%m-%d %H:%M:%S")

file_handler = logging.handlers.TimedRotatingFileHandler(
"%s/%s" % (log_dir, "app.log"), when="H", interval=1,
backupCount=20)
stream_handler = logging.StreamHandler()
file_handler.setFormatter(formatter)
stream_handler.setFormatter(formatter)

root_logger = logging.getLogger()
root_logger.handlers = []
root_logger.addHandler(file_handler)
root_logger.addHandler(stream_handler)
root_logger.setLevel(logging.NOTSET)

if __name__ == "__main__":
crawl()

bitshift/crawler/crawler.py (+243 -0)

@@ -0,0 +1,243 @@
"""
:synopsis: Main crawler module, to oversee all site-specific crawlers.

Contains all website/framework-specific crawler classes.
"""

import logging
import math
import time
import threading

import requests

from . import indexer

class GitHubCrawler(threading.Thread):
"""
Crawler that retrieves links to all of GitHub's public repositories.

GitHubCrawler is a threaded singleton that queries GitHub's API for urls
to its public repositories, which it inserts into a :class:`Queue.Queue`
shared with :class:`indexer.GitIndexer`.

:ivar clone_queue: (:class:`Queue.Queue`) Contains :class:`GitRepository`
with repository metadata retrieved by :class:`GitHubCrawler`, and other Git
crawlers, to be processed by :class:`indexer.GitIndexer`.
:ivar _logger: (:class:`logging.Logger`) A class-specific logger object.
"""

AUTHENTICATION = {
"client_id" : "436cb884ae09be7f2a4e",
"client_secret" : "8deeefbc2439409c5b7a092fd086772fe8b1f24e"
}

def __init__(self, clone_queue, run_event):
"""
Create an instance of the singleton `GitHubCrawler`.

:param clone_queue: see :attr:`self.clone_queue`

:type clone_queue: see :attr:`self.clone_queue`
"""

self.clone_queue = clone_queue
self.run_event = run_event
self._logger = logging.getLogger("%s.%s" %
(__name__, self.__class__.__name__))
self._logger.info("Starting.")
super(GitHubCrawler, self).__init__(name=self.__class__.__name__)

def run(self):
"""
Query the GitHub API for data about every public repository.

Pull all of GitHub's repositories by making calls to its API in a loop,
accessing a subsequent page of results via the "next" URL returned in an
API response header. Uses Severyn Kozak's (sevko) authentication
credentials. For every new repository, a :class:`GitRepository` is
inserted into :attr:`self.clone_queue`.
"""

next_api_url = "https://api.github.com/repositories"
api_request_interval = 5e3 / 60 ** 2

while next_api_url and self.run_event.is_set():
start_time = time.time()

try:
resp = requests.get(next_api_url, params=self.AUTHENTICATION)
except requests.ConnectionError:
self._logger.exception("API %s call failed:" % next_api_url)
time.sleep(0.5)
continue

queue_percent_full = (float(self.clone_queue.qsize()) /
self.clone_queue.maxsize) * 100
self._logger.info("API call made. Queue size: %d/%d, %d%%." %
((self.clone_queue.qsize(), self.clone_queue.maxsize,
queue_percent_full)))

repo_names = [repo["full_name"] for repo in resp.json()]
repo_ranks = self.get_ranks(repo_names)

for repo in resp.json():
while self.clone_queue.full():
time.sleep(1)

self.clone_queue.put(indexer.GitRepository(
repo["html_url"], repo["full_name"], "GitHub",
repo_ranks[repo["full_name"]]))

if int(resp.headers["x-ratelimit-remaining"]) == 0:
time.sleep(int(resp.headers["x-ratelimit-reset"]) -
time.time())

next_api_url = resp.headers["link"].split(">")[0][1:]

sleep_time = api_request_interval - (time.time() - start_time)
if sleep_time > 0:
time.sleep(sleep_time)

@classmethod
def get_ranks(cls, repo_names):
"""
Return the ranks for several repositories.

Queries the GitHub API for the number of stargazers for any given
repositories, and blocks if the query limit is exceeded. The rank is
calculated using these numbers.

:param repo_names: An array of repository names, in
`username/repository_name` format.

:type repo_names: list of str

:return: A dictionary mapping repository names to ranks.

Example dictionary:
.. code-block:: python
{
"user/repository" : 0.2564949357461537
}

:rtype: dictionary
"""

API_URL = "https://api.github.com/search/repositories"
REPOS_PER_QUERY = 25

repo_ranks = {}
for names in [repo_names[ind:ind + REPOS_PER_QUERY] for ind in
xrange(0, len(repo_names), REPOS_PER_QUERY)]:
query_url = "%s?q=%s" % (API_URL,
"+".join("repo:%s" % name for name in names))

params = cls.AUTHENTICATION
resp = requests.get(query_url,
params=params,
headers={
"Accept" : "application/vnd.github.preview"
})

if int(resp.headers["x-ratelimit-remaining"]) == 0:
sleep_time = int(resp.headers["x-ratelimit-reset"]) - \
time.time() + 1
if sleep_time > 0:
logging.info("API quota exceeded. Sleep time: %d." %
sleep_time)
time.sleep(sleep_time)

for repo in resp.json()["items"]:
stars = repo["stargazers_count"]
rank = min(math.log(max(stars, 1), 5000), 1.0)
repo_ranks[repo["full_name"]] = rank

for name in repo_names:
if name not in repo_ranks:
repo_ranks[name] = 0.1

return repo_ranks

class BitbucketCrawler(threading.Thread):
"""
Crawler that retrieves links to all of Bitbucket's public repositories.

BitbucketCrawler is a threaded singleton that queries Bitbucket's API for
urls to its public repositories, and inserts them as
:class:`indexer.GitRepository` into a :class:`Queue.Queue` shared with
:class:`indexer.GitIndexer`.

:ivar clone_queue: (:class:`Queue.Queue`) The shared queue to insert
:class:`indexer.GitRepository` repository urls into.
:ivar _logger: (:class:`logging.Logger`) A class-specific logger object.
"""

def __init__(self, clone_queue, run_event):
"""
Create an instance of the singleton `BitbucketCrawler`.

:param clone_queue: see :attr:`self.clone_queue`

:type clone_queue: see :attr:`self.clone_queue`
"""

self.clone_queue = clone_queue
self.run_event = run_event
self._logger = logging.getLogger("%s.%s" %
(__name__, self.__class__.__name__))
self._logger.info("Starting.")
super(BitbucketCrawler, self).__init__(name=self.__class__.__name__)

def run(self):
"""
Query the Bitbucket API for data about every public repository.

Query the Bitbucket API's "/repositories" endpoint and read its
paginated responses in a loop; any "git" repositories have their
clone-urls and names inserted into a :class:`indexer.GitRepository` in
:attr:`self.clone_queue`.
"""

next_api_url = "https://api.bitbucket.org/2.0/repositories"

while self.run_event.is_set():
try:
response = requests.get(next_api_url).json()
except requests.ConnectionError:
self._logger.exception("API %s call failed:", next_api_url)
time.sleep(0.5)
continue

queue_percent_full = (float(self.clone_queue.qsize()) /
self.clone_queue.maxsize) * 100
self._logger.info("API call made. Queue size: %d/%d, %d%%." %
((self.clone_queue.qsize(), self.clone_queue.maxsize,
queue_percent_full)))

for repo in response["values"]:
if repo["scm"] == "git":
while self.clone_queue.full():
time.sleep(1)

clone_links = repo["links"]["clone"]
clone_url = (clone_links[0]["href"] if
clone_links[0]["name"] == "https" else
clone_links[1]["href"])

try:
watchers = requests.get(
repo["links"]["watchers"]["href"])
num = len(watchers.json()["values"])
rank = min(math.log(max(num, 1), 500), 1.0)
except requests.ConnectionError:
err = "API %s call failed:" % next_api_url
self._logger.exception(err)
time.sleep(0.5)
continue

self.clone_queue.put(indexer.GitRepository(
clone_url, repo["full_name"], "Bitbucket"), rank)

next_api_url = response["next"]
time.sleep(0.2)
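Both crawlers map popularity counts onto a rank in [0, 1] with a saturating logarithm (base 5000 for GitHub stargazers, base 500 for Bitbucket watchers). A standalone sketch of the GitHub formula from `get_ranks()`, with illustrative star counts:

    import math

    def star_rank(stars):
        # Logarithmic in the star count, clamped to 1.0 at >= 5000 stars.
        return min(math.log(max(stars, 1), 5000), 1.0)

    for stars in (1, 10, 100, 1000, 5000, 50000):
        print("%6d stars -> rank %.3f" % (stars, star_rank(stars)))
    # 1 -> 0.000, 10 -> 0.270, 100 -> 0.541, 1000 -> 0.811, 5000+ -> 1.000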

bitshift/crawler/indexer.py (+348 -0)

@@ -0,0 +1,348 @@
"""
:synopsis: Contains a singleton GitIndexer class, which clones and indexes git
repositories.
"""

from datetime import datetime
import logging
import os
import Queue
import shutil
import string
import time
import threading

from bs4 import UnicodeDammit
import git

from ..database import Database
from ..parser import parse, UnsupportedFileError
from ..codelet import Codelet

GIT_CLONE_DIR = "/tmp/bitshift"
THREAD_QUEUE_SLEEP = 0.5
MAX_INDEX_QUEUE_SIZE = 10

class GitRepository(object):
"""
A representation of a Git repository's metadata.

:ivar url: (str) The repository's url.
:ivar name: (str) The name of the repository.
:ivar framework_name: (str) The name of the online Git framework that the
repository belongs to (eg, GitHub, BitBucket).
:ivar rank: (float) The rank of the repository, as assigned by
:class:`crawler.GitHubCrawler`.
:ivar path: (str) The repository's on-disk directory path.
:ivar repo: (git.Repo) A git.Repo representation of the repository.
"""

def __init__(self, url, name, framework_name, rank):
"""
Create a GitRepository instance.

:param url: see :attr:`GitRepository.url`
:param name: see :attr:`GitRepository.name`
:param framework_name: see :attr:`GitRepository.framework_name`
:param rank: see :attr:`GitRepository.rank`

:type url: str
:type name: str
:type framework_name: str
:type rank: float
"""

self.url = url
self.name = name
self.framework_name = framework_name
self.rank = rank
dirname = name.replace("/", "-") + "-" + str(int(time.time()))
self.path = os.path.join(GIT_CLONE_DIR, dirname)
self.repo = None

class GitIndexer(threading.Thread):
"""
A singleton Git repository indexer.

:class:`GitIndexer` indexes the repositories cloned by the
:class:`_GitCloner` singleton.

:ivar index_queue: (:class:`Queue.Queue`) A queue containing
:class:`GitRepository` objects for every new repository successfully
cloned by :class:`_GitCloner`, which are to be indexed.
:ivar git_cloner: (:class:`_GitCloner`) The corresponding repository
cloner, which feeds :class:`GitIndexer`.
:ivar _logger: (:class:`logging.Logger`) A class-specific logger object.
"""

def __init__(self, clone_queue, run_event):
"""
Create an instance of the singleton `GitIndexer`.

:param clone_queue: see :attr:`self.index_queue`

:type index_queue: see :attr:`self.index_queue`
"""

self.index_queue = Queue.Queue(maxsize=MAX_INDEX_QUEUE_SIZE)
self.run_event = run_event
self.git_cloner = _GitCloner(clone_queue, self.index_queue, run_event)
self.git_cloner.start()
self.database = Database()
self._logger = logging.getLogger("%s.%s" %
(__name__, self.__class__.__name__))
self._logger.info("Starting.")

if not os.path.exists(GIT_CLONE_DIR):
os.makedirs(GIT_CLONE_DIR)

super(GitIndexer, self).__init__(name=self.__class__.__name__)

def run(self):
"""
Retrieve metadata about newly cloned repositories and index them.

Blocks until new repositories appear in :attr:`self.index_queue`, then
retrieves one, and attempts indexing it. Should any errors occur, the
new repository will be discarded and the indexer will index the next in
the queue.
"""

while True:
while self.index_queue.empty() and self.run_event.is_set():
time.sleep(THREAD_QUEUE_SLEEP)
if not self.run_event.is_set():
break

repo = self.index_queue.get()
self.index_queue.task_done()
self._index_repository(repo)

def _index_repository(self, repo):
"""
Clone and index (create and insert Codelets for) a Git repository.

`git clone` the Git repository located at **repo.url**, call
`_insert_repository_codelets()`, then remove said repository.

:param repo: The metadata of the repository to be indexed.
:type repo: :class:`GitRepository`
"""

self._logger.info(u"Indexing repo: %s", repo.name)
try:
self._insert_repository_codelets(repo)
except Exception:
self._logger.exception("Exception raised while indexing:")
finally:
if os.path.isdir(repo.path):
shutil.rmtree(repo.path)

def _insert_repository_codelets(self, repo):
"""
Create and insert a Codelet for the files inside a Git repository.

Create a new Codelet, and insert it into the Database singleton, for
every file inside the current working directory's default branch
(usually *master*).

:param repo_url: The metadata of the repository to be indexed.

:type repo_url: :class:`GitRepository`
"""

file_meta = self._get_file_metadata(repo.repo)
if file_meta is None:
return

for filename, data in file_meta.iteritems():
name = ("%s: %s" % (repo.name, filename)).encode("utf8")
authors = [(author, None) for author in data["authors"]]
encoded_source = data["blob"].data_stream.read()
source = UnicodeDammit(encoded_source).unicode_markup
url = self._generate_file_url(filename, repo)
codelet = Codelet(name, source, filename, None, authors, url,
data["time_created"], data["time_last_modified"],
repo.rank)
self._logger.debug("Indexing file: %s", codelet.name)
try:
parse(codelet)
except UnsupportedFileError:
continue
except Exception:
self._logger.exception("Exception raised while parsing:")
self.database.insert(codelet)

def _generate_file_url(self, filename, repo):
"""
Return a url for a filename from a Git wrapper framework.

:param filename: The path of the file.
:param repo: The git repo.

:type filename: str
:type repo: :class:`GitRepository`

:return: The file's full url on the given framework, if successfully
derived.
:rtype: str, or None
"""

if repo.framework_name == "GitHub":
default_branch = repo.repo.active_branch.name
parts = [repo.url, "blob", default_branch, filename]
elif repo.framework_name == "Bitbucket":
try:
commit_hash = repo.repo.head.commit.hexsha
except ValueError: # No commits
return None
parts = [repo.url, "src", commit_hash, filename]
return "/".join(s.strip("/") for s in parts)

def _get_file_metadata(self, repo):
"""
Return a dictionary containing every valuable tracked file's metadata.

:return: A dictionary with author names, time of creation, and time of
last modification for every filename key.
.. code-block:: python
sample_returned_dict = {
"my_file" : {
"blob": (GitPython Blob) <object>,
"authors" : (str list) ["author1", "author2"],
"time_created" : (`datetime.datetime`) <object>,
"time_last_modified" : (`datetime.datetime`) <object>
}
}
:rtype: dictionary of dictionaries
"""
try:
tree = repo.head.commit.tree
except ValueError: # No commits
return {}

files = {}
self._logger.debug("Building file metadata")
for item in tree.traverse():
if item.type != "blob" or not self._is_ascii(item.data_stream):
continue
log = repo.git.log("--follow", '--format=%an %ct', "--", item.path)
lines = log.splitlines()
authors = {line.rsplit(" ", 1)[0].decode("utf8") for line in lines}
last_mod = int(lines[0].rsplit(" ", 1)[1])
created = int(lines[-1].rsplit(" ", 1)[1])

files[item.path] = {
"blob": item,
"authors" : authors,
"time_last_modified": datetime.fromtimestamp(last_mod),
"time_created": datetime.fromtimestamp(created)
}

return files

def _is_ascii(self, source):
"""
Heuristically determine whether a file is ASCII text or binary.

If a portion of the file contains null bytes, or the percentage of bytes
that aren't ASCII is greater than 30%, then the file is concluded to be
binary. This heuristic is used by the `file` utility, Perl's inbuilt `-T`
operator, and is the de-facto method for determining whether a
file is ASCII.

:param source: The file object to test.

:type source: `file`

:return: Whether the file is probably ASCII.
:rtype: Boolean
"""

file_snippet = source.read(512)

if not file_snippet:
return True

ascii_characters = "".join(map(chr, range(32, 127)) +
list("\n\r\t\b"))
null_trans = string.maketrans("", "")

if "\0" in file_snippet:
return False

non_ascii = file_snippet.translate(null_trans, ascii_characters)
return not float(len(non_ascii)) / len(file_snippet) > 0.30

class _GitCloner(threading.Thread):
"""
A singleton Git repository cloner.

Clones the repositories crawled by :class:`crawler.GitHubCrawler` for
:class:`GitIndexer` to index.

:ivar clone_queue: (:class:`Queue.Queue`) see
:attr:`crawler.GitHubCrawler.clone_queue`.
:ivar index_queue: (:class:`Queue.Queue`) see
:attr:`GitIndexer.index_queue`.
:ivar _logger: (:class:`logging.Logger`) A class-specific logger object.
"""

def __init__(self, clone_queue, index_queue, run_event):
"""
Create an instance of the singleton :class:`_GitCloner`.

:param clone_queue: see :attr:`self.clone_queue`
:param index_queue: see :attr:`self.index_queue`

:type clone_queue: see :attr:`self.clone_queue`
:type index_queue: see :attr:`self.index_queue`
"""

self.clone_queue = clone_queue
self.index_queue = index_queue
self.run_event = run_event
self._logger = logging.getLogger("%s.%s" %
(__name__, self.__class__.__name__))
self._logger.info("Starting.")
super(_GitCloner, self).__init__(name=self.__class__.__name__)

def run(self):
"""
Retrieve metadata about newly crawled repositories and clone them.

Blocks until new :class:`GitRepository` appear in
:attr:`self.clone_queue`, then attempts cloning them. If
successful, the cloned repository is added to :attr:`self.index_queue`
for the `GitIndexer` to index; otherwise, it is discarded.
"""

while True:
while self.clone_queue.empty() and self.run_event.is_set():
time.sleep(THREAD_QUEUE_SLEEP)
if not self.run_event.is_set():
break
repo = self.clone_queue.get()
self.clone_queue.task_done()

try:
self._clone_repository(repo)
except Exception:
self._logger.exception("Exception raised while cloning:")

def _clone_repository(self, repo):
"""
Attempt cloning a Git repository.

:param repo: Metadata about the repository to clone.

:type repo: :class:`GitRepository`
"""

self._logger.info("Cloning repo: %s", repo.url)
repo.repo = git.Repo.clone_from(repo.url, to_path=repo.path, bare=True,
single_branch=True)
while self.index_queue.full() and self.run_event.is_set():
time.sleep(THREAD_QUEUE_SLEEP)
if self.run_event.is_set():
self.index_queue.put(repo)
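The text-vs-binary check in `GitIndexer._is_ascii()` can be read in isolation as follows; this is a Python 2 sketch (matching the codebase, hence `string.maketrans` and `str.translate` with a deletion table):

    import string

    def looks_like_text(data):
        # Reject the sample if it contains NUL bytes, or if more than 30% of
        # its first 512 bytes fall outside printable ASCII plus \n \r \t \b.
        sample = data[:512]
        if not sample:
            return True
        if "\0" in sample:
            return False
        allowed = "".join(map(chr, range(32, 127))) + "\n\r\t\b"
        non_ascii = sample.translate(string.maketrans("", ""), allowed)
        return len(non_ascii) / float(len(sample)) <= 0.30

    print(looks_like_text("int main() { return 0; }"))   # True
    print(looks_like_text("\x00\x89PNG\r\n"))            # False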

bitshift/database/__init__.py (+239 -0)

@@ -0,0 +1,239 @@
"""
Subpackage with classes and functions to handle communication with the MySQL
database backend, which manages the search index.
"""

import codecs
import os

import mmh3
import oursql

from .migration import VERSION, MIGRATIONS
from ..codelet import Codelet
from ..query.nodes import (String, Regex, Text, Language, Author, Date, Symbol,
BinaryOp, UnaryOp)

__all__ = ["Database"]

class Database(object):
"""Represents the MySQL database."""

def __init__(self, migrate=False):
self._conn = self._connect()
self._check_version(migrate)

def _connect(self):
"""Establish a connection to the database."""
try:
codecs.lookup("utf8mb4")
except LookupError:
utf8 = codecs.lookup("utf8")
codecs.register(lambda name: utf8 if name == "utf8mb4" else None)

root = os.path.dirname(os.path.abspath(__file__))
default_file = os.path.join(root, ".my.cnf")
return oursql.connect(
db="bitshift", read_default_file=default_file, autoping=True,
autoreconnect=True, charset="utf8mb4")

def _migrate(self, cursor, current):
"""Migrate the database to the latest schema version."""
for version in xrange(current, VERSION):
print "Migrating to %d..." % (version + 1)
for query in MIGRATIONS[version - 1]:
cursor.execute(query)
cursor.execute("UPDATE version SET version = ?", (version + 1,))

def _check_version(self, migrate):
"""Check the database schema version and respond accordingly.

If the schema is out of date, migrate if *migrate* is True, else raise
an exception.
"""
with self._conn.cursor() as cursor:
cursor.execute("SELECT version FROM version")
version = cursor.fetchone()[0]
if version < VERSION:
if migrate:
self._migrate(cursor, version)
else:
err = "Database schema out of date. " \
"Run `python -m bitshift.database.migration`."
raise RuntimeError(err)

def _search_with_query(self, cursor, tree, page):
"""Execute an SQL query based on a query tree, and return results.

The returned data is a 2-tuple of (list of codelet IDs, estimated
number of total results).
"""
query, args = tree.build_query(page)
cursor.execute(query, args)
ids = [cid for cid, _ in cursor.fetchall()]
num_results = len(ids) # TODO: This is not entirely correct
return ids, num_results

def _get_authors_for_codelet(self, cursor, codelet_id):
"""Return a list of authors for a given codelet."""
query = """SELECT author_name, author_url
FROM authors
WHERE author_codelet = ?"""

cursor.execute(query, (codelet_id,))
return cursor.fetchall()

def _get_symbols_for_code(self, cursor, code_id, tree):
"""Return a list of symbols for a given codelet."""
query = """SELECT symbol_type, symbol_name, sloc_type, sloc_row,
sloc_col, sloc_end_row, sloc_end_col
FROM symbols
INNER JOIN symbol_locations ON sloc_symbol = symbol_id
WHERE symbol_code = ? AND (%s)"""

conds, args = [], [code_id]
for node in tree.walk(Symbol):
node_cond, node_args, _, _ = node.parameterize(set())
conds.append(node_cond)
args += node_args
if not conds:
return {}
cond = " OR ".join(conds)

symbols = {type_: {} for type_ in Symbol.TYPES}
cursor.execute(query % cond, tuple(args))
for type_, name, loc_type, row, col, erow, ecol in cursor.fetchall():
sdict = symbols[Symbol.TYPES[type_]]
if name not in sdict:
sdict[name] = ([], [])
sdict[name][loc_type].append((row, col, erow, ecol))
for type_, sdict in symbols.items():
symbols[type_] = [(n, d, u) for n, (d, u) in sdict.iteritems()]
return symbols

def _get_codelets_from_ids(self, cursor, ids, tree):
"""Return a list of Codelet objects given a list of codelet IDs."""
query = """SELECT *
FROM codelets
INNER JOIN code ON codelet_code_id = code_id
INNER JOIN origins ON codelet_origin = origin_id
WHERE codelet_id = ?"""

with self._conn.cursor(oursql.DictCursor) as dict_cursor:
for codelet_id in ids:
dict_cursor.execute(query, (codelet_id,))
row = dict_cursor.fetchall()[0]
code_id = row["code_id"]
if row["origin_url_base"]:
url = row["origin_url_base"] + row["codelet_url"]
else:
url = row["codelet_url"]
origin = (row["origin_name"], row["origin_url"])
authors = self._get_authors_for_codelet(cursor, codelet_id)
symbols = self._get_symbols_for_code(cursor, code_id, tree)
yield Codelet(
row["codelet_name"], row["code_code"], None,
row["code_lang"], authors, url,
row["codelet_date_created"], row["codelet_date_modified"],
row["codelet_rank"], symbols, origin)

def _decompose_url(self, cursor, url):
"""Break up a URL into an origin (with a URL base) and a suffix."""
query = """SELECT origin_id, SUBSTR(?, LENGTH(origin_url_base) + 1)
FROM origins
WHERE origin_url_base IS NOT NULL
AND ? LIKE CONCAT(origin_url_base, "%")"""

cursor.execute(query, (url, url))
result = cursor.fetchone()
return result if result else (1, url)

def _insert_symbols(self, cursor, code_id, sym_type, symbols):
"""Insert a list of symbols of a given type into the database."""
query1 = "INSERT INTO symbols VALUES (DEFAULT, ?, ?, ?)"
query2 = """INSERT INTO symbol_locations VALUES
(DEFAULT, ?, ?, ?, ?, ?, ?)"""
build = lambda id, L, typ: [tuple([id, typ] + list(loc)) for loc in L]

type_id = Symbol.TYPES.index(sym_type)
for (name, defs, uses) in symbols:
cursor.execute(query1, (code_id, type_id, name))
sym_id = cursor.lastrowid
params = (build(sym_id, defs, Symbol.DEFINE) +
build(sym_id, uses, Symbol.USE))
cursor.executemany(query2, params)

def close(self):
"""Disconnect from the database."""
self._conn.close()

def search(self, tree, page=1):
"""
Search the database for a query and return the *n*\ th page of results.

:param tree: The query to search for.
:type tree: :py:class:`~.query.tree.Tree`
:param page: The result page to display.
:type page: int

:return: The total number of results, and the *n*\ th page of results.
:rtype: 2-tuple of (long, list of :py:class:`.Codelet`\ s)
"""
query1 = "SELECT 1 FROM cache WHERE cache_id = ?"
query2 = """SELECT cdata_codelet, cache_count_mnt, cache_count_exp
FROM cache
INNER JOIN cache_data ON cache_id = cdata_cache
WHERE cache_id = ?
ORDER BY cdata_index ASC"""
query3 = "INSERT INTO cache VALUES (?, ?, ?, DEFAULT)"
query4 = "INSERT INTO cache_data VALUES (?, ?, ?)"

cache_id = mmh3.hash64(str(page) + ":" + tree.serialize())[0]

with self._conn.cursor() as cursor:
cursor.execute(query1, (cache_id,))
cache_hit = cursor.fetchall()
if cache_hit:
cursor.execute(query2, (cache_id,))
rows = cursor.fetchall()
num_results = rows[0][1] * (10 ** rows[0][2]) if rows else 0
ids = [row[0] for row in rows]
else:
ids, num_results = self._search_with_query(cursor, tree, page)
num_exp = max(len(str(num_results)) - 3, 0)
num_results = int(round(num_results, -num_exp))
num_mnt = num_results / (10 ** num_exp)
cursor.execute(query3, (cache_id, num_mnt, num_exp))
cdata = [(cache_id, c_id, i) for i, c_id in enumerate(ids)]
cursor.executemany(query4, cdata)
codelet_gen = self._get_codelets_from_ids(cursor, ids, tree)
return (num_results, list(codelet_gen))

def insert(self, codelet):
"""
Insert a codelet into the database.

:param codelet: The codelet to insert.
:type codelet: :py:class:`.Codelet`
"""
query1 = """INSERT INTO code VALUES (?, ?, ?)
ON DUPLICATE KEY UPDATE code_id=code_id"""
query2 = """INSERT INTO codelets VALUES
(DEFAULT, ?, ?, ?, ?, ?, ?, ?)"""
query3 = "INSERT INTO authors VALUES (DEFAULT, ?, ?, ?)"

hash_key = str(codelet.language) + ":" + codelet.code.encode("utf8")
code_id = mmh3.hash64(hash_key)[0]

with self._conn.cursor() as cursor:
cursor.execute(query1, (code_id, codelet.language, codelet.code))
if cursor.rowcount == 1:
for sym_type, symbols in codelet.symbols.iteritems():
self._insert_symbols(cursor, code_id, sym_type, symbols)
origin, url = self._decompose_url(cursor, codelet.url)
cursor.execute(query2, (codelet.name, code_id, origin, url,
codelet.rank, codelet.date_created,
codelet.date_modified))
codelet_id = cursor.lastrowid
authors = [(codelet_id, a[0], a[1]) for a in codelet.authors]
cursor.executemany(query3, authors)
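One subtle detail in `search()` is how the estimated result count is cached: it is rounded to roughly three significant digits and stored as a (mantissa, exponent) pair in `cache_count_mnt`/`cache_count_exp`, then reconstituted as `mnt * 10 ** exp` on a cache hit. A small sketch of that rounding, with made-up counts:

    def encode_count(num_results):
        # Mirrors the arithmetic in Database.search() before caching.
        num_exp = max(len(str(num_results)) - 3, 0)
        num_results = int(round(num_results, -num_exp))
        num_mnt = num_results // (10 ** num_exp)
        return num_mnt, num_exp

    print(encode_count(1234567))   # (123, 4) -- reconstituted as 1230000
    print(encode_count(42))        # (42, 0)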

bitshift/database/migration.py (+147 -0)

@@ -0,0 +1,147 @@
"""
Contains information about database schema versions, and SQL queries to update
between them.
"""

VERSION = 12

MIGRATIONS = [
# 1 -> 2
[
"""ALTER TABLE `codelets`
DROP FOREIGN KEY `codelets_ibfk_1`""",
"""ALTER TABLE `code`
DROP KEY `code_hash`,
DROP COLUMN `code_hash`,
MODIFY COLUMN `code_id` BIGINT NOT NULL""",
"""ALTER TABLE `codelets`
MODIFY COLUMN `codelet_code_id` BIGINT NOT NULL,
ADD KEY (`codelet_lang`),
ADD CONSTRAINT `codelets_ibfk_1` FOREIGN KEY (`codelet_code_id`)
REFERENCES `code` (`code_id`)
ON DELETE RESTRICT ON UPDATE CASCADE""",
"""ALTER TABLE `symbols`
ADD COLUMN `symbol_end_row` INT UNSIGNED NOT NULL,
ADD COLUMN `symbol_end_col` INT UNSIGNED NOT NULL"""
],
# 2 -> 3
[
"""ALTER TABLE `symbols`
DROP FOREIGN KEY `symbols_ibfk_1`,
CHANGE COLUMN `symbol_codelet` `symbol_code` BIGINT NOT NULL,
ADD CONSTRAINT `symbols_ibfk_1` FOREIGN KEY (`symbol_code`)
REFERENCES `code` (`code_id`)
ON DELETE CASCADE ON UPDATE CASCADE"""
],
# 3 -> 4
[
"""ALTER TABLE `symbols`
DROP COLUMN `symbol_row`,
DROP COLUMN `symbol_col`,
DROP COLUMN `symbol_end_row`,
DROP COLUMN `symbol_end_col`""",
"""CREATE TABLE `symbol_locations` (
`sloc_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
`sloc_symbol` BIGINT UNSIGNED NOT NULL,
`sloc_type` TINYINT UNSIGNED NOT NULL,
`sloc_row` INT UNSIGNED NOT NULL,
`sloc_col` INT UNSIGNED NOT NULL,
`sloc_end_row` INT UNSIGNED NOT NULL,
`sloc_end_col` INT UNSIGNED NOT NULL,
PRIMARY KEY (`sloc_id`),
FOREIGN KEY (`sloc_symbol`)
REFERENCES `symbols` (`symbol_id`)
ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB"""
],
# 4 -> 5
[
"""ALTER TABLE `origins`
MODIFY COLUMN `origin_name` VARCHAR(64) DEFAULT NULL,
MODIFY COLUMN `origin_url` VARCHAR(512) DEFAULT NULL,
MODIFY COLUMN `origin_url_base` VARCHAR(512) DEFAULT NULL"""
],
# 5 -> 6
[
"""ALTER TABLE `code`
ADD COLUMN `code_lang` SMALLINT UNSIGNED DEFAULT NULL
AFTER `code_id`,
ADD KEY (`code_lang`)""",
"""ALTER TABLE `codelets`
DROP KEY `codelet_lang`,
DROP COLUMN `codelet_lang`""",
"""ALTER TABLE `cache_data`
DROP FOREIGN KEY `cache_data_ibfk_1`""",
"""ALTER TABLE `cache`
MODIFY COLUMN `cache_id` BIGINT NOT NULL,
DROP COLUMN `cache_hash`,
DROP COLUMN `cache_last_used`,
MODIFY COLUMN `cache_count_mnt` SMALLINT UNSIGNED NOT NULL""",
"""ALTER TABLE `cache_data`
MODIFY COLUMN `cdata_cache` BIGINT NOT NULL,
ADD PRIMARY KEY (`cdata_cache`, `cdata_codelet`),
ADD CONSTRAINT `cache_data_ibfk_1` FOREIGN KEY (`cdata_codelet`)
REFERENCES `codelets` (`codelet_id`)
ON DELETE CASCADE ON UPDATE CASCADE""",
"""CREATE EVENT `flush_cache`
ON SCHEDULE EVERY 1 HOUR
DO
DELETE FROM `cache`
WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY);"""
],
# 6 -> 7
[
"""DELETE FROM `cache`""",
"""ALTER TABLE `cache_data`
ADD COLUMN `cdata_index` TINYINT UNSIGNED NOT NULL
AFTER `cdata_codelet`"""
],
# 7 -> 8
[
"""ALTER TABLE `origins`
DROP COLUMN `origin_image`"""
],
# 8 -> 9
[
"""DELIMITER //
CREATE PROCEDURE `empty_database`()
BEGIN
DELETE FROM `codelets`;
DELETE FROM `code`;
DELETE FROM `cache`;
ALTER TABLE `codelets` AUTO_INCREMENT = 1;
ALTER TABLE `authors` AUTO_INCREMENT = 1;
ALTER TABLE `symbols` AUTO_INCREMENT = 1;
ALTER TABLE `symbol_locations` AUTO_INCREMENT = 1;
END//
DELIMITER ;"""
],
# 9 -> 10
[
"""ALTER TABLE `symbol_locations`
MODIFY COLUMN `sloc_col` INT UNSIGNED DEFAULT NULL,
MODIFY COLUMN `sloc_end_row` INT UNSIGNED DEFAULT NULL,
MODIFY COLUMN `sloc_end_col` INT UNSIGNED DEFAULT NULL"""
],
# 10 -> 11
[
"""ALTER DATABASE `bitshift`
CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci"""
],
# 11 -> 12
[
"""CREATE TABLE `stopwords` (
`value` varchar(18) NOT NULL DEFAULT ""
) ENGINE=InnoDB DEFAULT CHARSET=utf8""",
"""INSERT INTO `stopwords` VALUES
("a"), ("about"), ("an"), ("are"), ("as"), ("at"), ("be"), ("by"),
("how"), ("i"), ("it"), ("la"), ("of"), ("on"), ("that"), ("the"),
("to"), ("und"), ("was"), ("what"), ("when"), ("where"), ("who"),
("will")"""
]
]

if __name__ == "__main__":
from . import Database

Database(migrate=True).close()
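`Database._check_version()` compares the stored schema version against `VERSION` and, when run with `migrate=True` (as in the `__main__` block above), replays each pending list of statements in order. A sketch of which steps would run for a database currently at version 10, assuming a configured checkout so the package imports cleanly:

    from bitshift.database.migration import MIGRATIONS, VERSION

    current = 10
    for version in range(current, VERSION):
        # MIGRATIONS[version - 1] holds the "version -> version + 1" statements.
        step = MIGRATIONS[version - 1]
        print("migrating to %d (%d statement(s))" % (version + 1, len(step)))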

bitshift/database/schema.sql (+141 -0)

@@ -0,0 +1,141 @@
-- Schema version 12

CREATE DATABASE `bitshift`
DEFAULT CHARACTER SET utf8mb4
COLLATE utf8mb4_unicode_ci;
USE `bitshift`;

CREATE TABLE `version` (
`version` INT UNSIGNED NOT NULL
) ENGINE=InnoDB;
INSERT INTO `version` VALUES (12);

CREATE TABLE `stopwords`
LIKE information_schema.innodb_ft_default_stopword
ENGINE=InnoDB;

CREATE TABLE `stopwords` (
`value` varchar(18) NOT NULL DEFAULT ""
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
INSERT INTO `stopwords` VALUES
("a"), ("about"), ("an"), ("are"), ("as"), ("at"), ("be"), ("by"), ("how"),
("i"), ("it"), ("la"), ("of"), ("on"), ("that"), ("the"), ("to"), ("und"),
("was"), ("what"), ("when"), ("where"), ("who"), ("will");

CREATE TABLE `origins` (
`origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT,
`origin_name` VARCHAR(64) DEFAULT NULL,
`origin_url` VARCHAR(512) DEFAULT NULL,
`origin_url_base` VARCHAR(512) DEFAULT NULL,
PRIMARY KEY (`origin_id`)
) ENGINE=InnoDB;
INSERT INTO `origins` VALUES (1, NULL, NULL, NULL);

CREATE TABLE `code` (
`code_id` BIGINT NOT NULL,
`code_lang` SMALLINT UNSIGNED DEFAULT NULL,
`code_code` MEDIUMTEXT NOT NULL,
PRIMARY KEY (`code_id`),
KEY (`code_lang`),
FULLTEXT KEY (`code_code`)
) ENGINE=InnoDB;

CREATE TABLE `codelets` (
`codelet_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
`codelet_name` VARCHAR(300) NOT NULL,
`codelet_code_id` BIGINT NOT NULL,
`codelet_origin` TINYINT UNSIGNED NOT NULL,
`codelet_url` VARCHAR(512) NOT NULL,
`codelet_rank` FLOAT NOT NULL,
`codelet_date_created` DATETIME DEFAULT NULL,
`codelet_date_modified` DATETIME DEFAULT NULL,
PRIMARY KEY (`codelet_id`),
FULLTEXT KEY (`codelet_name`),
KEY (`codelet_rank`),
KEY (`codelet_date_created`),
KEY (`codelet_date_modified`),
FOREIGN KEY (`codelet_code_id`)
REFERENCES `code` (`code_id`)
ON DELETE RESTRICT ON UPDATE CASCADE,
FOREIGN KEY (`codelet_origin`)
REFERENCES `origins` (`origin_id`)
ON DELETE RESTRICT ON UPDATE CASCADE
) ENGINE=InnoDB;

CREATE TABLE `authors` (
`author_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
`author_codelet` BIGINT UNSIGNED NOT NULL,
`author_name` VARCHAR(128) NOT NULL,
`author_url` VARCHAR(512) DEFAULT NULL,
PRIMARY KEY (`author_id`),
FULLTEXT KEY (`author_name`),
FOREIGN KEY (`author_codelet`)
REFERENCES `codelets` (`codelet_id`)
ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

CREATE TABLE `symbols` (
`symbol_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
`symbol_code` BIGINT NOT NULL,
`symbol_type` TINYINT UNSIGNED NOT NULL,
`symbol_name` VARCHAR(512) NOT NULL,
PRIMARY KEY (`symbol_id`),
KEY (`symbol_type`, `symbol_name`(32)),
FOREIGN KEY (`symbol_code`)
REFERENCES `code` (`code_id`)
ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

CREATE TABLE `symbol_locations` (
`sloc_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
`sloc_symbol` BIGINT UNSIGNED NOT NULL,
`sloc_type` TINYINT UNSIGNED NOT NULL,
`sloc_row` INT UNSIGNED NOT NULL,
`sloc_col` INT UNSIGNED DEFAULT NULL,
`sloc_end_row` INT UNSIGNED DEFAULT NULL,
`sloc_end_col` INT UNSIGNED DEFAULT NULL,
PRIMARY KEY (`sloc_id`),
FOREIGN KEY (`sloc_symbol`)
REFERENCES `symbols` (`symbol_id`)
ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

CREATE TABLE `cache` (
`cache_id` BIGINT NOT NULL,
`cache_count_mnt` SMALLINT UNSIGNED NOT NULL,
`cache_count_exp` TINYINT UNSIGNED NOT NULL,
`cache_created` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`cache_id`)
) ENGINE=InnoDB;

CREATE TABLE `cache_data` (
`cdata_cache` BIGINT NOT NULL,
`cdata_codelet` BIGINT UNSIGNED NOT NULL,
`cdata_index` TINYINT UNSIGNED NOT NULL,
PRIMARY KEY (`cdata_cache`, `cdata_codelet`),
FOREIGN KEY (`cdata_cache`)
REFERENCES `cache` (`cache_id`)
ON DELETE CASCADE ON UPDATE CASCADE,
FOREIGN KEY (`cdata_codelet`)
REFERENCES `codelets` (`codelet_id`)
ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

DELIMITER //
CREATE PROCEDURE `empty_database`()
BEGIN
DELETE FROM `codelets`;
DELETE FROM `code`;
DELETE FROM `cache`;
ALTER TABLE `codelets` AUTO_INCREMENT = 1;
ALTER TABLE `authors` AUTO_INCREMENT = 1;
ALTER TABLE `symbols` AUTO_INCREMENT = 1;
ALTER TABLE `symbol_locations` AUTO_INCREMENT = 1;
END//
DELIMITER ;

CREATE EVENT `flush_cache`
ON SCHEDULE EVERY 1 HOUR
DO
DELETE FROM `cache`
WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY);

bitshift/languages.py (+22 -0)

@@ -0,0 +1,22 @@
from os import path

import yaml

__all__ = ["LANGS", "LANGS_ALL"]

def _load_langs():
filename = path.join(path.dirname(__file__), "languages.yml")
with open(filename) as fp:
data = yaml.load(fp)["languages"]
langs = [(it.keys()[0] if isinstance(it, dict) else it).encode("utf8")
for it in data]
all_langs = {}
for i, lang in enumerate(data):
if isinstance(lang, dict):
for val in lang.values()[0]:
all_langs[val] = i
else:
all_langs[lang] = i
return langs, all_langs

LANGS, LANGS_ALL = _load_langs()
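In practice `LANGS` is the list of canonical language names from `languages.yml`, and `LANGS_ALL` maps every Pygments lexer name (including the grouped aliases) to the index of its canonical language. A short sketch, again assuming the package imports cleanly:

    from bitshift.languages import LANGS, LANGS_ALL

    idx = LANGS_ALL["Python 3"]        # aliases collapse onto one language...
    print(LANGS[idx])                  # ...so this prints "Python"
    print(LANGS_ALL["NumPy"] == idx)   # True, per the grouping in languages.yml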

bitshift/languages.yml (+368 -0)

@@ -0,0 +1,368 @@
# A list of programming languages supported by bitshift:

languages:
# With parsers:
- Python:
- Python
- Python 3
- Python 3.0 Traceback
- Python console session
- Python Traceback
- NumPy
- C
- Java
- Ruby:
- Ruby
- Ruby irb session

# Without parsers:
- ABAP
- APL
- ActionScript:
- ActionScript
- ActionScript 3
- ANTLR:
- ANTLR
- ANTLR With ActionScript Target
- ANTLR With CPP Target
- "ANTLR With C# Target"
- ANTLR With Java Target
- ANTLR With ObjectiveC Target
- ANTLR With Perl Target
- ANTLR With Python Target
- ANTLR With Ruby Target
- Ada
- Agda:
- Agda
- Literate Agda
- Alloy
- AmbientTalk
- ApacheConf
- AppleScript
- AspectJ
- aspx-cs
- aspx-vb
- Asymptote
- autohotkey
- AutoIt
- Awk
- BBCode
- BUGS
- Bash:
- Bash
- Bash Session
- Batchfile
- Befunge
- BlitzBasic:
- BlitzBasic
- BlitzMax
- Boo
- Brainfuck
- Bro
- "C#"
- C++
- ca65
- CBM BASIC V2
- Ceylon
- CFEngine3
- cfstatement
- ChaiScript
- Chapel
- Cheetah
- Cirru
- Clay
- Clojure:
- Clojure
- ClojureScript
- CMake
- COBOL:
- COBOL
- COBOLFree
- CoffeeScript
- Coldfusion CFC
- Coldfusion HTML
- Common Lisp
- Coq
- Croc
- Cryptol:
- Cryptol
- Literate Cryptol
- CSS:
- CSS
- CSS+Django/Jinja
- CSS+Genshi Text
- CSS+Lasso
- CSS+Mako
- CSS+Mako
- CSS+Myghty
- CSS+PHP
- CSS+Ruby
- CSS+Smarty
- CUDA
- Cypher
- Cython
- D
- Darcs Patch
- Dart
- Debian Control file
- Debian Sourcelist
- Delphi
- dg
- Diff
- Django/Jinja
- Docker
- DTD
- Duel
- Dylan:
- Dylan
- Dylan session
- DylanLID
- EBNF
- eC
- ECL
- Eiffel
- Elixir:
- Elixir
- Elixir iex session
- Embedded Ragel
- ERB:
- ERB
- RHTML
- Erlang:
- Erlang
- Erlang erl session
- Evoque
- Factor
- Fancy
- Fantom
- Felix
- Fortran
- FoxPro
- FSharp
- GAP
- GAS
- Genshi
- Genshi Text
- Gettext Catalog
- Gherkin
- GLSL
- Gnuplot
- Go
- Golo
- GoodData-CL
- Gosu
- Gosu Template
- Groff
- Groovy
- Haml
- Handlebars
- Haskell:
- Haskell
- Literate Haskell
- Haxe
- HTML:
- HTML
- HTML+Cheetah
- HTML+Django/Jinja
- HTML+Evoque
- HTML+Genshi
- HTML+Lasso
- HTML+Mako
- HTML+Mako
- HTML+Myghty
- HTML+PHP
- HTML+Smarty
- HTML+Velocity
- Hxml
- Hy
- Hybris
- IDL
- Idris:
- Idris
- Literate Idris
- Igor
- Inform 6:
- Inform 6
- Inform 6 template
- Inform 7
- INI
- Io
- Ioke
- Jade
- JAGS
- Jasmin
- Java Server Page
- JavaScript:
- JavaScript
- JavaScript+Cheetah
- JavaScript+Django/Jinja
- JavaScript+Genshi Text
- JavaScript+Lasso
- JavaScript+Mak
- JavaScript+Mako
- JavaScript+Myghty
- JavaScript+PHP
- JavaScript+Ruby
- JavaScript+Smarty
- JSON
- Julia:
- Julia
- Julia console
- Kal
- Kconfig
- Koka
- Kotlin
- Lasso
- Lighttpd configuration file
- Limbo
- LiveScript
- LLVM
- Logos
- Logtalk
- LSL
- Lua
- Makefile
- Makefile
- Base Makefile
- Mako
- MAQL
- Mask
- Mason
- Mathematica
- Matlab:
- Matlab
- Matlab session
- MiniD
- Modelica
- Modula-2
- Monkey
- MOOCode
- MoonScript
- MQL
- Mscgen
- MuPAD
- MXML
- Myghty
- NASM
- Nemerle
- nesC
- NewLisp
- Newspeak
- Nginx configuration file
- Nimrod
- Nix
- NSIS
- Objective-C
- Objective-C++
- Objective-J
- OCaml
- Octave
- Ooc
- Opa
- OpenEdge ABL
- Pan
- Pawn
- Perl:
- Perl
- Perl6
- PHP
- Pig
- Pike
- PostScript
- POVRay
- PowerShell
- Prolog
- Properties
- Protocol Buffer
- Puppet
- PyPy Log
- QBasic
- QML
- Racket
- Ragel:
- Ragel
- Ragel in C Host
- Ragel in CPP Host
- Ragel in D Host
- Ragel in Java Host
- Ragel in Objective C Host
- Ragel in Ruby Host
- RConsole
- Rd
- REBOL
- Red
- Redcode
- reg
- reStructuredText
- Rexx
- RobotFramework
- RPMSpec
- RQL
- RSL
- Rust
- S
- Sass:
- Sass
- SCSS
- Scala
- Scalate Server Page
- Scaml
- Scheme
- Scilab
- Shell Session
- Slim
- Smali
- Smalltalk
- Smarty
- Snobol
- SourcePawn
- SPARQL
- SQL:
- SQL
- MySQL
- PL/pgSQL
- PostgreSQL console (psql)
- PostgreSQL SQL dialect
- sqlite3con

- SquidConf
- Stan
- Standard ML
- SWIG
- systemverilog
- Tcl
- Tcsh
- Tea
- TeX
- Todotxt
- Treetop
- TypeScript
- UrbiScript
- Vala
- VB.net
- VCTreeStatus
- Velocity
- verilog
- VGL
- vhdl
- VimL
- XML:
- XML
- XML+Cheetah
- XML+Django/Jinja
- XML+Evoque
- XML+Lasso
- XML+Mako
- XML+Mako
- XML+Myghty
- XML+PHP
- XML+Ruby
- XML+Smarty
- XML+Velocity
- XQuery
- XSLT
- Xtend
- YAML:
- YAML
- YAML+Jinja
- Zephir

bitshift/parser/__init__.py (+90 -0)

@@ -0,0 +1,90 @@
import json
import subprocess

from os import path
from pygments import lexers as pgl, util

from ..languages import LANGS, LANGS_ALL
from .python import parse_py

__all__ = ["parse", "UnsupportedFileError", "start_parse_servers"]

# TODO: Change these
PARSER_COMMANDS = {
'Java': ['java', '-cp',
path.join(path.dirname(__file__), "../../parsers/java/parsing.jar"),
'com.bitshift.parsing.Parse'],
'Ruby': ['rake', '-f',
path.join(path.dirname(__file__), "../../parsers/ruby/Rakefile"),
'parse']
}

class UnsupportedFileError(Exception):
pass

def _lang(codelet):
"""
Private function to identify the language of a codelet.

:param codelet: The codelet object to be identified.

:type code: Codelet

.. todo::
Modify function to incorporate tags from stackoverflow.
"""

try:
if codelet.filename:
lex = pgl.guess_lexer_for_filename(codelet.filename, codelet.code)
else:
lex = pgl.guess_lexer(codelet.code)
return LANGS_ALL[lex.name]
except (util.ClassNotFound, KeyError):
raise UnsupportedFileError(codelet.filename)

def parse_via_proc(codelet):
proc = subprocess.Popen(PARSER_COMMANDS[LANGS[codelet.language]],
stdin=subprocess.PIPE, stdout=subprocess.PIPE)

data = proc.communicate(codelet.code)[0]
symbols = json.loads(data)
return symbols

PARSERS = {
"Python": parse_py,
"Java": parse_via_proc,
"Ruby": parse_via_proc,
}

def parse(codelet):
"""
Dispatches the codelet to the correct parser based on its language.
It is the job of the respective parsers to accumulate data about the
code and to convert it into a string representing a python dict.
The codelet is then given the dict as its 'symbols' field.

:param codelet: The codelet object to be parsed.

:type code: Codelet
"""
lang = _lang(codelet)
lang_string = LANGS[lang]
codelet.language = lang

def loc_helper(l):
for i in l:
if i == -1:
yield None
else:
yield i

if lang_string in PARSERS:
symbols = PARSERS[lang_string](codelet)
symbols = {
key: [(name,
[tuple(loc_helper(loc)) for loc in syms[name]["assignments"]],
[tuple(loc_helper(loc)) for loc in syms[name]["uses"]])
for name in syms]
for key, syms in symbols.iteritems()}
codelet.symbols = symbols
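Putting the pieces together, a rough usage sketch (assuming a configured checkout so the package and its Python parser import cleanly); the exact contents of `symbols` depend on the per-language parsers, so the comments below only indicate the shape:

    from bitshift.codelet import Codelet
    from bitshift.parser import parse, UnsupportedFileError

    codelet = Codelet("example", "def add(a, b):\n    return a + b\n",
                      "example.py", None, [], "http://example.com/example.py",
                      None, None, 0.0)
    try:
        parse(codelet)    # guesses the language, then fills codelet.symbols
    except UnsupportedFileError:
        pass

    print(codelet.language)        # index into LANGS
    print(codelet.symbols.keys())  # e.g. ["functions", "vars", ...], each a
                                   # list of (name, definition locs, use locs)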

bitshift/parser/c.py (+106 -0)

@@ -0,0 +1,106 @@
from pycparser import c_parser, c_ast

class _TreeCutter(c_ast.NodeVisitor):
"""
Local node visitor for C abstract syntax trees.

:ivar accum: (dict) Information on variables, functions, and structs
accumulated from an abstract syntax tree.

:ivar cache: (dict or None) Information stored about parent nodes. Added
to accum when node reaches the lowest possible level.

.. todo::
Add visit function for c_ast.ID to record all uses of a variable.

Use self.cache to store extra information about variables.
"""

def __init__(self):
"""
Create a _TreeCutter instance.
"""

self.accum = {'vars': {}, 'functions': {}, 'structs': {}}
self.cache = None

def start_n_end(self, node):
pass

def visit_FuncDecl(self, node):
"""
Visits FuncDecl nodes in a tree. Adds relevant data about them to accum
after visiting all of its children as well.

:param node: The current node.

:type node: c_ast.FuncDecl

.. todo::
Add other relevant information about functions like parameters and
return type.
"""

self.cache['group'] = 'functions'
self.cache['meta']['end_ln'] = node.coord.line
self.cache['meta']['end_col'] = node.coord.column

self.generic_visit(node)

def visit_Struct(self, node):
"""
Visits Struct nodes in a tree. Adds relevant data about them to accum
after visiting all of its children as well.

:param node: The current node.

:type node: c_ast.Struct

.. todo::
Find other relevant information to add about structs.
"""

self.cache['group'] = 'structs'
self.cache['meta']['end_ln'] = node.coord.line
self.cache['meta']['end_col'] = node.coord.column

self.generic_visit(node)

def visit_Decl(self, node):
"""
Visits Decl nodes in a tree. Adds relevant data about them to accum
after visiting all of its children as well.

:param node: The current node.

:type node: c_ast.Decl
"""

self.cache = {'group': 'vars', 'meta': {}}

self.cache['meta']['start_ln'] = node.coord.line
self.cache['meta']['start_col'] = node.coord.column
self.cache['meta']['end_ln'] = node.coord.line
self.cache['meta']['end_col'] = node.coord.column

self.generic_visit(node)

self.accum[self.cache['group']][node.name] = self.cache['meta']
self.cache = None

def parse_c(codelet):
"""
Adds a 'symbols' field to the codelet after parsing the C code.

:param codelet: The codelet object to be parsed.

:type codelet: Codelet

.. todo::
Preprocess c code so that no ParseErrors are thrown.
"""

tree = c_parser.CParser().parse(codelet.code)
cutter = _TreeCutter()
cutter.visit(tree)
codelet.symbols = cutter.accum
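
A minimal sketch of running parse_c above on a trivial snippet, assuming pycparser is installed; FakeCodelet is again a hypothetical stand-in with only the attributes parse_c touches.

from bitshift.parser.c import parse_c

class FakeCodelet(object):
    """Hypothetical stand-in; parse_c only reads .code and sets .symbols."""
    def __init__(self, code):
        self.code = code
        self.symbols = None

codelet = FakeCodelet("int counter = 0;")
parse_c(codelet)
print(codelet.symbols)   # {'vars': {'counter': {...}}, 'functions': {}, 'structs': {}}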

+ 217
- 0
bitshift/parser/python.py View File

@@ -0,0 +1,217 @@
import ast
import re

encoding_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", re.UNICODE)

class _TreeWalker(ast.NodeVisitor):
"""
Local node visitor for Python abstract syntax trees.

:ivar symbols: (dict) Information on variables, functions, and classes
accumulated from an abstract syntax tree.

:ivar cache: (dict or None) Information stored about parent nodes. Added
to symbols when node reaches the lowest possible level.

.. todo::
Add visit function for ast.Name to record all uses of a variable.

Use self.cache to store extra information about nodes.
"""

def __init__(self):
"""
Create a _TreeWalker instance.
"""

self.symbols = {'vars': {}, 'functions': {}, 'classes': {}}
self.cache = []

def clear_cache(self):
self.cache = []

def block_position(self, node):
"""
Helper function to get the start and end lines of an AST node.

:param node: The node.

:type node: ast.FunctionDef or ast.ClassDef or ast.Module
"""

start_line, start_col = node.lineno, node.col_offset
temp_node = node

while 'body' in temp_node.__dict__:
temp_node = temp_node.body[-1]

end_line, end_col = temp_node.lineno, temp_node.col_offset

if start_line == end_line:
return [start_line, start_col, end_line, -1]

return [start_line, start_col, end_line, end_col]

def visit_Assign(self, node):
"""
Visits Assign nodes in a tree. Adds relevant data about them to symbols.

:param node: The current node.

:type node: ast.Assign

.. todo::
Add value and type metadata to symbols.
"""

pos = self.block_position(node)

for t in node.targets:
self.visit(t)

for name in self.cache:
if not self.symbols['vars'].has_key(name):
self.symbols['vars'][name] = {'assignments': [], 'uses': []}

self.symbols['vars'][name]['assignments'].append(pos)

self.clear_cache()
self.visit(node.value)

for name in self.cache:
if not self.symbols['vars'].has_key(name):
self.symbols['vars'][name] = {'assignments': [], 'uses': []}

self.symbols['vars'][name]['uses'].append(pos)

self.clear_cache()

def visit_FunctionDef(self, node):
"""
Visits FunctionDef nodes in a tree. Adds relevant data about them to symbols.

:param node: The current node.

:type node: ast.FunctionDef

.. todo::
Add arguments and decorators metadata to symbols.
"""

pos = self.block_position(node)

if not self.symbols['functions'].has_key(node.name):
self.symbols['functions'][node.name] = {'assignments': [], 'uses': []}

self.symbols['functions'][node.name]['assignments'].append(pos)

self.generic_visit(node)

def visit_Call(self, node):
"""
Visits Call nodes in a tree. Adds relevant data about them to the
functions section of symbols.

:param node: The current node.

:type node: ast.Call

.. todo::
Add arguments and decorators metadata to symbols.
"""

pos = self.block_position(node)

self.visit(node.func)
if not self.cache:
return
name = self.cache.pop()

if not self.symbols['functions'].has_key(name):
self.symbols['functions'][name] = {'assignments': [], 'uses': []}

self.symbols['functions'][name]['uses'].append(pos)

for name in self.cache:
if not self.symbols['vars'].has_key(name):
self.symbols['vars'][name] = {'assignments': [], 'uses': []}

self.symbols['vars'][name]['uses'].append(pos)

self.clear_cache()

for a in node.args:
self.visit(a)

for name in self.cache:
if not self.symbols['vars'].has_key(name):
self.symbols['vars'][name] = {'assignments': [], 'uses': []}

self.symbols['vars'][name]['uses'].append(pos)

self.clear_cache()

def visit_ClassDef(self, node):
"""
Visits ClassDef nodes in a tree. Adds relevant data about them to symbols.

:param node: The current node.

:type node: ast.ClassDef

.. todo::
Add arguments, inherits, and decorators metadata to symbols.
"""

pos = self.block_position(node)

if node.name not in self.symbols['classes']:
self.symbols['classes'][node.name] = {'assignments': [], 'uses': []}
self.symbols['classes'][node.name]['assignments'].append(pos)

self.generic_visit(node)

def visit_Name(self, node):
self.cache.append(node.id)

def visit_Attribute(self, node):
self.visit(node.value)
self.cache.append(node.attr)

def visit_Import(self, node):
pos = self.block_position(node)
# look through aliases

def parse_py(codelet):
"""
Parses the codelet's Python code and returns a dict of symbols.

:param codelet: The codelet object to be parsed.

:type codelet: Codelet
"""

def strip_encoding(lines):
"""Strips the encoding line from a file, which breaks the parser."""
it = iter(lines)
try:
first = next(it)
if not encoding_re.match(first):
yield first
second = next(it)
if not encoding_re.match(second):
yield second
except StopIteration:
return
for line in it:
yield line

try:
tree = ast.parse("\n".join(strip_encoding(codelet.code.splitlines())))
except SyntaxError:
## TODO: add some logging here?
return {}

walker = _TreeWalker()
walker.visit(tree)
return walker.symbols
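
A minimal sketch of the walker above on a small snippet; FakeCodelet is a hypothetical stand-in, since parse_py only reads the .code attribute.

from bitshift.parser.python import parse_py

class FakeCodelet(object):
    """Hypothetical stand-in; parse_py only reads .code."""
    def __init__(self, code):
        self.code = code

source = "def add(a, b):\n    total = a + b\n    return total\n"
symbols = parse_py(FakeCodelet(source))
print(symbols['functions'])   # 'add' recorded with its definition position
print(symbols['vars'])        # assignment/use positions for the names seen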

+ 320
- 0
bitshift/query/__init__.py View File

@@ -0,0 +1,320 @@
"""
This subpackage contains code to parse search queries received from the
frontend into trees that can be used by the database backend.
"""

from __future__ import unicode_literals
from re import IGNORECASE, search
from sys import maxsize

from dateutil.parser import parse as parse_date

from .nodes import (String, Regex, Text, Language, Author, Date, Symbol,
BinaryOp, UnaryOp)
from .tree import Tree
from ..languages import LANGS

__all__ = ["QueryParseException", "parse_query"]

class QueryParseException(Exception):
"""Raised by parse_query() when a query is invalid."""
pass


class _QueryParser(object):
"""Wrapper class with methods to parse queries. Used as a singleton."""

def __init__(self):
self._prefixes = {
self._parse_language: ["l", "lang", "language"],
self._parse_author: ["a", "author"],
self._parse_modified: ["m", "mod", "modified", "modify"],
self._parse_created: ["cr", "create", "created"],
self._parse_symbol: ["s", "sym", "symb", "symbol"],
self._parse_function: ["f", "fn", "fun", "func", "function",
"meth", "method"],
self._parse_class: ["cl", "class", "clss"],
self._parse_variable: ["v", "var", "variable"],
self._parse_namespace: ["n", "ns", "namespace", "module"],
self._parse_interface: ["in", "inter", "interface", "implements"],
self._parse_import: ["im", "imp", "import", "include", "require",
"imports", "requires"]
}

def _scan_query(self, query, markers):
"""Scan a query (sub)string for the first occurance of some markers.

Returns a 2-tuple of (first_marker_found, marker_index).
"""
def is_escaped(query, index):
"""Return whether a query marker is backslash-escaped."""
return (index > 0 and query[index - 1] == "\\" and
(index < 2 or query[index - 2] != "\\"))

best_marker, best_index = None, maxsize
for marker in markers:
index = query.find(marker)
if is_escaped(query, index):
_, new_index = self._scan_query(query[index + 1:], marker)
index += new_index + 1
if index >= 0 and index < best_index:
best_marker, best_index = marker, index
return best_marker, best_index

def _split_query(self, query, markers, parens=False):
"""Split a query string into a nested list of query terms.

Returns a list of terms and/or nested sublists of terms. Each term and
sublist is guaranteed to be non-empty.
"""
query = query.lstrip()
if not query:
return []
marker, index = self._scan_query(query, markers)
if not marker:
return [query]
nest = [query[:index]] if index > 0 else []
after = query[index + 1:]

if marker == " ":
nest += self._split_query(after, markers, parens)
elif marker in ('"', "'"):
close_marker, close_index = self._scan_query(after, marker)
if close_marker:
if close_index > 0:
nest.append(after[:close_index])
after = after[close_index + 1:]
nest += self._split_query(after, markers, parens)
elif after:
nest.append(after)
elif marker == "(":
inner, after = self._split_query(after, markers, True), []
if inner and isinstance(inner[-1], tuple):
after = self._split_query(inner.pop()[0], markers, parens)
if inner:
nest.append(inner)
if after:
nest += after
elif marker == ")":
if parens:
nest.append((after,))
else:
nest += self._split_query(after, markers)
return nest

def _parse_literal(self, literal):
"""Parse part of a search query into a string or regular expression."""
if literal.startswith(("r:", "re:", "regex:", "regexp:")):
arg = literal.split(":", 1)[1]
if not arg:
err = 'Incomplete query term: "%s"' % literal
raise QueryParseException(err)
return Regex(arg)
return String(literal)

def _parse_language(self, term):
"""Parse part of a query into a language node and return it."""
term = self._parse_literal(term)
if isinstance(term, Regex):
langs = [i for i, lang in enumerate(LANGS)
if search(term.regex, lang, IGNORECASE)]
if not langs:
err = 'No languages found for regex: "%s"' % term.regex
raise QueryParseException(err)
node = Language(langs.pop())
while langs:
node = BinaryOp(Language(langs.pop()), BinaryOp.OR, node)
return node

needle = term.string.lower()
for i, lang in enumerate(LANGS):
if lang.lower() == needle:
return Language(i)
for i, lang in enumerate(LANGS):
if lang.lower().startswith(needle):
return Language(i)
err = 'No languages found for string: "%s"' % term.string
raise QueryParseException(err)

def _parse_author(self, term):
"""Parse part of a query into an author node and return it."""
return Author(self._parse_literal(term))

def _parse_date(self, term, type_):
"""Parse part of a query into a date node and return it."""
if ":" not in term:
err = "A date relationship is required " \
'("before:<date>" or "after:<date>"): "%s"'
raise QueryParseException(err % term)
relstr, dtstr = term.split(":", 1)
if relstr.lower() in ("before", "b"):
relation = Date.BEFORE
elif relstr.lower() in ("after", "a"):
relation = Date.AFTER
else:
err = 'Bad date relationship (should be "before" or "after"): "%s"'
raise QueryParseException(err % relstr)
try:
dt = parse_date(dtstr)
except (TypeError, ValueError):
raise QueryParseException('Bad date/time string: "%s"' % dtstr)
return Date(type_, relation, dt)

def _parse_modified(self, term):
"""Parse part of a query into a date modified node and return it."""
return self._parse_date(term, Date.MODIFY)

def _parse_created(self, term):
"""Parse part of a query into a date created node and return it."""
return self._parse_date(term, Date.CREATE)

def _parse_symbol(self, term, stype=Symbol.ALL):
"""Parse part of a query into a symbol node and return it."""
defines = ("a:", "assign:", "assignment:", "d:", "def:", "definition:",
"decl:", "declare:", "declaration:")
uses = ("u:", "use:", "c:", "call:")
if term.startswith(defines) or term.startswith(uses):
context = Symbol.DEFINE if term.startswith(defines) else Symbol.USE
term_part = term.split(":", 1)[1]
if not term_part:
raise QueryParseException('Incomplete query term: "%s"' % term)
term = term_part
else:
context = Symbol.ALL
literal = self._parse_literal(term)
if isinstance(literal, String):
make_symbol = lambda lit: Symbol(context, stype, String(lit))
symbols = self._split_query(literal.string, " \"'")
node = make_symbol(symbols.pop())
while symbols:
node = BinaryOp(make_symbol(symbols.pop()), BinaryOp.OR, node)
return node
return Symbol(context, stype, literal)

def _parse_function(self, term):
"""Parse part of a query into a function node and return it."""
return self._parse_symbol(term, Symbol.FUNCTION)

def _parse_class(self, term):
"""Parse part of a query into a class node and return it."""
return self._parse_symbol(term, Symbol.CLASS)

def _parse_variable(self, term):
"""Parse part of a query into a variable node and return it."""
return self._parse_symbol(term, Symbol.VARIABLE)

def _parse_namespace(self, term):
"""Parse part of a query into a namespace node and return it."""
return self._parse_symbol(term, Symbol.NAMESPACE)

def _parse_interface(self, term):
"""Parse part of a query into a interface node and return it."""
return self._parse_symbol(term, Symbol.INTERFACE)

def _parse_import(self, term):
"""Parse part of a query into a import node and return it."""
return self._parse_symbol(term, Symbol.IMPORT)

def _parse_term(self, term):
"""Parse a query term into a tree node and return it."""
term = term.replace('\\"', '"').replace("\\\\", "\\")
if ":" in term and not term[0] == ":":
prefix, arg = term.split(":", 1)
invert = prefix.lower() == "not"
if invert:
prefix, arg = arg.split(":", 1)
if not arg:
raise QueryParseException('Incomplete query term: "%s"' % term)
for meth, prefixes in self._prefixes.iteritems():
if prefix.lower() in prefixes:
if invert:
return UnaryOp(UnaryOp.NOT, meth(arg))
return meth(arg)
return Text(self._parse_literal(term))

def _parse_boolean_operators(self, nest):
"""Parse boolean operators in a nested query list."""
op_lookup = {
"and": BinaryOp.AND,
"or": BinaryOp.OR,
"not": UnaryOp.NOT
}
for i, term in enumerate(nest):
if isinstance(term, list):
self._parse_boolean_operators(term)
else:
nest[i] = op_lookup.get(term.lower(), term)

def _parse_nest(self, nest):
"""Recursively parse a nested list of search query terms."""
def parse_binary_op(op):
"""Parse a binary operator in a nested query list."""
index = nest.index(op)
if index == 0 or index == len(nest) - 1:
err = "Invalid query: '%s' given without argument."
raise QueryParseException(err % BinaryOp.OPS[op])
left = self._parse_nest(nest[:index])
right = self._parse_nest(nest[index + 1:])
return BinaryOp(left, op, right)

if not nest:
err = "Error while parsing query: empty nest detected."
raise QueryParseException(err)
elif BinaryOp.OR in nest:
return parse_binary_op(BinaryOp.OR)
elif BinaryOp.AND in nest:
return parse_binary_op(BinaryOp.AND)
elif UnaryOp.NOT in nest:
index = nest.index(UnaryOp.NOT)
if index == len(nest) - 1:
err = "Invalid query: '%s' given without argument."
raise QueryParseException(err % UnaryOp.OPS[UnaryOp.NOT])
right = UnaryOp(UnaryOp.NOT, self._parse_nest(nest[index + 1:]))
if index > 0:
left = self._parse_nest(nest[:index])
return BinaryOp(left, BinaryOp.AND, right)
return right
elif len(nest) > 1:
left, right = self._parse_term(nest[0]), self._parse_nest(nest[1:])
return BinaryOp(left, BinaryOp.AND, right)
elif isinstance(nest[0], list):
return self._parse_nest(nest[0])
else:
return self._parse_term(nest[0])

def _balance_tree(self, node):
"""Auto-balance a tree using a string sorting function."""
if isinstance(node, BinaryOp):
self._balance_tree(node.left)
self._balance_tree(node.right)
if node.right.sortkey() < node.left.sortkey():
node.left, node.right = node.right, node.left
elif isinstance(node, UnaryOp):
self._balance_tree(node.node)

def parse(self, query):
"""
Parse a search query.

The result is normalized with a sorting function so that
``"foo OR bar"`` and ``"bar OR foo"`` result in the same tree. This is
important for caching purposes.

:param query: The query to be converted.
:type query: str

:return: A tree storing the data in the query.
:rtype: :py:class:`~.query.tree.Tree`

:raises: :py:class:`.QueryParseException`
"""
nest = self._split_query(query.rstrip(), " \"'()")
if not nest:
raise QueryParseException('Empty query: "%s"' % query)
self._parse_boolean_operators(nest)
root = self._parse_nest(nest)
self._balance_tree(root)
return Tree(root)


parse_query = _QueryParser().parse
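
A minimal sketch of the parser's entry point, assuming the package (and its languages.yml data) is importable; the query string is only an example.

from bitshift.query import parse_query, QueryParseException

try:
    tree = parse_query('lang:python "binary search"')
    print(tree)              # repr of the normalized query tree
    print(tree.serialize())  # the string form used as a cache key
except QueryParseException as err:
    print(err)               # raised for empty or malformed queries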

+ 297
- 0
bitshift/query/nodes.py View File

@@ -0,0 +1,297 @@
from ..languages import LANGS

__all__ = ["String", "Regex", "Text", "Language", "Author", "Date", "Symbol",
"BinaryOp", "UnaryOp"]

class _Node(object):
"""Represents a single node in a query tree.

Generally speaking, a node is a constraint applied to the database. Thus,
a :py:class:`~.Language` node represents a constraint where only codelets
of a specific language are selected.
"""

def _null_regex(self, expr):
"""Implements a regex search with support for a null expression."""
return "IF(ISNULL(%s), 0, %s REGEXP ?)" % (expr, expr)

def sortkey(self):
"""Return a string sort key for the node."""
return ""

def parameterize(self, tables):
"""Parameterize the node.

Returns a 4-tuple of (conditional string, parameter list, rank list,
should-we-rank boolean). If the rank list is empty, then it is assumed
to contain the conditional string.
"""
return "", [], [], False


class _Literal(object):
"""Represents a literal component of a search query, present at the leaves.

A literal might be a string or a regular expression.
"""
pass


class String(_Literal):
"""Represents a string literal."""

def __init__(self, string):
"""
:type string: unicode
"""
self.string = string

def __repr__(self):
return "String({0!r})".format(self.string)

def sortkey(self):
return self.string


class Regex(_Literal):
"""Represents a regular expression literal."""

def __init__(self, regex):
"""
:type regex: unicode
"""
self.regex = regex

def __repr__(self):
return "Regex({0!r})".format(self.regex)

def sortkey(self):
return self.regex


class Text(_Node):
"""Represents a text node.

Searches in codelet names (full-text search), symbols (equality), and
source code (full-text search).
"""

def __init__(self, text):
"""
:type text: :py:class:`._Literal`
"""
self.text = text

def __repr__(self):
return "Text({0})".format(self.text)

def sortkey(self):
return self.text.sortkey()

def parameterize(self, tables):
tables |= {"code", "symbols"}
if isinstance(self.text, Regex):
ranks = ["(codelet_name REGEXP ?)", "(code_code REGEXP ?)",
self._null_regex("symbol_name")]
text = self.text.regex
else:
ranks = ["(MATCH(codelet_name) AGAINST (? IN BOOLEAN MODE))",
"(MATCH(code_code) AGAINST (? IN BOOLEAN MODE))",
"(symbol_name <=> ?)"]
text = self.text.string
cond = "(" + " OR ".join(ranks) + ")"
return cond, [text] * 3, ranks, True


class Language(_Node):
"""Represents a language node.

Searches in the code_lang field.
"""

def __init__(self, lang):
"""
:type lang: int
"""
self.lang = lang

def __repr__(self):
return "Language({0})".format(LANGS[self.lang])

def sortkey(self):
return LANGS[self.lang]

def parameterize(self, tables):
tables |= {"code"}
return "(code_lang <=> ?)", [self.lang], [], False


class Author(_Node):
"""Represents a author node.

Searches in the author_name field (full-text search).
"""

def __init__(self, name):
"""
:type name: :py:class:`_Literal`
"""
self.name = name

def __repr__(self):
return "Author({0})".format(self.name)

def sortkey(self):
return self.name.sortkey()

def parameterize(self, tables):
tables |= {"authors"}
if isinstance(self.name, Regex):
cond = self._null_regex("author_name")
return cond, [self.name.regex], [], False
cond = "(MATCH(author_name) AGAINST (? IN BOOLEAN MODE))"
return cond, [self.name.string], [], True


class Date(_Node):
"""Represents a date node.

Searches in the codelet_date_created or codelet_date_modified fields.
"""
CREATE = 1
MODIFY = 2

BEFORE = 1
AFTER = 2

def __init__(self, type_, relation, date):
"""
:type type_: int (``CREATE`` or ``MODIFY``)
:type relation: int (``BEFORE``, ``AFTER``)
:type date: datetime.datetime
"""
self.type = type_
self.relation = relation
self.date = date

def __repr__(self):
types = {self.CREATE: "CREATE", self.MODIFY: "MODIFY"}
relations = {self.BEFORE: "BEFORE", self.AFTER: "AFTER"}
tm = "Date({0}, {1}, {2})"
return tm.format(types[self.type], relations[self.relation], self.date)

def sortkey(self):
return self.date.strftime("%Y%m%d%H%M%S")

def parameterize(self, tables):
column = {self.CREATE: "codelet_date_created",
self.MODIFY: "codelet_date_modified"}[self.type]
op = {self.BEFORE: "<=", self.AFTER: ">="}[self.relation]
cond = "IF(ISNULL(%s), 0, %s %s ?)" % (column, column, op)
return cond, [self.date], [], False


class Symbol(_Node):
"""Represents a symbol node.

Searches in symbol_type and symbol_name.
"""
ALL = -1
DEFINE = 0
USE = 1

FUNCTION = 0
CLASS = 1
VARIABLE = 2
NAMESPACE = 3
INTERFACE = 4
IMPORT = 5
TYPES = ["functions", "classes", "vars", "namespaces", "interfaces",
"imports"]
TYPE_REPR = ["FUNCTION", "CLASS", "VARIABLE", "NAMESPACE", "INTERFACE",
"IMPORT"]

def __init__(self, context, type_, name):
"""
:type context: int (``DEFINE`` or ``USE``)
:type type_: int (``ALL``, ``FUNCTION``, ``CLASS``, etc.)
:type name: :py:class:`._Literal`
"""
self.context = context
self.type = type_
self.name = name

def __repr__(self):
context = ["DEFINE", "USE", "ALL"][self.context]
type_ = self.TYPE_REPR[self.type] if self.type >= 0 else "ALL"
return "Symbol({0}, {1}, {2})".format(context, type_, self.name)

def sortkey(self):
return self.name.sortkey()

def parameterize(self, tables):
tables |= {"code", "symbols"}
if isinstance(self.name, Regex):
cond, name = self._null_regex("symbol_name"), self.name.regex
else:
cond, name = "symbol_name <=> ?", self.name.string
if self.type == self.ALL:
types = ", ".join(str(typ) for typ in xrange(len(self.TYPES)))
part = " AND IF(ISNULL(symbol_type), 0, symbol_type IN (%s))"
cond += part % types
if self.type != self.ALL:
cond += " AND symbol_type <=> %d" % self.type
if self.context != self.ALL:
tables |= {"symbol_locations"}
cond += " AND sloc_type <=> %d" % self.context
return "(" + cond + ")", [name], [], False


class BinaryOp(_Node):
"""Represents a relationship between two nodes: ``and``, ``or``."""
AND = object()
OR = object()
OPS = {AND: "AND", OR: "OR"}

def __init__(self, left, op, right):
self.left = left
self.op = op
self.right = right

def __repr__(self):
tmpl = "BinaryOp({0}, {1}, {2})"
return tmpl.format(self.left, self.OPS[self.op], self.right)

def sortkey(self):
return self.left.sortkey() + self.right.sortkey()

def parameterize(self, tables):
lcond, largs, lranks, need_lranks = self.left.parameterize(tables)
rcond, rargs, rranks, need_rranks = self.right.parameterize(tables)
lranks, rranks = lranks or [lcond], rranks or [rcond]
op = self.OPS[self.op]
cond = "(" + lcond + " " + op + " " + rcond + ")"
need_ranks = need_lranks or need_rranks or self.op == self.OR
return cond, largs + rargs, lranks + rranks, need_ranks


class UnaryOp(_Node):
"""Represents a transformation applied to one node: ``not``."""
NOT = object()
OPS = {NOT: "NOT"}

def __init__(self, op, node):
self.op = op
self.node = node

def __repr__(self):
return "UnaryOp({0}, {1})".format(self.OPS[self.op], self.node)

def sortkey(self):
return self.node.sortkey()

def parameterize(self, tables):
cond, args, ranks, need_ranks = self.node.parameterize(tables)
new_cond = "(" + self.OPS[self.op] + " " + cond + ")"
ranks = ranks or [cond]
return new_cond, args, ranks, need_ranks
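
A minimal sketch of how the nodes above turn into SQL fragments; the language index 0 is arbitrary here, real indices come from LANGS.

from bitshift.query.nodes import String, Text, Language, BinaryOp

tables = set()
node = BinaryOp(Language(0), BinaryOp.AND, Text(String(u"quicksort")))
cond, args, ranks, need_ranks = node.parameterize(tables)

print(cond)     # parameterized WHERE fragment: ((code_lang <=> ?) AND (...))
print(args)     # parameters to bind: the language index plus the search text
print(tables)   # tables the final query must join, e.g. code and symbols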

+ 84
- 0
bitshift/query/tree.py View File

@@ -0,0 +1,84 @@
from . import nodes

__all__ = ["Tree"]

QUERY_TEMPLATE = """SELECT codelet_id, MAX(codelet_rank%s) AS score
FROM codelets %s
WHERE %s
GROUP BY codelet_id
ORDER BY score DESC
LIMIT %d OFFSET %d""".replace("\n", " ")

class Tree(object):
"""Represents a query tree."""

def __init__(self, root):
self._root = root

def __repr__(self):
return "Tree({0})".format(self._root)

@property
def root(self):
"""The root node of the tree."""
return self._root

def sortkey(self):
"""Return a string sort key for the query tree."""
return self._root.sortkey()

def serialize(self):
"""Create a string representation of the query for caching.

:return: Query string representation.
:rtype: str
"""
return repr(self)

def walk(self, node_type=None):
"""Walk through the query tree, returning nodes of a specific type."""
pending = [self._root]
while pending:
node = pending.pop()
if not node_type or isinstance(node, node_type):
yield node
if isinstance(node, nodes.UnaryOp):
pending.append(node.node)
elif isinstance(node, nodes.BinaryOp):
pending.extend([node.left, node.right])

def build_query(self, page=1, page_size=10):
"""Convert the query tree into a parameterized SQL SELECT statement.

:param page: The page number to get results for.
:type page: int
:param page_size: The number of results per page.
:type page_size: int

:return: SQL query data.
:rtype: 2-tuple of (SQL statement string, query parameter tuple)
"""
def get_table_joins(tables):
joins = [
("INNER", "code", "codelet_code_id", "code_id"),
("LEFT", "authors", "author_codelet", "codelet_id"),
("LEFT", "symbols", "symbol_code", "code_id"),
("LEFT", "symbol_locations", "sloc_symbol", "symbol_id")
]
tmpl = "%s JOIN %s ON %s = %s"
for args in joins:
if args[1] in tables:
yield tmpl % args

tables = set()
cond, arglist, ranks, need_ranks = self._root.parameterize(tables)
ranks = ranks or [cond]
if need_ranks:
score = " + ((%s) / %d)" % (" + ".join(ranks), len(ranks))
else:
score = ""
joins = " ".join(get_table_joins(tables))
offset = (page - 1) * page_size

query = QUERY_TEMPLATE % (score, joins, cond, page_size, offset)
return query, tuple(arglist * 2 if need_ranks else arglist)
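
A minimal sketch tying the query package together, from raw query string to executable SQL; no database connection is needed just to build the statement.

from bitshift.query import parse_query

tree = parse_query('lang:python "hash table"')
sql, params = tree.build_query(page=1, page_size=10)

print(sql)      # SELECT codelet_id, MAX(codelet_rank ...) ... LIMIT 10 OFFSET 0
print(params)   # parameter tuple to bind when executing the statement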

+ 177
- 0
docs/Makefile View File

@@ -0,0 +1,177 @@
# Makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = build

# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif

# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source

.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext

help:
@echo "Please use \`make <target>' where <target> is one of"
@echo " html to make standalone HTML files"
@echo " dirhtml to make HTML files named index.html in directories"
@echo " singlehtml to make a single large HTML file"
@echo " pickle to make pickle files"
@echo " json to make JSON files"
@echo " htmlhelp to make HTML files and a HTML help project"
@echo " qthelp to make HTML files and a qthelp project"
@echo " devhelp to make HTML files and a Devhelp project"
@echo " epub to make an epub"
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
@echo " latexpdf to make LaTeX files and run them through pdflatex"
@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
@echo " text to make text files"
@echo " man to make manual pages"
@echo " texinfo to make Texinfo files"
@echo " info to make Texinfo files and run them through makeinfo"
@echo " gettext to make PO message catalogs"
@echo " changes to make an overview of all changed/added/deprecated items"
@echo " xml to make Docutils-native XML files"
@echo " pseudoxml to make pseudoxml-XML files for display purposes"
@echo " linkcheck to check all external links for integrity"
@echo " doctest to run all doctests embedded in the documentation (if enabled)"

clean:
rm -rf $(BUILDDIR)/*

html:
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

singlehtml:
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
@echo
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

pickle:
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
@echo
@echo "Build finished; now you can process the pickle files."

json:
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
@echo
@echo "Build finished; now you can process the JSON files."

htmlhelp:
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
@echo
@echo "Build finished; now you can run HTML Help Workshop with the" \
".hhp project file in $(BUILDDIR)/htmlhelp."

qthelp:
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
@echo
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/bitshift.qhcp"
@echo "To view the help file:"
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/bitshift.qhc"

devhelp:
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
@echo
@echo "Build finished."
@echo "To view the help file:"
@echo "# mkdir -p $$HOME/.local/share/devhelp/bitshift"
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/bitshift"
@echo "# devhelp"

epub:
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
@echo
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

latex:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
@echo "Run \`make' in that directory to run these through (pdf)latex" \
"(use \`make latexpdf' here to do that automatically)."

latexpdf:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through pdflatex..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

latexpdfja:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through platex and dvipdfmx..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

text:
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
@echo
@echo "Build finished. The text files are in $(BUILDDIR)/text."

man:
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
@echo
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

texinfo:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
@echo "Run \`make' in that directory to run these through makeinfo" \
"(use \`make info' here to do that automatically)."

info:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo "Running Texinfo files through makeinfo..."
make -C $(BUILDDIR)/texinfo info
@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

gettext:
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
@echo
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

changes:
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
@echo
@echo "The overview file is in $(BUILDDIR)/changes."

linkcheck:
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
@echo
@echo "Link check complete; look for any errors in the above output " \
"or in $(BUILDDIR)/linkcheck/output.txt."

doctest:
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
@echo "Testing of doctests in the sources finished, look at the " \
"results in $(BUILDDIR)/doctest/output.txt."

xml:
$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
@echo
@echo "Build finished. The XML files are in $(BUILDDIR)/xml."

pseudoxml:
$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
@echo
@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."

+ 27
- 0
docs/source/api/bitshift.crawler.rst View File

@@ -0,0 +1,27 @@
crawler Package
===============

:mod:`crawler` Package
----------------------

.. automodule:: bitshift.crawler
:members:
:undoc-members:
:show-inheritance:

:mod:`crawler` Module
---------------------

.. automodule:: bitshift.crawler.crawler
:members:
:undoc-members:
:show-inheritance:

:mod:`indexer` Module
---------------------

.. automodule:: bitshift.crawler.indexer
:members:
:undoc-members:
:show-inheritance:


+ 19
- 0
docs/source/api/bitshift.database.rst View File

@@ -0,0 +1,19 @@
database Package
================

:mod:`database` Package
-----------------------

.. automodule:: bitshift.database
:members:
:undoc-members:
:show-inheritance:

:mod:`migration` Module
-----------------------

.. automodule:: bitshift.database.migration
:members:
:undoc-members:
:show-inheritance:


+ 11
- 0
docs/source/api/bitshift.query.rst View File

@@ -0,0 +1,11 @@
query Package
=============

:mod:`query` Package
--------------------

.. automodule:: bitshift.query
:members:
:undoc-members:
:show-inheritance:


+ 45
- 0
docs/source/api/bitshift.rst View File

@@ -0,0 +1,45 @@
bitshift Package
================

:mod:`bitshift` Package
-----------------------

.. automodule:: bitshift.__init__
:members:
:undoc-members:
:show-inheritance:

:mod:`assets` Module
--------------------

.. automodule:: bitshift.assets
:members:
:undoc-members:
:show-inheritance:

:mod:`codelet` Module
---------------------

.. automodule:: bitshift.codelet
:members:
:undoc-members:
:show-inheritance:

:mod:`config` Module
--------------------

.. automodule:: bitshift.config
:members:
:undoc-members:
:show-inheritance:

Subpackages
-----------

.. toctree::

bitshift.crawler
bitshift.database
bitshift.parser
bitshift.query


+ 7
- 0
docs/source/api/modules.rst View File

@@ -0,0 +1,7 @@
bitshift
========

.. toctree::
:maxdepth: 4

bitshift

+ 268
- 0
docs/source/conf.py View File

@@ -0,0 +1,268 @@
# -*- coding: utf-8 -*-
#
# bitshift documentation build configuration file, created by
# sphinx-quickstart on Mon Apr 7 21:09:45 2014.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

import sys
import os

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, os.path.abspath('../..'))

# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.intersphinx',
'sphinx.ext.coverage',
'sphinx.ext.mathjax',
'sphinx.ext.viewcode',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix of source filenames.
source_suffix = '.rst'

# The encoding of source files.
#source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = u'bitshift'
copyright = u'2014, Benjamin Attal, Ben Kurtovic, Severyn Kozak'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '0.1'
# The full version, including alpha/beta/rc tags.
release = '0.1.dev'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = []

# The reST default role (used for this markup: `text`) to use for all
# documents.
#default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []

# If true, keep warnings as "system message" paragraphs in the built documents.
#keep_warnings = False


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'nature'

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []

# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None

# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
#html_extra_path = []

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}

# If false, no module index is generated.
#html_domain_indices = True

# If false, no index is generated.
#html_use_index = True

# If true, the index is split into individual pages for each letter.
#html_split_index = False

# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None

# Output file base name for HTML help builder.
htmlhelp_basename = 'bitshiftdoc'


# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
#'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
('index', 'bitshift.tex', u'bitshift Documentation',
u'Benjamin Attal, Ben Kurtovic, Severyn Kozak', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False

# If true, show page references after internal links.
#latex_show_pagerefs = False

# If true, show URL addresses after external links.
#latex_show_urls = False

# Documents to append as an appendix to all manuals.
#latex_appendices = []

# If false, no module index is generated.
#latex_domain_indices = True


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
('index', 'bitshift', u'bitshift Documentation',
[u'Benjamin Attal, Ben Kurtovic, Severyn Kozak'], 1)
]

# If true, show URL addresses after external links.
#man_show_urls = False


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
('index', 'bitshift', u'bitshift Documentation',
u'Benjamin Attal, Ben Kurtovic, Severyn Kozak', 'bitshift', 'A semantic search engine for source code.',
'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
#texinfo_appendices = []

# If false, no module index is generated.
#texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'

# If true, do not generate a @detailmenu in the "Top" node's menu.
#texinfo_no_detailmenu = False


# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {'http://docs.python.org/': None}

+ 20
- 0
docs/source/index.rst View File

@@ -0,0 +1,20 @@
bitshift
========

**bitshift** is a semantic search engine for source code.

Contents:

.. toctree::
:maxdepth: 2

API Reference <api/modules>


Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`


+ 9
- 0
gunicorn.cfg View File

@@ -0,0 +1,9 @@
# Configuration file for Gunicorn
# http://docs.gunicorn.org/en/latest/configure.html

bind = ["unix:/tmp/gunicorn.sock"]
workers = 4

accesslog = "logs/access.log"
errorlog = "logs/error.log"
loglevel = "info"

+ 72
- 0
parsers/java/pom.xml View File

@@ -0,0 +1,72 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>com.bitshift.parsing</groupId>
<artifactId>parsing</artifactId>
<packaging>jar</packaging>
<version>1.0-SNAPSHOT</version>
<name>parsing</name>
<url>http://maven.apache.org</url>

<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
</dependency>
<dependency>
<groupId>org.eclipse.jdt</groupId>
<artifactId>org.eclipse.jdt.core</artifactId>
<version>3.7.1</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>17.0</version>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>1.2.1</version>
<configuration>
<mainClass>com.bitshift.parsing.Parse</mainClass>
<arguments>
</arguments>
</configuration>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.4</version>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
<configuration>
<archive>
<manifest>
<addClasspath>true</addClasspath>
<mainClass>com.bitshift.parsing.Parse</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<outputDirectory>${project.basedir}</outputDirectory>
<finalName>${project.artifactId}</finalName>
<appendAssemblyId>false</appendAssemblyId>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>

</project>

+ 35
- 0
parsers/java/src/main/java/com/bitshift/parsing/Parse.java View File

@@ -0,0 +1,35 @@
package com.bitshift.parsing;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;

import com.bitshift.parsing.parsers.JavaParser;

public class Parse {

public static void main(String[] args) {
try {
BufferedReader br = new BufferedReader(
new InputStreamReader(System.in));

String str = "";
StringBuilder source = new StringBuilder();
while ((str = br.readLine()) != null) {
source.append(str + "\n");
}

String symbols = (new JavaParser(source.toString())).parse();
BufferedWriter bw = new BufferedWriter(
new OutputStreamWriter(System.out));

bw.write(symbols);
bw.flush();
} catch (IOException e) {

}
}

}
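
For reference, a sketch of driving this entry point the way parse_via_proc in bitshift/parser/__init__.py does, piping source over stdin and reading the symbol map from stdout. It assumes the jar has been built with Maven into parsers/java/parsing.jar and that the script runs from the repository root.

import json
import subprocess

# Mirrors PARSER_COMMANDS['Java']; the relative path assumes the repo root as cwd.
cmd = ["java", "-cp", "parsers/java/parsing.jar", "com.bitshift.parsing.Parse"]

source = "class Hello { void greet() { } }"
proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
output = proc.communicate(source)[0]
print(json.loads(output))   # symbol map emitted by JavaParser/JavaSymbols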

+ 214
- 0
parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java View File

@@ -0,0 +1,214 @@
package com.bitshift.parsing.parsers;

import java.util.HashMap;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.util.Stack;
import java.util.Arrays;

import com.google.common.base.Joiner;

import org.eclipse.jdt.core.JavaCore;
import org.eclipse.jdt.core.dom.AST;
import org.eclipse.jdt.core.dom.ASTNode;
import org.eclipse.jdt.core.dom.ASTParser;
import org.eclipse.jdt.core.dom.ASTVisitor;
import org.eclipse.jdt.core.dom.CompilationUnit;
import org.eclipse.jdt.core.dom.ClassInstanceCreation;
import org.eclipse.jdt.core.dom.ImportDeclaration;
import org.eclipse.jdt.core.dom.MethodDeclaration;
import org.eclipse.jdt.core.dom.MethodInvocation;
import org.eclipse.jdt.core.dom.Name;
import org.eclipse.jdt.core.dom.PackageDeclaration;
import org.eclipse.jdt.core.dom.QualifiedName;
import org.eclipse.jdt.core.dom.SimpleName;
import org.eclipse.jdt.core.dom.Statement;
import org.eclipse.jdt.core.dom.TypeDeclaration;
import org.eclipse.jdt.core.dom.VariableDeclarationFragment;

import com.bitshift.parsing.symbols.Symbols;
import com.bitshift.parsing.symbols.JavaSymbols;

/*TODO: Work on parsing partial java code.*/
public class JavaParser {
private String source;

public JavaParser(String source) {
this.source = source;
}

private Symbols genSymbols() {
ASTParser parser = ASTParser.newParser(AST.JLS3);
parser.setSource(this.source.toCharArray());

Map options = JavaCore.getOptions();
parser.setCompilerOptions(options);

CompilationUnit root = (CompilationUnit) parser.createAST(null);

NodeVisitor visitor = new NodeVisitor(root);
root.accept(visitor);

return visitor.symbols;
}

public String parse() {
JavaSymbols symbols = (JavaSymbols) this.genSymbols();
return symbols.toString();
}

class NodeVisitor extends ASTVisitor {

protected CompilationUnit root;
protected JavaSymbols symbols;
private Stack<HashMap<String, Object>> _cache;

public NodeVisitor(CompilationUnit root) {
this.root = root;
this.symbols = new JavaSymbols();
this._cache = new Stack<HashMap<String, Object>>();
}

public ArrayList<Integer> blockPosition(ASTNode node) {
int sl = this.root.getLineNumber(node.getStartPosition());
int sc = this.root.getColumnNumber(node.getStartPosition()) + 1;
int el = this.root.getLineNumber(node.getStartPosition()
+ node.getLength() - 1);
int ec = this.root.getColumnNumber(node.getStartPosition()
+ node.getLength() - 1) + 1;

return Symbols.createCoord(sl, sc, el, ec);
}

public boolean visit(MethodDeclaration node) {
HashMap<String, Object> data = new HashMap<String, Object>();
Name nameObj = node.getName();
String name = nameObj.isQualifiedName() ?
((QualifiedName) nameObj).getFullyQualifiedName() :
((SimpleName) nameObj).getIdentifier();

data.put("coord", this.blockPosition(node));
data.put("name", name);
this._cache.push(data);
return true;
}

public void endVisit(MethodDeclaration node) {
HashMap<String, Object> data = this._cache.pop();
String name = (String)data.remove("name");
this.symbols.insertMethodDeclaration("\"" + name + "\"", data);
}

public boolean visit(MethodInvocation node) {
HashMap<String, Object> data = new HashMap<String, Object>();
Name nameObj = node.getName();
String name = nameObj.isQualifiedName() ?
((QualifiedName) nameObj).getFullyQualifiedName() :
((SimpleName) nameObj).getIdentifier();

data.put("coord", this.blockPosition(node));
data.put("name", name);
this._cache.push(data);
return true;
}

public void endVisit(MethodInvocation node) {
HashMap<String, Object> data = this._cache.pop();
String name = (String)data.remove("name");
this.symbols.insertMethodInvocation("\"" + name + "\"", data);
}

public boolean visit(PackageDeclaration node) {
HashMap<String, Object> data = new HashMap<String, Object>();
this._cache.push(data);
return true;
}

public void endVisit(PackageDeclaration node) {
HashMap<String, Object> data = this._cache.pop();
String name = (String)data.remove("name");
this.symbols.setPackage(name);
}

public boolean visit(TypeDeclaration node) {
HashMap<String, Object> data = new HashMap<String, Object>();

data.put("coord", this.blockPosition(node));
this._cache.push(data);
return true;
}

public void endVisit(TypeDeclaration node) {
HashMap<String, Object> data = this._cache.pop();
String name = (String)data.remove("name");

if (node.isInterface()) {
this.symbols.insertInterfaceDeclaration("\"" + name + "\"", data);
} else {
this.symbols.insertClassDeclaration("\"" + name + "\"", data);
}
}

public boolean visit(VariableDeclarationFragment node) {
HashMap<String, Object> data = new HashMap<String, Object>();

data.put("coord", this.blockPosition(node));
this._cache.push(data);
return true;
}

public void endVisit(VariableDeclarationFragment node) {
HashMap<String, Object> data = this._cache.pop();
String name = (String)data.remove("name");
this.symbols.insertVariableDeclaration("\"" + name + "\"", data);
}

public boolean visit(QualifiedName node) {
if (!this._cache.empty()) {
HashMap<String, Object> data = this._cache.pop();

if(!data.containsKey("name")) {
String name = node.getFullyQualifiedName();
data.put("name", name);
}

this._cache.push(data);
}
return true;
}

public boolean visit(SimpleName node) {
if (!this._cache.empty()) {
HashMap<String, Object> data = this._cache.pop();

if(!data.containsKey("name")) {
String name = node.getIdentifier();
data.put("name", name);
}

this._cache.push(data);
}
return true;
}

public boolean visit(ImportDeclaration node) {
HashMap<String, Object> data = new HashMap<String, Object>();

data.put("coord", this.blockPosition(node));
this._cache.push(data);
return true;
}

public void endVisit(ImportDeclaration node) {
HashMap<String, Object> data = this._cache.pop();
String name = (String)data.remove("name");
String[] parts = name.split("\\.");

for(int i = parts.length; i > 1; i--) {
String pkg = Joiner.on(".").join(Arrays.copyOfRange(parts, 0, i));
this.symbols.insertImportStatement("\"" + pkg + "\"", data);
}
}
}
}

+ 71
- 0
parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java View File

@@ -0,0 +1,71 @@
package com.bitshift.parsing.parsers;

import java.util.Formatter;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.IOException;

import java.nio.ByteBuffer;

import java.net.Socket;

import com.bitshift.parsing.symbols.Symbols;

public abstract class Parser implements Runnable {

protected Socket clientSocket;
private String eos;

public Parser(Socket clientSocket) {
this.clientSocket = clientSocket;
}

protected String readFromClient() {
String fromClient = "";

try {
BufferedReader clientReader = new BufferedReader(
new InputStreamReader(this.clientSocket.getInputStream()));

int bytes = Integer.parseInt(clientReader.readLine());
this.eos = clientReader.readLine();

StringBuilder builder = new StringBuilder();
int i = 0;

while(i < bytes) {
char aux = (char)clientReader.read();
builder.append(aux);
i++;
}

fromClient = builder.toString();

} catch (IOException ex) {
}

return fromClient;
}

protected void writeToClient(String toClient) {
try {
BufferedWriter clientWriter = new BufferedWriter(
new OutputStreamWriter(this.clientSocket.getOutputStream()));

clientWriter.write(toClient);
clientWriter.write(eos);
clientWriter.flush();
this.clientSocket.close();
} catch (IOException ex) {
}
}

protected abstract Symbols genSymbols();

public abstract void run();

}
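
The reader and writer above imply a small length-prefixed protocol: a byte-count line, an end-of-stream token line, then the payload, with the response terminated by the same token. A purely hypothetical client sketch follows; the host, port, and token are illustrative only, since this commit does not show a server wired to these methods.

import socket

HOST, PORT = "localhost", 5001   # hypothetical; no listener is defined in this commit
EOS = "_EOS_"                    # end-of-stream token echoed back by writeToClient

source = "class Hello { }"
payload = "%d\n%s\n%s" % (len(source), EOS, source)

sock = socket.create_connection((HOST, PORT))
sock.sendall(payload)

response = ""
while not response.endswith(EOS):
    chunk = sock.recv(4096)
    if not chunk:
        break
    response += chunk
sock.close()

print(response[:-len(EOS)] if response.endswith(EOS) else response)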


+ 177
- 0
parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java View File

@@ -0,0 +1,177 @@
package com.bitshift.parsing.symbols;

import java.util.HashMap;
import java.util.ArrayList;
import com.bitshift.parsing.symbols.Symbols;

/*TODO: Overwrite toString.*/
public class JavaSymbols extends Symbols {

private String _packageName;
private HashMap<String, HashMap<String, Object>> _classes;
private HashMap<String, HashMap<String, Object>> _interfaces;
private HashMap<String, HashMap<String, Object>> _methods;
private HashMap<String, HashMap<String, Object>> _vars;
private HashMap<String, HashMap<String, Object>> _imports;

private final String assignKey = "\"assignments\"";
private final String useKey = "\"uses\"";

public JavaSymbols() {
_packageName = null;
_classes = new HashMap<String, HashMap<String, Object>>();
_interfaces = new HashMap<String, HashMap<String, Object>>();
_methods = new HashMap<String, HashMap<String, Object>>();
_vars = new HashMap<String, HashMap<String, Object>>();
_imports = new HashMap<String, HashMap<String, Object>>();
}

public boolean setPackage(String name) {
_packageName = name;
return true;
}

public boolean insertClassDeclaration(String name, HashMap<String, Object> data) {
ArrayList<Object> assignments = new ArrayList<Object>(10);
ArrayList<Object> uses = new ArrayList<Object>(10);
HashMap<String, Object> klass = new HashMap<String, Object>();

assignments.add(data.get("coord"));
klass.put(assignKey, assignments);
klass.put(useKey, uses);

this._classes.put(name, klass);
return true;
}

public boolean insertInterfaceDeclaration(String name, HashMap<String, Object> data) {
ArrayList<Object> assignments = new ArrayList<Object>(10);
ArrayList<Object> uses = new ArrayList<Object>(10);
HashMap<String, Object> klass = new HashMap<String, Object>();

assignments.add(data.get("coord"));
klass.put(assignKey, assignments);
klass.put(useKey, uses);

this._interfaces.put(name, klass);
return true;
}

public boolean insertMethodDeclaration(String name, HashMap<String, Object> data) {
HashMap<String, Object> method = this._methods.get(name);
if (method == null) {
method = new HashMap<String, Object>();
ArrayList<Object> assignments = new ArrayList<Object>(10);
ArrayList<Object> uses = new ArrayList<Object>(10);

assignments.add(data.get("coord"));
method.put(assignKey, assignments);
method.put(useKey, uses);
} else {
ArrayList<Object> assignments = (ArrayList<Object>)method.get(assignKey);

assignments.add(data.get("coord"));
method.put(assignKey, assignments);
}

this._methods.put(name, method);
return true;
}
public boolean insertMethodInvocation(String name, HashMap<String, Object> data) {
HashMap<String, Object> method = this._methods.get(name);
if (method == null) {
method = new HashMap<String, Object>();
ArrayList<Object> assignments = new ArrayList<Object>(10);
ArrayList<Object> uses = new ArrayList<Object>(10);

uses.add(data.get("coord"));
method.put(assignKey, assignments);
method.put(useKey, uses);
} else {
ArrayList<Object> uses = (ArrayList<Object>)method.get(useKey);

uses.add(data.get("coord"));
method.put(useKey, uses);
}

this._methods.put(name, method);
return true;
}

public boolean insertVariableDeclaration(String name, HashMap<String, Object> data) {
HashMap<String, Object> var = this._vars.get(name);
if (var == null) {
var = new HashMap<String, Object>();
ArrayList<Object> assignments = new ArrayList<Object>(10);
ArrayList<Object> uses = new ArrayList<Object>(10);

assignments.add(data.get("coord"));
var.put(assignKey, assignments);
var.put(useKey, uses);
} else {
ArrayList<Object> assignments = (ArrayList<Object>)var.get(assignKey);

assignments.add(data.get("coord"));
var.put(assignKey, assignments);
}

this._vars.put(name, var);
return true;
}

public boolean insertVariableAccess(String name, HashMap<String, Object> data) {
HashMap<String, Object> var = this._vars.get(name);
if (var == null) {
var = new HashMap<String, Object>();
ArrayList<Object> assignments = new ArrayList<Object>(10);
ArrayList<Object> uses = new ArrayList<Object>(10);

uses.add(data.get("coord"));
var.put(assignKey, assignments);
var.put(useKey, uses);
} else {
ArrayList<Object> uses = (ArrayList<Object>)var.get(useKey);

uses.add(data.get("coord"));
var.put(useKey, uses);
}

this._vars.put(name, var);
return true;
}

public boolean insertImportStatement(String name, HashMap<String, Object> data) {
HashMap<String, Object> lib = this._imports.get(name);
if (lib == null) {
lib = new HashMap<String, Object>();
ArrayList<Object> assignments = new ArrayList<Object>(10);
ArrayList<Object> uses = new ArrayList<Object>(10);

uses.add(data.get("coord"));
lib.put(assignKey, assignments);
lib.put(useKey, uses);
} else {
ArrayList<Object> uses = (ArrayList<Object>)lib.get(useKey);

uses.add(data.get("coord"));
lib.put(useKey, uses);
}

this._imports.put(name, lib);
return true;
}

public String toString() {
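// HashMap.toString() renders entries as key=value, so '=' is swapped for
// ':' below to produce a JSON-like string; the trailing comma is trimmed.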
StringBuilder builder = new StringBuilder();
builder.append("\"classes\":" + this._classes + ",");
builder.append("\"interfaces\":" + this._interfaces + ",");
builder.append("\"functions\":" + this._methods + ",");
builder.append("\"vars\":" + this._vars + ",");
builder.append("\"imports\":" + this._imports + ",");

String s = builder.toString().replaceAll("=", ":");
s = s.substring(0, s.length() - 1);
return "{" + s + "}";
}
}


+ 17
- 0
parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java View File

@@ -0,0 +1,17 @@
package com.bitshift.parsing.symbols;

import java.util.ArrayList;

public abstract class Symbols {

public Symbols() {

}

public static ArrayList<Integer> createCoord(Integer startLine, Integer startCol, Integer endLine, Integer endCol) {
ArrayList<Integer> coord = new ArrayList<Integer>(4);
coord.add(startLine); coord.add(startCol); coord.add(endLine); coord.add(endCol);
return coord;
}

}

+ 4
- 0
parsers/ruby/Gemfile View File

@@ -0,0 +1,4 @@
source 'https://rubygems.org'

gem 'ruby_parser'
gem 'sexp_processor'

+ 6
- 0
parsers/ruby/Rakefile View File

@@ -0,0 +1,6 @@
require 'pp'
require File.expand_path('../lib/parser.rb', __FILE__)

task :parse do |t|
parse
end

+ 137
- 0
parsers/ruby/lib/parser.rb View File

@@ -0,0 +1,137 @@
require 'ripper'

def parse
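# Read Ruby source from standard input, walk its sexp tree for symbols, and
# print the result as a JSON-like string.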
source = STDIN.read
walker = TreeWalker.new(source)
walker.parse
puts walker.to_s
end

class TreeWalker < Ripper::SexpBuilder
attr_accessor :symbols

def initialize(source)
ns_hash = Hash.new {
|hash, key|
hash[key] = {
:assignments => [], :uses => []
}
}
class_hash = ns_hash.clone
function_hash = ns_hash.clone
var_hash = ns_hash.clone

@symbols = {
:namespaces => ns_hash,
:classes => class_hash,
:functions => function_hash,
:vars => var_hash
}

super(source)
end

def block_position(node)
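# Walk `node`'s sexp to find the position arrays bounding this block and
# return [start_line, start_col, end_line, end_col]; when only a single
# position is found, the end is reported as [start_line, -1].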
last_node = node[0]
while last_node.is_a? Array
sp = last_node
while not (last_el = last_node[last_node.count - 1]) or
(last_el.is_a? Array and last_el[last_el.count - 1].nil?)
last_node = last_node[0..last_node.count - 2]
end
last_node = last_el
end

last_node = node[0]
while last_node.is_a? Array
ep = last_node
while not (last_el = last_node[last_node.count - 1]) or
(last_el.is_a? Array and last_el[last_el.count - 1].nil?)
last_node = last_node[0..last_node.count - 2]
end
last_node = last_el
end

if sp == ep
return sp + [sp[0], -1]
end
return sp + ep
end

def on_module(*node)
pos = block_position(node)
name = node[0][1][1]
symbols[:namespaces][name][:assignments] << pos
return node
end

def on_class(*node)
pos = block_position(node)
name = node[0][1][1]
symbols[:classes][name][:assignments] << pos
return node
end

def on_def(*node)
pos = block_position(node)
name = node[0][1]
symbols[:functions][name][:assignments] << pos
return node
end

def on_call(*node)
pos = block_position(node)
name = node[node.count - 1][1]
symbols[:functions][name][:uses] << pos
return node
end

def on_vcall(*node)
pos = block_position(node)
name = node[0][1]
symbols[:functions][name][:uses] << pos
return node
end

def on_assign(*node)
pos = block_position(node)
return node if not node[0][0].is_a? Array
name = node[0][0][1]
symbols[:vars][name][:assignments] << pos
return node
end

def on_var_field(*node)
pos = block_position(node)
name = node[0][1]
symbols[:vars][name][:uses] << pos
return node
end

def on_var_ref(*node)
pos = block_position(node)
name = node[0][1]
symbols[:vars][name][:uses] << pos
return node
end

def on_command(*node)
# catch require statements
end

def to_s
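# Convert symbol keys to strings and "=>" to ":" so the output reads as a
# JSON-like string.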
new_symbols = Hash.new {|hash, key| hash[key] = Hash.new}

symbols.each do |type, sym_list|
sym_list.each do |name, sym|
new_symbols[type.to_s][name.to_s] = {
"assignments" => sym[:assignments],
"uses" => sym[:uses]}
end
end

str = new_symbols.to_s
str = str.gsub(/=>/, ":")
return str
end
end

+ 14
- 0
setup.py View File

@@ -0,0 +1,14 @@
from setuptools import setup, find_packages

setup(
name = "bitshift",
version = "0.1.dev",
packages = find_packages(),
install_requires = [
"Flask>=0.10.1", "gunicorn>=18.0", "pygments>=1.6", "requests>=2.2.0",
"GitPython>=0.3.2.RC1", "beautifulsoup4>=3.2.1", "oursql>=0.9.3.1",
"mmh3>=2.3", "PyYAML>=3.11", "python-dateutil>=2.2", "cchardet>=0.3.5"],
author = "Benjamin Attal, Ben Kurtovic, Severyn Kozak",
license = "MIT",
url = "https://github.com/earwig/bitshift"
)

+ 65
- 0
static/css/lib/github.css View File

@@ -0,0 +1,65 @@
td.linenos { background: rgba(65,131,196,0.05); padding-right: 10px; border-right: 1px solid #bbb; }
span.lineno { background: rgba(65,131,196,0.05); padding: 0 5px 0 5px; }
pre { line-height: 125% }
.highlighttable { background-color: #fff; padding-left: 10px; width: inherit; height: inherit; }
.hll { display: block }
.c { color: #999988; font-style: italic } /* Comment */
.err { color: #a61717; background-color: #e3d2d2 } /* Error */
.k { color: #000000; font-weight: bold } /* Keyword */
.o { color: #000000; font-weight: bold } /* Operator */
.cm { color: #999988; font-style: italic } /* Comment.Multiline */
.cp { color: #999999; font-weight: bold; font-style: italic } /* Comment.Preproc */
.c1 { color: #999988; font-style: italic } /* Comment.Single */
.cs { color: #999999; font-weight: bold; font-style: italic } /* Comment.Special */
.gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.ge { color: #000000; font-style: italic } /* Generic.Emph */
.gr { color: #aa0000 } /* Generic.Error */
.gh { color: #999999 } /* Generic.Heading */
.gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.go { color: #888888 } /* Generic.Output */
.gp { color: #555555 } /* Generic.Prompt */
.gs { font-weight: bold } /* Generic.Strong */
.gu { color: #aaaaaa } /* Generic.Subheading */
.gt { color: #aa0000 } /* Generic.Traceback */
.kc { color: #000000; font-weight: bold } /* Keyword.Constant */
.kd { color: #000000; font-weight: bold } /* Keyword.Declaration */
.kn { color: #000000; font-weight: bold } /* Keyword.Namespace */
.kp { color: #000000; font-weight: bold } /* Keyword.Pseudo */
.kr { color: #000000; font-weight: bold } /* Keyword.Reserved */
.kt { color: #445588; font-weight: bold } /* Keyword.Type */
.m { color: #009999 } /* Literal.Number */
.s { color: #d01040 } /* Literal.String */
.na { color: #008080 } /* Name.Attribute */
.nb { color: #0086B3 } /* Name.Builtin */
.nc { color: #445588; font-weight: bold } /* Name.Class */
.no { color: #008080 } /* Name.Constant */
.nd { color: #3c5d5d; font-weight: bold } /* Name.Decorator */
.ni { color: #800080 } /* Name.Entity */
.ne { color: #990000; font-weight: bold } /* Name.Exception */
.nf { color: #990000; font-weight: bold } /* Name.Function */
.nl { color: #990000; font-weight: bold } /* Name.Label */
.nn { color: #555555 } /* Name.Namespace */
.nt { color: #000080 } /* Name.Tag */
.nv { color: #008080 } /* Name.Variable */
.ow { color: #000000; font-weight: bold } /* Operator.Word */
.w { color: #bbbbbb } /* Text.Whitespace */
.mf { color: #009999 } /* Literal.Number.Float */
.mh { color: #009999 } /* Literal.Number.Hex */
.mi { color: #009999 } /* Literal.Number.Integer */
.mo { color: #009999 } /* Literal.Number.Oct */
.sb { color: #d01040 } /* Literal.String.Backtick */
.sc { color: #d01040 } /* Literal.String.Char */
.sd { color: #d01040 } /* Literal.String.Doc */
.s2 { color: #d01040 } /* Literal.String.Double */
.se { color: #d01040 } /* Literal.String.Escape */
.sh { color: #d01040 } /* Literal.String.Heredoc */
.si { color: #d01040 } /* Literal.String.Interpol */
.sx { color: #d01040 } /* Literal.String.Other */
.sr { color: #009926 } /* Literal.String.Regex */
.s1 { color: #d01040 } /* Literal.String.Single */
.ss { color: #990073 } /* Literal.String.Symbol */
.bp { color: #999999 } /* Name.Builtin.Pseudo */
.vc { color: #008080 } /* Name.Variable.Class */
.vg { color: #008080 } /* Name.Variable.Global */
.vi { color: #008080 } /* Name.Variable.Instance */
.il { color: #009999 } /* Literal.Number.Integer.Long */

+ 64
- 0
static/css/lib/highlight.css View File

@@ -0,0 +1,64 @@
td.linenos { background-color: #f0f0f0; padding-right: 10px; }
span.lineno { background-color: #f0f0f0; padding: 0 5px 0 5px; }
pre { line-height: 125% }
.highlighttable { background-color: #49483e; width: inherit; height: inherit; }
.hll { display: block }
.highlight { background: #272822; color: #f8f8f2 }
.c { color: #75715e } /* Comment */
.err { color: #960050; background-color: #1e0010 } /* Error */
.k { color: #66d9ef } /* Keyword */
.l { color: #ae81ff } /* Literal */
.n { color: #f8f8f2 } /* Name */
.o { color: #f92672 } /* Operator */
.p { color: #f8f8f2 } /* Punctuation */
.cm { color: #75715e } /* Comment.Multiline */
.cp { color: #75715e } /* Comment.Preproc */
.c1 { color: #75715e } /* Comment.Single */
.cs { color: #75715e } /* Comment.Special */
.ge { font-style: italic } /* Generic.Emph */
.gs { font-weight: bold } /* Generic.Strong */
.kc { color: #66d9ef } /* Keyword.Constant */
.kd { color: #66d9ef } /* Keyword.Declaration */
.kn { color: #f92672 } /* Keyword.Namespace */
.kp { color: #66d9ef } /* Keyword.Pseudo */
.kr { color: #66d9ef } /* Keyword.Reserved */
.kt { color: #66d9ef } /* Keyword.Type */
.ld { color: #e6db74 } /* Literal.Date */
.m { color: #ae81ff } /* Literal.Number */
.s { color: #e6db74 } /* Literal.String */
.na { color: #a6e22e } /* Name.Attribute */
.nb { color: #f8f8f2 } /* Name.Builtin */
.nc { color: #a6e22e } /* Name.Class */
.no { color: #66d9ef } /* Name.Constant */
.nd { color: #a6e22e } /* Name.Decorator */
.ni { color: #f8f8f2 } /* Name.Entity */
.ne { color: #a6e22e } /* Name.Exception */
.nf { color: #a6e22e } /* Name.Function */
.nl { color: #f8f8f2 } /* Name.Label */
.nn { color: #f8f8f2 } /* Name.Namespace */
.nx { color: #a6e22e } /* Name.Other */
.py { color: #f8f8f2 } /* Name.Property */
.nt { color: #f92672 } /* Name.Tag */
.nv { color: #f8f8f2 } /* Name.Variable */
.ow { color: #f92672 } /* Operator.Word */
.w { color: #f8f8f2 } /* Text.Whitespace */
.mf { color: #ae81ff } /* Literal.Number.Float */
.mh { color: #ae81ff } /* Literal.Number.Hex */
.mi { color: #ae81ff } /* Literal.Number.Integer */
.mo { color: #ae81ff } /* Literal.Number.Oct */
.sb { color: #e6db74 } /* Literal.String.Backtick */
.sc { color: #e6db74 } /* Literal.String.Char */
.sd { color: #e6db74 } /* Literal.String.Doc */
.s2 { color: #e6db74 } /* Literal.String.Double */
.se { color: #ae81ff } /* Literal.String.Escape */
.sh { color: #e6db74 } /* Literal.String.Heredoc */
.si { color: #e6db74 } /* Literal.String.Interpol */
.sx { color: #e6db74 } /* Literal.String.Other */
.sr { color: #e6db74 } /* Literal.String.Regex */
.s1 { color: #e6db74 } /* Literal.String.Single */
.ss { color: #e6db74 } /* Literal.String.Symbol */
.bp { color: #f8f8f2 } /* Name.Builtin.Pseudo */
.vc { color: #f8f8f2 } /* Name.Variable.Class */
.vg { color: #f8f8f2 } /* Name.Variable.Global */
.vi { color: #f8f8f2 } /* Name.Variable.Instance */
.il { color: #ae81ff } /* Literal.Number.Integer.Long */

BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


+ 7
- 0
static/css/lib/jqueryui.custom.min.css
File diff suppressed because it is too large
View File


+ 0
- 4
static/css/main.css View File

@@ -1,4 +0,0 @@
/* Global project stylesheet.
*/
p {
font-size: 1.5em; }

BIN
View File


BIN
View File


+ 1
- 0
static/google10335120a3066831.html View File

@@ -0,0 +1 @@
google-site-verification: google10335120a3066831.html

BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


+ 19
- 0
static/js/about.js View File

@@ -0,0 +1,19 @@
/*
* @file Implements a parallax effect on the about page.
*/

var lastVertPos = $(window).scrollTop();

/*
* Scroll `div#img-[1-4]` at a greater speed than the text, producing a
* parallax effect.
*/
$(window).scroll(function(e){
var currVertPos = $(window).scrollTop();
var delta = currVertPos - lastVertPos;
$(".bg").each(function(){
$(this).css("top", parseFloat($(this).css("top")) -
delta * $(this).attr("speed") + "px");
});
lastVertPos = currVertPos;
});

+ 175
- 0
static/js/index.advanced-search-form.js View File

@@ -0,0 +1,175 @@
/*
* @file Manages all advanced search form logic.
*/

var searchGroups = $("div#search-groups");

/*
* Load all advanced search form libraries.
*/
function loadInputFieldWidgets(){
$(".search-group input#date-last-modified").datepicker();
$(".search-group input#date-created").datepicker();
$(".search-group input#autocomplete").autocomplete({
source: function(request, response){
var matcher = new RegExp(
$.ui.autocomplete.escapeRegex(request.term), "i");
response($.grep(AUTOCOMPLETE_LANGUAGES, function(item){
return matcher.test(item);
}));
}
});
};
loadInputFieldWidgets();

/*
* Set all advanced search form button callbacks.
*/
(function setSearchFormCallbacks(){
// Create a new search group, and update the `#sidebar` checklist.
$("button#add-group").click(function(){
$("div#sidebar input[type=checkbox]").prop("checked", false);

searchGroups.children("#selected").removeAttr("id");
var searchGroup = $("<div/>", {
class : "search-group",
id : "selected"
});
searchGroups.append(
searchGroup.append(createSearchGroupInput("language", "languages")));
loadInputFieldWidgets();
$("div#sidebar input[type=checkbox]#language").prop("checked", true);

searchGroups[0].scrollTop = searchGroups[0].scrollHeight;
});

// Remove the currently selected group if it's not the only one, and mark
// one of its siblings as selected.
$("button#remove-group").click(function(){
var currentGroup = $("div.search-group#selected");

if($("div.search-group").length == 1)
return;
else {
var nextGroup = currentGroup.prev();
if(nextGroup.size() == 0)
nextGroup = currentGroup.next();
}
currentGroup.remove();
nextGroup.click();
});

// Select a search group, and update the `#sidebar` checklist accordingly.
$(document).on("click", "div.search-group", function(){
searchGroups.children("#selected").removeAttr("id");
$(this).attr("id", "selected");
$("div#sidebar input[type=checkbox]").prop("checked", false);
$(this).find("input[type=text]").each(function(){
var checkBoxSelector = "div#sidebar input[type=checkbox]";
$(checkBoxSelector + "#" + $(this).attr("class").split(" ")[0]).
prop("checked", true);
})
});

// Toggle the presence of an input field.
$("div#sidebar input[type=checkbox]").click(function(){
var fieldId = $(this).prop("id");
if($(this).is(":checked")){
$("div.search-group#selected").append(
$.parseHTML(createSearchGroupInput(
fieldId, $(this).next("label").children("div").
text())));
loadInputFieldWidgets();
if(fieldId.slice(0, 4) == "date")
$(".search-group#selected ." + fieldId).datepicker();
}
else {
if($(".search-group#selected").children("div").length > 1)
$(".search-group#selected #" + fieldId).remove()
else
$(this).prop("checked", true);
}
searchGroups[0].scrollTop = searchGroups[0].scrollHeight;
});

$("div#advanced-search button#submit").click(function(){
$("div#advanced-search").hide();
advancedSearchButton.removeClass("clicked");
assembleQuery();
populateResults();
})

var previousAdvancedQuery = "";
var searchBar = $("form#search-bar input[name=query]");

window.setInterval(function(){
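// Mirror the advanced-search form into the main search bar by polling for
// changes roughly 15 times per second.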
var currentQuery = assembleQuery();
if(currentQuery != previousAdvancedQuery){
previousAdvancedQuery = currentQuery;
searchBar.val(assembleQuery());
}
}, 1e3 / 15);
}());

/*
* Return an HTML string representing a new input field div in a search group.
*
* @param fieldId The id of the input field div, and its child elements.
* @param name The name to display next to the input field.
*/
function createSearchGroupInput(fieldId, name){
var fieldHTML = [
"<div id='" + fieldId + "'>",
"<div class='name'>" + name + "</div>",
"<input class='" + fieldId + "' name='" + fieldId + "' type='text'>",
"<input type='checkbox' name='regex'>",
"<span class='regex'>Regex</span>",
"</div>"
];

if(fieldId == "language")
fieldHTML[2] = [
"<input id='autocomplete' class='language'",
"name='language' type='text'>"
].join(" ");

return fieldHTML.join("");
}

/*
* Create a query from advanced-search groups.
*/
function assembleQuery(){
var groups = searchGroups.children(".search-group");
var groupQueries = [];

for(var group = 0; group < groups.length; group++){
var inputFields = groups[group].querySelectorAll("input[type=text]");
var regexCheckbox = groups[group].querySelectorAll("input[name=regex]");
var groupQuery = [];

for(var field = 0; field < inputFields.length; field++)
if(inputFields[field].value.length > 0)
groupQuery.push(genFieldQueryString(
inputFields[field], regexCheckbox[field].checked));

if(groupQuery.length > 0)
groupQueries.push(groupQuery.join(" AND "));
}

return groupQueries.join(" OR ");
}

/*
* Generate a processed query string for an input field's value.
*
* @param field (DOM element) An `input[type=text]` element.
* @param hasRegex (boolean) Whether or not the field's value has regex.
*
* @return The processed query.
*/
function genFieldQueryString(field, hasRegex){
var terms = field.value.replace(/\\/g, "\\\\").replace(/\"/g, "\\\"");
var query = field.getAttribute("name") + ":" + (hasRegex?"re:":"") + terms;
return '"' + query + '"';
}

+ 447
- 0
static/js/index.js View File

@@ -0,0 +1,447 @@
/*
* @file Manages all library initialization, jQuery callbacks, query entry
* callbacks, server querying, and results display for `index.html`.
*/

var advancedSearchDiv = $("div#advanced-search");
var advancedSearchButton = $("button#advanced-search");
var FINISH_TYPING_INTERVAL = 650;
var searchBar = $("form#search-bar input[type='text']")[0];
var resultsDiv = $("div#results")[0];

var typingTimer, scrollTimer, lastValue;
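// `typingTimer` initially refers to the handler function declared below;
// once that handler runs, it is reused to hold the pending timeout id.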
var searchResultsPage = 1;

/*
* Set all page callbacks.
*/
(function setHomePageCallbacks(){
var results = $('#results').get(0);

// Enable infinite scrolling down the results page.
$(window).scroll(function(){
if($(window).scrollTop() + $(window).height() == $(document).height() &&
resultsDiv.querySelectorAll(".result").length > 0)
loadMoreResults();

clearTimeout(scrollTimer);
if (!results.classList.contains('disable-hover'))
results.classList.add('disable-hover')

scrollTimer = setTimeout(function(){
if (results.classList.contains('disable-hover'))
results.classList.remove('disable-hover');
}, 200);
});

// Toggle the advanced-search form's visibility.
advancedSearchButton.click(function(){
var searchField = $("div#search-field");
if(!advancedSearchDiv.hasClass("visible")){
searchField.addClass("partly-visible");
advancedSearchDiv.fadeIn(500).addClass("visible");
advancedSearchButton.addClass("clicked");
}
else {
advancedSearchDiv.hide().removeClass("visible");
advancedSearchButton.removeClass("clicked");
if($("div#results .result").length == 0)
searchField.removeClass("partly-visible");
clearResults();
}
});

// Enable capturing the `enter` key.
$("form#search-bar").submit(function(event){
event.preventDefault();
return false;
});
searchBar.onkeyup = typingTimer;
}());

/*
* Set keyboard shortcut mappings.
*/
(function resultsHotkeys(){
/*
* If the currently viewed result is not the first, scroll to the previous
* result.
*/
var previousResult = function(){
var currResult = $(".display-all");
if(currResult.length) {
currResult.removeClass("display-all");
currResult = currResult.closest(".result").prev(".result");
} else {
currResult = $(document.querySelectorAll(".result")[0]);
}

currResult.addClass("display-all");
currResult.each(function(){
$('html,body').stop().animate({
scrollTop: $(this).offset().top - (
$(window).height() - $(this).outerHeight(true)) / 2
}, 140);
});
};

/*
* If the currently viewed result is not the last, scroll to the next
* result.
*/
var nextResult = function(){
var currResult = $(".display-all");
if(currResult.length) {
currResult.removeClass("display-all");
currResult = currResult.closest(".result").next(".result");
} else {
currResult = $(document.querySelectorAll(".result")[0]);
}

currResult.addClass('display-all');
currResult.each(function(){
$('html,body').stop().animate({
scrollTop: $(this).offset().top - (
$(window).height() - $(this).outerHeight(true)) / 2
}, 140);
});
};

var displayHotkeyHelp = function(){
var help = $("div#hotkey-help");
if(help.hasClass("hidden"))
help.fadeIn(420);
else
help.fadeOut(420);

$("div#body").toggleClass("faded");
help.toggleClass("hidden");
}

var hotkeyActions = {
"k" : previousResult,
"j" : nextResult,
"h" : previousSymbolMatch,
"l" : nextSymbolMatch,
"?" : displayHotkeyHelp
};

$(window).keypress(function(key){
for(var hotkey in hotkeyActions){
var keyChar = String.fromCharCode(key.keyCode);
if(keyChar == hotkey &&
!($(key.target).is("textarea") || $(key.target).is("input")))
hotkeyActions[keyChar]();
}
});
}());

// Enable infinite scrolling down the results page.
$(window).scroll(function() {
var searchField = $("div#search-field");
if($(window).scrollTop() + $(window).height() == $(document).height() &&
searchField.hasClass('partly-visible')){
loadMoreResults();
}
});

/*
* Clear the existing timer and set a new one whenever the user types text into the
* search bar.
*/
function typingTimer(event){
clearTimeout(typingTimer);

var enterKeyCode = 13;
if(event.keyCode != enterKeyCode){
if(lastValue != searchBar.value)
typingTimer = setTimeout(finishedTyping, FINISH_TYPING_INTERVAL);
}
else {
event.preventDefault();
finishedTyping();
return false;
}
};

/*
* Callback which queries the server whenever the user stops typing.
*
* Whenever the user doesn't type for a `FINISH_TYPING_INTERVAL` after having
* entered new text in the search bar, send the current query request to the
* server.
*/
function finishedTyping(){
lastValue = searchBar.value;
var searchField = $("div#search-field");

clearResults();
if(searchBar.value){
searchField.addClass("partly-visible");
populateResults();
}
else {
searchField.removeClass("partly-visible");
$("div#advanced-search").fadeOut(50);
advancedSearchButton.removeClass("clicked");
clearResults();
}
}

/*
* Removes any child elements of `div#results`.
*/
function clearResults(){
while(resultsDiv.firstChild)
resultsDiv.removeChild(resultsDiv.firstChild);
}

/*
* Create a result element based upon a codelet instance.
*
* @return {Element} The result element.
*/
function createResult(codelet) {
var maxAttributeLength = 20;

//Level 1
var newDiv = document.createElement("div"),
table = document.createElement("table"),
row = document.createElement("tr");
//Level 2
var displayInfo = document.createElement("div"),
codeElt = document.createElement("td"),
hiddenInfoContainer = document.createElement("td"),
hiddenInfo = document.createElement("div"),
cycle = document.createElement("div");
//Level 3
var title = document.createElement("span"),
site = document.createElement("span"),
nextMatch = document.createElement("a"),
prevMatch = document.createElement("a"),
dateModified = document.createElement("div"),
language = document.createElement("div"),
dateCreated = document.createElement("div"),
authors = document.createElement("div");

//Classes and ID's
newDiv.classList.add('result');

displayInfo.id = 'display-info';
codeElt.id = 'code';
hiddenInfo.id = 'hidden-info';
cycle.id = 'cycle-matches'

title.id = 'title';
site.id = 'site';
nextMatch.id = 'next-match';
nextMatch.href = '#';
prevMatch.id = 'prev-match';
prevMatch.href = '#';
dateModified.id = 'date-modified';
language.id = 'language';
dateCreated.id = 'date-created';
authors.id = 'authors';

//Add the bulk of the html
title.innerHTML = ' &raquo; <a href="' + codelet.url + '">'
+ codelet.name + '</a>';
site.innerHTML = '<a href="' + codelet.origin[1] + '">' +
codelet.origin[0] +'</a>';
nextMatch.innerHTML = 'next match';
prevMatch.innerHTML = 'prev match';
language.innerHTML = 'Language: <span>' + codelet.lang + '</span>';
dateModified.innerHTML = 'Last modified: <span>' + codelet.modified +
'</span>';
// Needs to be changed from int to string on the server
dateCreated.innerHTML = 'Created: <span>' +
codelet.created.substring(0, maxAttributeLength) + '</span>';

var currLength = 0;
var authorsList = [];
for(var auth = 0; auth < codelet.authors.length; auth++){
currLength += codelet.authors[auth].length;
if(6 < currLength){
authorsList.push("...");
break;
}
else
authorsList.push('<a href=#>' + codelet.authors[auth] + '</a>');
}
authors.innerHTML = "Authors: <span>" + authorsList.join(" ") + "</span>";

// Needs to be processed on the server
codeElt.innerHTML = '<div id=tablecontainer>' + codelet.code + '</div>';

//Event binding
$(newDiv).on('mousemove', function(e) {
var holdCondition = $('.disable-hover');

if(holdCondition.length == 0) {
$(this).siblings().removeClass('display-all');
$(this).addClass('display-all');
}
});

$(newDiv).on('mouseleave', function(e) {
var holdCondition = $('.disable-hover');

if(holdCondition.length == 0)
$(this).removeClass('display-all');
});

$(nextMatch).click(function(e) {
e.stopPropagation();
e.preventDefault();
nextSymbolMatch();
});

$(prevMatch).click(function(e) {
e.stopPropagation();
e.preventDefault();
previousSymbolMatch();
});

//Finish and append elements to parent elements
hiddenInfo.appendChild(dateCreated);
hiddenInfo.appendChild(dateModified);
hiddenInfo.appendChild(language);
hiddenInfo.appendChild(authors);

hiddenInfoContainer.appendChild(hiddenInfo);

row.appendChild(codeElt);
row.appendChild(hiddenInfoContainer);
table.appendChild(row);

displayInfo.appendChild(site);
displayInfo.appendChild(title);

cycle.appendChild(prevMatch);
cycle.appendChild(nextMatch);

newDiv.appendChild(displayInfo);
newDiv.appendChild(table);

return newDiv;
}

function previousSymbolMatch() {
var currResult = $(".display-all"),
currMatch = currResult.find(".hll.current"),
matches = currResult.find(".hll"),
scrollDiv = currResult.find("#tablecontainer");

if (currMatch.length == 0)
currMatch = $(matches[0]);
else
currMatch.removeClass('current');

var index = matches.index(currMatch.get(0)) - 1;
index = index <= 0 ? matches.length - 1 : index;
var newMatch = $(matches[index]);

scrollDiv.scrollTop(scrollDiv.scrollTop()
- scrollDiv.height() / 2
+ newMatch.position().top + newMatch.height() / 2);

newMatch.effect("highlight", {color: '#FFF'}, 750)
newMatch.addClass('current');
};

function nextSymbolMatch() {
var currResult = $(".display-all"),
currMatch = currResult.find(".hll.current"),
matches = currResult.find(".hll"),
scrollDiv = currResult.find("#tablecontainer");

if (currMatch.length == 0)
currMatch = $(matches[0]);
else
currMatch.removeClass("current");

var index = matches.index(currMatch.get(0)) + 1;
index = index >= matches.length ? 0 : index;
var newMatch = $(matches[index]);

scrollDiv.scrollTop(scrollDiv.scrollTop()
- scrollDiv.height() / 2
+ newMatch.position().top + newMatch.height() / 2);

newMatch.effect("highlight", {color: "#FFF"}, 750)
newMatch.addClass("current");
};

/*
* AJAX the current query string to the server.
*
* @return {Deferred} A jQuery Deferred which resolves to an array of
* `div.result` DOM elements, to fill `div#results`.
*/
function queryServer(){
var queryUrl = document.URL + "search.json?" + $.param({
"q" : searchBar.value,
"p" : searchResultsPage++,
"hl": 1
});

var results = $.Deferred();
$.getJSON(queryUrl, function(result){
var resultDivs = [];
if("error" in result)
insertErrorMessage(result["error"]);
else if(result["results"].length == 0 && searchResultsPage == 2)
insertErrorMessage("No search results.");
else
for(var codelet = 0; codelet < result["results"].length; codelet++)
resultDivs.push(createResult(result["results"][codelet]));
results.resolve(resultDivs);
});

return results;
}

/*
* Query the server with the current search string, and populate `div#results`
* with its response.
*/
function populateResults(){
searchResultsPage = 1;
loadMoreResults();
}

/*
* Query the server for the next results page, and add its codelets to
* `div#results`.
*/
function loadMoreResults(){
queryServer().done(function(results){
for(var result = 0; result < results.length; result++){
var newDiv = results[result];
resultsDiv.appendChild(newDiv);
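// Stagger the entrance animation: each result receives the "cascade"
// class 20ms after the previous one.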
setTimeout(
(function(divReference){
return function(){
divReference.classList.add("cascade");
};
}(newDiv)),
result * 20);
}
});
}

/*
* Display an error message in the results area.
*
* @param msg (str) The message string.
*/
function insertErrorMessage(msg){
var error = $(
[
"<div id='error'><span id='s1'>Error</span> ",
"<span id='s2'>&raquo;</span> </div>"
].join(""));
error.append(msg);
resultsDiv.appendChild(error[0]);
}

+ 7
- 0
static/js/lib/jquery-ui.min.js
File diff suppressed because it is too large
View File


+ 4
- 0
static/js/lib/jquery.min.js
File diff suppressed because it is too large
View File


+ 6
- 0
static/js/main.js View File

@@ -0,0 +1,6 @@
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-51910807-1', 'bitshift.it');
ga('send', 'pageview');

+ 3
- 0
static/robots.txt View File

@@ -0,0 +1,3 @@
User-agent: *
Disallow: /search.json
Sitemap: http://www.bitshift.it/sitemap.xml

+ 18
- 0
static/sass/_logo.sass View File

@@ -0,0 +1,18 @@
a#logo
letter-spacing: 0.3em
text-decoration: none

div#logo
font-size: 400%
padding-bottom: 0.2em
text-align: center

#logo-bit
color: $baseColor1

#logo-angle
color: $baseColor3

#logo-shift
color: $baseColor2
font-style: italic

+ 20
- 2
static/sass/_mixins.sass View File

@@ -1,11 +1,29 @@
/*
Partial to contain all globally-applicable mixins
Partial to contain all globally-applicable mixins.
*/

// add vendor prefixes for the property $property with value $value
// Add vendor prefixes for the property $property with value $value.
@mixin vendor($property, $value)
-webkit-#{$property}: $value
-moz-#{$property}: $value
-ms-#{$property}: $value
-o-#{$property}: $value
#{$property}: $value

// Add portable opacity style.
@mixin opaque($opacity)
@include vendor(opacity, $opacity)
filter: alpha(opacity=$opacity)

@mixin delay($time)
transition-delay: $time
-webkit-transition-delay: $time

.t1
@include vendor(transition, all 0.1s ease-out)

.t2
@include vendor(transition, all 0.2s ease-out)

.t3
@include vendor(transition, all 0.3s ease-out)

+ 12
- 0
static/sass/_variables.sass View File

@@ -0,0 +1,12 @@
/*
Partial to contain all globally-applicable variables.
*/

$baseColor1: #A31F34
$baseColor2: #8A8B8C
$baseColor3: #C2C0BF

$lightGray: #F1F1F1

$lightBlue: #67A0FD
$blue: #3177EB

+ 139
- 0
static/sass/about.sass View File

@@ -0,0 +1,139 @@
/*
Stylesheet for `templates/about.html`
*/

@import mixins
@import variables

$centered-section-min-width: 500px

div.bg
$img-height: 650px

position: fixed
width: 100%
left: 0
z-index: -1

&#img-1
background: url(../img/about/bg1.png) no-repeat
background-size: cover
height: 600px
top: -300px

&#img-2
background: url(../img/about/bg2.png) no-repeat
background-size: cover
height: $img-height + 300
top: 1150px

&#img-3
background: url(../img/about/bg3.png) no-repeat
background-size: cover
height: $img-height + 300
top: 2050px

&#img-4
background: url(../img/about/bg4.png) no-repeat
background-size: cover
height: $img-height + 400
top: 3200px

div.section
background-color: white
border: 1px solid $baseColor2
margin-bottom: 200px
margin-top: 300px
padding-bottom: 80px
padding-top: 20px
overflow: hidden

&#top
margin-top: 0px

div#wrap
width: 100%
position: relative
padding-top: 56.782% // aspect ratio, 9 / 16

iframe#vimeo
border: 1px solid $baseColor3
margin-top: 40px

position: absolute
top: 0
left: 0
height: 100%
width: 100%

div.centered
font-size: 110%
line-height: 150%
margin-left: auto
margin-right: auto
min-width: 500px
width: 65%

&#how
b
font-family: monospace
font-size: 110%

ul
list-style: none

span
color: $baseColor1
font-weight: bold

h1
color: $baseColor1

span
color: $baseColor2

a
color: #727070
font-style: italic
text-decoration: none

&:hover
@extend .t3

color: #575757

span#title
color: $baseColor1
font-weight: bold

div.person
font-size: 80%
overflow: hidden

&#top
margin-top: 40px

>div
$image-min-width: 100px

display: inline-block
height: 100%
margin-bottom: 40px
vertical-align: top

&.photo
margin-right: 40px
width: $image-min-width

img
display: block
height: $image-min-width
width: $image-min-width

&.bio
min-width: $centered-section-min-width - $image-min-width - 50px
width: 70%

h1
font-size: 130%
margin: 0

+ 63
- 0
static/sass/docs.sass View File

@@ -0,0 +1,63 @@
@import mixins
@import variables
@import logo

h1
color: $baseColor1

span
color: $baseColor2

h2, h3
color: $baseColor2 * 0.8

p
line-height: 1.8em

ul
list-style: none
margin-bottom: 2%

li
margin-bottom: 2%

a
color: #727070
font-style: italic
text-decoration: none

&:hover
@extend .t3

color: #575757

span
&.code
background-color: $baseColor3 * 1.2
font-family: monospace
padding: 5px

&.string
color: $baseColor1
font-family: monospace
font-size: 1.1em

&.title
color: $baseColor1
font-weight: bold

table.code-example
border-collapse: collapse
width: 100% !important

td.linenos
border: 1px solid $baseColor2
padding-left: 10px
width: 20px

td.code
padding-left: 10px

li#sec3 span#gasp
color: $baseColor2 * 0.8
font-style: italic

+ 19
- 0
static/sass/error404.sass View File

@@ -0,0 +1,19 @@
@import variables

div#message
color: $baseColor1
font-family: monospace
font-size: 700%
font-weight: normal
margin-top: 8%
text-align: center

span
&.light
color: $baseColor3

&.dark
color: $baseColor2

&.red
color: $baseColor1

+ 443
- 0
static/sass/index.sass View File

@@ -0,0 +1,443 @@
/*
Stylesheet for `templates/index.html`.
*/

@import mixins
@import variables
@import logo

$minSearchFieldsWidth: 490px
$codeWidth: 700px
$hiddenInfoWidth: 300px

.ui-datepicker
font-size: 70%

.ui-autocomplete
max-height: 30%
overflow-x: hidden
overflow-y: scroll
padding: 0px

>li.ui-menu-item a.ui-state-focus
@include vendor(transition, background-color 0.3s ease-out)

div#body
@extend .t3

&.faded
@include opaque(0.8)

div#hotkey-help
$width: 40%

background-color: white
border: 1px solid $baseColor3
left: 50% - $width / 2
min-width: 400px
padding: 35px
position: fixed
top: 30%
width: $width
z-index: 200

&.hidden
display: none

div
border-bottom: 1px solid $baseColor2
color: $baseColor1
font-size: 130%
padding-bottom: 8px
text-align: center

ul
list-style: none
margin-left: auto
margin-right: auto
position: relative
width: 300px

li
margin-bottom: 4px

span.hotkey
color: $baseColor1
font-family: monospace
font-size: 130%
font-weight: bold

span.seperator
color: $baseColor2

div#search-field
@extend .t2

bottom: 0
height: 50%
left: 0
margin: auto
margin-top: 15%
max-height: 100px
right: 0
position: absolute
z-index: 2
top: 0
width: 40%

form#search-bar
min-width: $minSearchFieldsWidth

input[type="text"], button
@extend .t3
@include vendor(box-sizing, border-box)

border: 1px solid $baseColor2
font-size: 110%
margin-bottom: 0px
padding: 6px

input[type="text"]#query
width: 100%

&:hover
border-color: $baseColor1

button#advanced-search
background-color: white
border: none
color: $baseColor2
font-size: 1.1em
font-style: italic

&:hover
color: $baseColor1
cursor: pointer

&.clicked
color: $baseColor1

&:focus
outline: 0

&.partly-visible
margin-top: 0%
position: absolute
width: 100%

#logo
position: absolute
top: -1%
left: 1%

span
font-size: 50%

form#search-bar
padding-top: 3%
margin-left: auto
margin-right: auto
min-width: 800px
width: 60%

input
@extend .t3

&#query
width: 80%

&:hover
border: 1px solid $baseColor1

button#advanced-search
margin-left: 30px

div#advanced-search
background-color: white
border: 1px solid $baseColor3
display: none
font-size: 96%
height: 400px
min-width: $minSearchFieldsWidth
padding-top: 0px
overflow-x: auto
overflow-y: hidden

#heading
color: $baseColor2
display: block
font-size: 120%
padding-left: 1%
padding-top: 1%
width: 100%

div
display: inline-block
font-size: 110%

&#col1
width: 25%

&#col2
width: 75%

button
border: none
color: white
float: right
font-size: 80%
font-weight: bold
margin-right: 1%
padding-left: 4%
padding-right: 4%

&:hover
cursor: pointer

&#add-group
background-color: #7FAFFC

&:hover
background-color: #609AF8

&#remove-group
background-color: #E74C4C

&:hover
background-color: #D63636

&#submit
background-color: #4ee76c

&:hover
background-color: #0FDD38

>div
@include vendor(box-sizing, border-box)

display: inline-block
float: left

#sidebar
padding-left: 1%
width: 25%

>ul
list-style: none
padding-left: 0
margin-bottom: 8%
margin-top: 2%

li
margin-bottom: 2%

label
user-select: none

div
@extend .t3

background-color: $lightGray
border: none
padding: 3%
width: 85%

&:hover, &.selectedInputField
@extend .t3

background-color: $baseColor2
color: white
cursor: pointer
width: 90%

input[type="checkbox"]
display: none

&:checked + label > div
@extend .selectedInputField

background-color: $baseColor1
color: white
width: 90%

#search-groups
margin-top: 1%
max-height: 87%
overflow-y: auto
width: 75%

.search-group
@include vendor(transition, all 0.6s ease-out)

background-color: $lightGray
padding: 1%
margin-bottom: 2%
width: 97%

>div
margin-bottom: 0.7%

>div.name
display: inline-block
font-size: 90%
width: 20%

>input[type=text]
display: inline-block
padding: 2px
width: 60%

>input[type=checkbox]
margin-left: 2%

&:checked + span
@extend .t2

color: green
font-weight: bold

&:hover
cursor: pointer

span.regex
font-size: 80%

&:hover
cursor: pointer
background-color: #d6d6d6

&#selected
background-color: #CACACA

div#results
margin: 3% auto 0 auto
margin-left: auto
margin-right: auto
width: 80%

a
@extend .t3

text-decoration: none

&:hover
color: $baseColor1

div#error
font-size: 170%
margin-top: 22%
text-align: center

span
margin-right: 10px
font-size: 150%

&#s1
color: $baseColor1

&#s2
color: $baseColor2

&.disable-hover
pointer-events: none

div.result
@extend .t3

height: 200px
margin-bottom: 100%
pointer-events: auto

table
border-collapse: collapse
height: inherit

tr
@extend .t3
@include opaque(0.8)

height: inherit

&.cascade
@extend .t1
margin-bottom: 15%

&.display-all
table tr
@include opaque(1.0)

#tablecontainer
max-width: 70%
overflow: auto !important

div#display-info
font-size: 1.3em
padding: 5px 0px 5px 5px
width: 100%

#title
margin-right: 10px

#site
text-transform: capitalize

td#code
@include vendor(transition, width 0.2s ease-in-out)

width: $codeWidth
max-width: $codeWidth
height: inherit
padding: 0px

#tablecontainer
width: 100%
height: inherit
overflow: hidden
background-color: #49483e
position: relative
z-index: 1

table
border-collapse: collapse
font-family: monospace

.linenos
padding-left: 8px

pre
margin-top: 5px

.code pre
margin-top: 5px

.hll
background: #5B5A51

div#hidden-info
width: $hiddenInfoWidth
margin-left: -$hiddenInfoWidth
height: 100%
padding-top: 40px
font-size: 1.2em
line-height: 1.5em
position: relative
z-index: 0

@include vendor(transition, margin-left 0.2s ease-in-out)

.display-all &
margin-left: -$hiddenInfoWidth / 1.5
padding-left: 20px

span
color: $baseColor1
font-family: monospace
font-size: 1.1em
// float: right

div
display: block

#authors
a
font-family: monospace

+ 50
- 3
static/sass/main.sass View File

@@ -2,6 +2,53 @@
Global project stylesheet.
*/

// placeholder
p
font-size: 1.5em
@import mixins
@import variables

html, body
height: 100%
margin: 0
padding: 0
font-family: sans-serif

div#container
min-height: 100%
position: relative

div#header
padding: 10px

div#body
height: 100%
padding-bottom: 110px
padding-top: 4%

div#center
margin-left: auto
margin-right: auto
width: 75%

div#footer
background-color: $baseColor1
bottom: 0
height: 30px
padding-bottom: 5px
padding-top: 15px
position: fixed
text-align: center
width: 100%
z-index: 100

*
color: white

a
@extend .t3

font-size: 1.2em
margin-left: 5%
margin-right: 5%
text-decoration: none

&:hover
text-decoration: underline

+ 16
- 0
static/sitemap.xml View File

@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>

<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>http://bitshift.it/</loc>
<changefreq>monthly</changefreq>
</url>
<url>
<loc>http://bitshift.it/about</loc>
<changefreq>monthly</changefreq>
</url>
<url>
<loc>http://bitshift.it/docs</loc>
<changefreq>monthly</changefreq>
</url>
</urlset>

+ 92
- 0
templates/about.html View File

@@ -0,0 +1,92 @@
= extends "layout.html"

= block title
about
= endblock

= block head
{{ assets.tag("lib/jquery.min.js") }}
{{ assets.tag("main.css") }}
{{ assets.tag("about.css") }}
= endblock

= block after_body
<div id="img-1" class="bg" speed="-1.25"></div>
<div id="img-2" class="bg" speed="1.4"></div>
<div id="img-3" class="bg" speed="1.4"></div>
<div id="img-4" class="bg" speed="1.4"></div>

<div id="top" class="section">
<div class="centered">
<h1><span>&raquo;</span> What</h1>
<span id="title">bitshift</span> is an <a href="https://github.com/earwig/bitshift">open-source</a>
online source-code search engine, developed by programmers, for programmers. The engine currently aggregates
publicly-available code from two online platforms &#8211; <a href="https://github.com/">GitHub</a> and <a
href="https://bitbucket.org/">Bitbucket</a> &#8211; but has the necessary infrastructure to quickly incorporate
others, like <a href="http://stackoverflow.com/">StackOverflow</a> and
<a href="https://gitorious.org/">Gitorious</a>. <span id="title">bitshift</span> supports a robust query
language, which allows users to search for specific languages, files, dates of creation and last modification,
and symbols (function, class, and variable names), amongst other attributes.

Watch our introductory video:
<div id="wrap">
<iframe id="vimeo" src="//player.vimeo.com/video/98697078" width="100%" height="100%" frameborder="0"
webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>
</div>
</div>
</div>

<div class="section">
<div id="how" class="centered">
<h1><span>&raquo;</span> How</h1>
<span id="title">bitshift</span> has an extensive back-end, roughly divided into three sections:
<ul>
<li><span>indexer</span> : finds and downloads code from online frameworks</li>
<li><span>parser</span> : parses newly crawled code, identifying its symbols</li>
<li><span>database</span> : interprets and compiles user searches into database queries</li>
</ul>
The engine was developed over the span of four months, and is primarily implemented in <b>Python</b>, but has
parsers in <b>Ruby</b>, <b>Java</b>, and a number of other languages.
</div>
</div>

<div class="section">
<div class="centered">
<h1><span>&raquo;</span> Who</h1>
<span id="title">bitshift</span> was developed by three seniors from New York City's Stuyvesant High School.
<div id="top" class="person">
<div class="photo">
<a href="https://github.com/breuckelen"><img src="img/about/bio1.jpg" alt="Benjamin Attal's photo."></a>
</div>
<div class="bio">
<h1><a href="https://github.com/breuckelen">Benjamin Attal</a></h1>
Benjamin Attal hacked together <span id="title">bitshift</span>'s parsers and is working on
data-visualization for bitshift's statistics page. He is a software developer and entrepreneur who enjoys
listening to and playing country music, as well as working with smart people.
</div>
</div>
<div class="person">
<div class="photo">
<a href="https://github.com/earwig"><img src="img/about/bio2.jpg" alt="Ben Kurtovic's photo."></a>
</div>
<div class="bio">
<h1><a href="https://github.com/earwig">Ben Kurtovic</a></h1> Ben Kurtovic designed <span
id="title">bitshift</span>’s database and acts as its server admin. In his free time, he edits Wikipedia
and invents new ways of confusing the hell out of people through source code obfuscation.
</div>
</div>
<div class="person">
<div class="photo">
<a href="https://github.com/sevko"><img src="img/about/bio3.jpg" alt="Severyn Kozak's photo."></a>
</div>
<div class="bio">
<h1><a href="https://github.com/sevko">Severyn Kozak</a></h1>
Severyn developed <span id="title">bitshift</span>'s crawlers and its front-end. He loves skiing, mathematics
that he doesn't understand, and the art of good software development.
</div>
</div>
</div>
</div>

{{ assets.tag("about.js") }}
= endblock

+ 282
- 0
templates/docs.html View File

@@ -0,0 +1,282 @@
= extends "layout.html"

= block title
docs
= endblock

= block head
{{ assets.tag("lib/highlight.css") }}

{{ assets.tag("docs.css") }}
= endblock

= block body
<a id="logo" href="/">
<div id="logo">
<span id="logo-bit">bit</span
><span id="logo-angle">&laquo;</span
><span id="logo-shift">shift</span>
</div>
</a>

<ul>
<li>
<h1><span>&raquo;</span> Usage</h1>
<p>
<span class="title">bitshift</span> is a search engine optimized for
source code: beyond supporting searches with the full range of ASCII
symbols, the engine <em>understands</em> code, allowing users to query
for metadata, like time of creation/last modification, programming
language, and even symbols like function names and variables. Basic use
boils down to general and advanced searches.

<ul>
<li>
<h2>general search</h2>
<p>
To perform a "general search," simply place your cursor in the
search bar on our home page and begin entering text; when you
stop typing for a short period of time, we'll automatically
execute the query for you. As you scroll down the page, new
codelets, or results, will be seamlessly downloaded from our
server and appended to the end.
</p>
</li>

<li>
<h2>advanced search</h2>
<p>
General searches, though, are limited. To allow users to make the
best of our engine, we created an advanced search form that
allows the creation of complex queries with the following
specifiers:

<ul>
<li>
<h3>search fields</h3>
<ul id="search-fields">
<li>
<span class="code">languages</span> : The programming
languages to search for.
</li>
<li>
<span class="code">authors</span> : Search for code
written/modified by a specific person.
</li>
<li>
<span class="code">date last modified</span> : Search for
code last modified on a specific date
(<span class="code">mm/dd/yy</span> format).
</li>
<li>
<span class="code">date created</span> : Search for code
created on a specific date
(<span class="code">mm/dd/yy</span> format).
</li>
<li>
<span class="code">symbols</span> : Search for specific
symbols.
</li>
<li>
<span class="code">functions</span> : Search for
functions with specific names.
</li>
<li>
<span class="code">classes</span> : Search for classes
with specific names.
</li>
<li>
<span class="code">variables</span> : Search for
variables with specific names.
</li>
</ul>

<p>
Each of the search fields allows for numerous values; just
separate them with spaces. If you'd like to search for a
multi-word, space-delimited string, on the other hand,
enclose it in double quotes.

A query for <span class="code">foo bar</span> will search
for occurrences of both <span class="string">"foo"</span> and
<span class="string">"bar"</span>, while
<span class="code">"foo bar"</span> will search for
occurrences of <span class="string">"foo bar"</span>.
</p>
</li>

<li>
<h3>search groups</h3>
<p>
Search groups facilitate even more robust queries: they're
like a bunch of individual searches grouped into one. A
user searching for occurrences of symbol
<span class="string">"curses"</span> in the language
<span class="string">"Python"</span>, and
<span class="string">"ncurses"</span> in
<span class="string">"C"</span>, won't get away with:
<span class="code">"symbols:curses ncurses"</span> and
<span class="code">"languages:Python C"</span>. The engine
might return results <span class="string">"curses"</span> in
<span class="string">"C"</span> and
<span class="string">"ncurses"</span> in
<span class="string">"Python"</span>!

To work around that, you can use two search groups: one for
<span class="string">"curses"</span> in
<span class="string">"Python"</span>, and another for
<span class="string">"curses"</span> in
<span class="string">"C"</span>.
<span class="title">bitshift</span> will return the union
of both sets of search results.
</p>
</li>
</ul>
</p>
</li>
</ul>
</p>
</li>

<li>
<h1><span>&raquo;</span> API</h1>
<p>
<span class="title">bitshift</span> provides an API through GET
requests to
<a href="http://bitshift.it/search.json"><span class="code">/search.json</span></a>.
</p>
<h2>parameters</h2>
<ul>
<li>
<span class="code">q</span> : The search query, as entered into the
search bar.
</li>
<li>
<span class="code">p</span> : The result page to return. Defaults to
<span class="code">1</span>. Each page contains ten results, so this
effectively offsets the search by
<span class="code">10 * (p - 1)</span> codelets.
</li>
<li>
<span class="code">hl</span> : Whether to return code as
<a href="http://pygments.org/">pygments</a>-highlighted HTML or as
plain source. Defaults to <span class="code">false</span>.
</li>
</ul>
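<p>
For example, the second page of highlighted results for the query
<span class="code">lang:python</span> could be fetched with a request like
<span class="code">/search.json?q=lang%3Apython&amp;p=2&amp;hl=1</span>.
</p>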
<h2>output</h2>
<p>
<span class="code">/search.json</span> returns a JSON-encoded
dictionary. If there was an error, it will contain a single key,
<span class="code">"error"</span>, whose value will contain a
human-readable description of the error. Otherwise, there will be two
keys: <span class="code">"count"</span>, storing the number of results,
and <span class="code">"results"</span>, storing a list of codelets.
Each codelet is a dictionary with the following key–value pairs:
</p>
<ul>
<li>
<span class="code">name</span> : The name of the codelet.
</li>
<li>
<span class="code">code</span> : The actual source code if
<span class="code">hl</span> was not given or was
<span class="code">false</span>; HTML code otherwise.
</li>
<li>
<span class="code">lang</span> : The language of the code.
</li>
<li>
<span class="code">authors</span> : A list of authors. Each author is
a list of two items: their name, and URL (or
<span class="code">null</span> if none is known).
</li>
<li>
<span class="code">url</span> : The URL of the page where the code
was crawled from.
</li>
<li>
<span class="code">created</span> : The date the code was created, as
an
<a href="https://en.wikipedia.org/wiki/ISO_8601">ISO 8601</a>-formatted
string (e.g. <span class="code">"2014-06-01T12:41:28"</span>).
</li>
<li>
<span class="code">modified</span> : The date the code was last
modified, as an
<a href="https://en.wikipedia.org/wiki/ISO_8601">ISO 8601</a>-formatted
string (e.g. <span class="code">"2014-06-01T12:41:28"</span>).
</li>
<li>
<span class="code">origin</span> : A list of two items: the
originating site's name (e.g. <span class="code">"GitHub"</span>) and
URL (e.g. <span class="code">"https://github.com"</span>).
</li>
</ul>
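<p>
A successful response is therefore shaped roughly like the following
(values shortened for illustration):
</p>
<pre>
{"count": 1, "results": [{"name": "...", "lang": "Python", "code": "...",
"authors": [["John Doe", null]], "url": "https://github.com/...",
"created": "2014-06-01T12:41:28", "modified": "2014-06-01T12:41:28",
"origin": ["GitHub", "https://github.com"]}]}
</pre>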
<h2>example</h2>
<p>
The following example Python 2 code searches for a given Python
function definition and prints the URL of the first result:
</p>
<table class="highlighttable code-example">
<tr>
<td class="linenos">
<div class="linenodiv">
<pre> 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19</pre>
</div>
</td>
<td class="code">
<div class="highlight">
<pre><span class="c">#!/usr/bin/env python</span>

<span class="kn">from</span> <span class="nn">json</span> <span class="kn">import</span> <span class="n">loads</span>
<span class="kn">from</span> <span class="nn">sys</span> <span class="kn">import</span> <span class="n">argv</span>
<span class="kn">from</span> <span class="nn">urllib</span> <span class="kn">import</span> <span class="n">urlencode</span>
<span class="kn">from</span> <span class="nn">urllib2</span> <span class="kn">import</span> <span class="n">urlopen</span>

<span class="k">def</span> <span class="nf">get_function</span><span class="p">(</span><span class="n">name</span><span class="p">):</span>
<span class="n">params</span> <span class="o">=</span> <span class="p">{</span><span class="s">&quot;q&quot;</span><span class="p">:</span> <span class="s">&quot;lang:python and func:def:</span><span class="si">%s</span><span class="s">&quot;</span> <span class="o">%</span> <span class="n">name</span><span class="p">}</span>
<span class="n">request</span> <span class="o">=</span> <span class="n">urlopen</span><span class="p">(</span><span class="s">&quot;http://bitshift.it/search.json?&quot;</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span><span class="n">params</span><span class="p">))</span>
<span class="n">res</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">request</span><span class="o">.</span><span class="n">read</span><span class="p">())[</span><span class="s">&quot;results&quot;</span><span class="p">]</span>
<span class="k">if</span> <span class="n">res</span><span class="p">:</span>
<span class="k">print</span> <span class="s">&quot;</span><span class="si">%s</span><span class="s">: </span><span class="si">%s</span><span class="s">&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">res</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="s">&quot;url&quot;</span><span class="p">])</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">print</span> <span class="s">&quot;</span><span class="si">%s</span><span class="s"> not found.&quot;</span> <span class="o">%</span> <span class="n">name</span>

<span class="k">if</span> <span class="n">__name__</span> <span class="o">==</span> <span class="s">&quot;__main__&quot;</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">argv</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
<span class="n">get_function</span><span class="p">(</span><span class="n">argv</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span></pre>
</div>
</td>
</tr>
</table>
</li>

<li id="sec3">
<h1><span>&raquo;</span> Get Involved</h1>
<p>
<span class="title">bitshift</span> is <span id="gasp">(gasp)</span>
open-source! The project is hosted on
<a href="https://github.com/earwig/bitshift">GitHub</a>; feel free to
file an issue or submit a pull request.
</p>
</li>
</ul>
= endblock

+ 26
- 0
templates/error404.html View File

@@ -0,0 +1,26 @@
= extends "layout.html"

= block title
404
= endblock

= block head
{{ assets.tag("error404.css") }}
= endblock

= block body
<div id="message">
{{ assets.syntax_highlight([
'puts("404");',
'printf("%d\n", 404);',
'puts 404',
'System.out.println("404")',
'print 404',
'console.log("404")',
'echo 404',
'std::cout << "404\\n"',
'(println "404")',
'say "404!";'
] | random) | safe }}
</div>
= endblock
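The error page above picks one of several "404" snippets at random and runs it through assets.syntax_highlight. A standalone sketch of that idea using Pygments is shown below; this is not the project's assets.syntax_highlight implementation, just an illustration of the technique.

# Sketch only: choose a random "404" snippet and return highlighted HTML.
# SNIPPETS and random_404_html are invented names for this illustration.
import random

from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import guess_lexer

SNIPPETS = ['puts("404");', 'puts 404', 'print 404', 'console.log("404")']

def random_404_html():
    snippet = random.choice(SNIPPETS)
    return highlight(snippet, guess_lexer(snippet), HtmlFormatter())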

+ 114
- 2
templates/index.html View File

@@ -1,9 +1,121 @@
= extends "layout.html"

= block title
Home
home
= endblock

= block head
{{ assets.tag("lib/jqueryui.custom.min.css") }}
{{ assets.tag("lib/jquery.min.js") }}
{{ assets.tag("lib/jquery-ui.min.js") }}
{{ assets.tag("lib/highlight.css") }}

{{ assets.tag("index.css") }}

<script>
AUTOCOMPLETE_LANGUAGES = {{ autocomplete_languages | safe }};
</script>
= endblock

= block body
<p>Hello, world.</p>
<div id="search-field">
<a id="logo" href="/">
<div id="logo">
<span id="logo-bit">bit</span
><span id="logo-angle">&laquo;</span
><span id="logo-shift">shift</span>
</div>
</a>

<form id="search-bar">
<input id="query" type="text" name="query"
><button id="advanced-search" title="advanced search" type="button">
Adv. Search
</button>

<div id="advanced-search">
<div id="heading">
<div id="col1">Fields</div
><div id="col2">
Search groups
<button id="submit">
<div>Search</div>
</button>
<button id="add-group">
<div><span>+</span> Add</div>
</button>
<button id="remove-group">
<div><span>-</span> Remove</div>
</button>
</div>
</div>

<div id="sidebar">
<ul>
<li>
<input type="checkbox" id="language" checked="true">
<label for="language"><div>languages</div></label>
</li>
<li>
<input type="checkbox" id="author">
<label for="author"><div>authors</div></label>
</li>
<li>
<input type="checkbox" id="date-last-modified">
<label for="date-last-modified"><div>date last modified</div></label>
</li>
<li>
<input type="checkbox" id="date-created">
<label for="date-created"><div>date created</div></label>
</li>
<li>
<input type="checkbox" id="symbol">
<label for="symbol"><div>symbols</div></label>
</li>
<li>
<input type="checkbox" id="function">
<label for="function"><div>functions</div></label>
</li>
<li>
<input type="checkbox" id="class">
<label for="class"><div>classes</div></label>
</li>
<li>
<input type="checkbox" id="variable">
<label for="variable"><div>variables</div></label>
</li>
</ul>
</div>

<div id="search-groups">
<div class="search-group" id="selected">
<div id="language">
<div class="name">languages</div
><input id="autocomplete" class="language" name="language" type="text"
><input type="checkbox" name="regex"
><span class="regex">Regex</span>
</div>
</div>
</div>
</div>
</form>
</div>

<div id="results"></div>

{{ assets.tag("index.js") }}
{{ assets.tag("index.advanced-search-form.js") }}
= endblock

= block after_body
<div id="hotkey-help" class="hidden">
<div>Hotkeys</div>
<ul>
<li><span class="hotkey">k</span> <span class="seperator">:</span> move window up to the previous result</li>
<li><span class="hotkey">j</span> <span class="seperator">:</span> move window down to the next result</li>
<li><span class="hotkey">h</span> <span class="seperator">:</span> move to the previous symbol match</li>
<li><span class="hotkey">l</span> <span class="seperator">:</span> move to the next symbol match</li>
<li><span class="hotkey">?</span> <span class="seperator">:</span> toggle help</li>
</ul>
</div>
= endblock

+ 28
- 8
templates/layout.html View File

@@ -4,24 +4,44 @@
<html>
<head>
<title>
= block title
= endblock
bitshift &laquo;
= filter lower
= block title
= endblock
= endfilter
</title>

<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"/>
<meta name="description" content="bitshift is an online code snippet
exchange."/>
<meta name="keywords" content="code snippet exchange golf programming
software community"/>
<meta name="description" content="bitshift is a source code search engine."/>
<meta name="keywords" content="source code language search engine"/>
<meta name="author" content="Benjamin Attal Ben Kurtovic Severyn Kozak"/>

{{ assets.tag("main.css") }}
{{ assets.tag("main.js") }}

= block head
= endblock
</head>
<body>
= block body
= endblock
<div id="container">
<div id="header">
</div>

<div id="body">
<div id="center">
= block body
= endblock
</div>
</div>

= block after_body
= endblock

<div id="footer">
<a href="/">home</a>
<a href="/about">about</a>
<a href="/docs">docs</a>
</div>
</div>
</body>
</html>

+ 0
- 0
View File


+ 19
- 0
test/find_function_def.py View File

@@ -0,0 +1,19 @@
#!/usr/bin/env python

from json import loads
from sys import argv
from urllib import urlencode
from urllib2 import urlopen

def get_function(name):
    params = {"q": "lang:python and func:def:%s" % name}
    request = urlopen("http://bitshift.it/search.json?" + urlencode(params))
    res = loads(request.read())["results"]
    if res:
        print "%s: %s" % (name, res[0]["url"])
    else:
        print "%s not found." % name

if __name__ == "__main__":
    if len(argv) == 2:
        get_function(argv[1])

+ 56
- 0
test/parser_test.py View File

@@ -0,0 +1,56 @@
import socket, sys, struct

file_name = 'resources/<name>.c'
server_socket_number = 5001
recv_size = 8192

if __name__ == '__main__':
    if len(sys.argv) == 1:
        print "Please input a parser to test."

    elif len(sys.argv) > 2:
        print "Too many arguments."

    else:
        if sys.argv[1] == 'c':
            pass

        elif sys.argv[1] == 'java':
            file_name = "resources/Matrix.java"
            server_socket_number = 5002

        elif sys.argv[1] == 'ruby':
            file_name = "resources/parser.rb"
            server_socket_number = 5065

        server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        server_socket.connect(("localhost", server_socket_number))

        with open(file_name, "r") as source_file:
            source = source_file.read()
            server_socket.send("%d\n%s" % (len(source), source));

        total_data = []; size_data = cur_data = ''
        total_size = 0; size = sys.maxint

        while total_size < size:
            cur_data = server_socket.recv(recv_size)

            if not total_data:
                if len(size_data) > 4:
                    size_data += cur_data
                    size = struct.unpack('>i', size_data[:4])[0]
                    recv_size = size
                    if recv_size > sys.maxint: recv_size = sys.maxint
                    total_data.append(size_data[4:])
                else:
                    size_data += cur_data

            else:
                total_data.append(cur_data)

            total_size = sum([len(s) for s in total_data])


        server_socket.close()
        print ''.join(total_data);
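The test above exercises what appears to be the parser servers' framing: the request is the source length as a decimal line followed by the source, and the reply is a 4-byte big-endian length prefix followed by the payload. A small helper wrapping that exchange might look like the sketch below; the ports and framing are inferred from the test, and parse_source / _recv_exactly are names invented here.

# Python 2 sketch of the request/response framing used in parser_test.py.
import socket
import struct

PARSER_PORTS = {"c": 5001, "java": 5002, "ruby": 5065}  # from parser_test.py

def parse_source(language, source, host="localhost"):
    """Send `source` to the parser server for `language`; return its reply."""
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.connect((host, PARSER_PORTS[language]))
    try:
        # Request: "<length>\n<source>"
        sock.sendall("%d\n%s" % (len(source), source))
        # Response: 4-byte big-endian length, then that many bytes of payload.
        header = _recv_exactly(sock, 4)
        size = struct.unpack(">i", header)[0]
        return _recv_exactly(sock, size)
    finally:
        sock.close()

def _recv_exactly(sock, n):
    chunks = []
    remaining = n
    while remaining > 0:
        chunk = sock.recv(remaining)
        if not chunk:
            break
        chunks.append(chunk)
        remaining -= len(chunk)
    return "".join(chunks)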

+ 218
- 0
test/resources/Matrix.java View File

@@ -0,0 +1,218 @@
package battlechap;

import java.io.PrintStream;

public class Matrix {
private Object[][] _datmatrix;

public Matrix(int paramInt){
this._datmatrix = new Object[paramInt][paramInt];
}

public int size() {
return this._datmatrix.length;
}

public Object get(int paramInt1, int paramInt2) {
return this._datmatrix[paramInt1][paramInt2];
}

public boolean isEmpty(int paramInt1, int paramInt2) {
return this._datmatrix[paramInt1][paramInt2] == null;
}

public boolean equals(Object paramObject) {
boolean bool = true;
if ((paramObject instanceof Matrix)) {
Matrix localMatrix = (Matrix)paramObject;
if (localMatrix.size() == size()) {
for (int i = 0; i < size(); i++) {
for (int j = 0; j < size(); j++) {
if (!localMatrix.get(i, j).equals(get(i, j))) {
bool = false;
break;
}
}
if (!bool)
break;
}
}
else
bool = false;
}
else
{
bool = false;
}
return bool;
}

public Object set(int paramInt1, int paramInt2, Object paramObject) {
Object localObject = this._datmatrix[paramInt1][paramInt2];
this._datmatrix[paramInt1][paramInt2] = paramObject;
return localObject;
}

public void transpose() {
int i = 0;
for (int j = 0; j < size(); j++) {
for (int k = i; k < size(); k++) {
set(j, k, set(k, j, get(j, k)));
}
i++;
}
}

public static void swapRows(int paramInt1, int paramInt2, Object[][] paramArrayOfObject) {
for (int i = 0; i < paramArrayOfObject[paramInt1].length; i++) {
Object localObject = paramArrayOfObject[paramInt1][i];
paramArrayOfObject[paramInt1][i] = paramArrayOfObject[paramInt2][i];
paramArrayOfObject[paramInt2][i] = localObject;
}
}

public static void swapCols(int paramInt1, int paramInt2, Object[][] paramArrayOfObject) {
for (int i = 0; i < paramArrayOfObject.length; i++) {
Object localObject = paramArrayOfObject[i][paramInt1];
paramArrayOfObject[i][paramInt1] = paramArrayOfObject[i][paramInt2];
paramArrayOfObject[i][paramInt2] = localObject;
}
}

public Object[] getRow(int paramInt) {
Object[] arrayOfObject = new Object[this._datmatrix[paramInt].length];
for (int i = 0; i < arrayOfObject.length; i++) {
arrayOfObject[i] = this._datmatrix[paramInt][i];
}
return arrayOfObject;
}

public Object[] getCol(int paramInt) {
Object[] arrayOfObject = new Object[this._datmatrix[paramInt].length];
for (int i = 0; i < arrayOfObject.length; i++) {
arrayOfObject[i] = this._datmatrix[i][paramInt];
}
return arrayOfObject;
}

public Object[] setRow(int paramInt, Object[] paramArrayOfObject) {
Object[] arrayOfObject = getRow(paramInt);

for (int i = 0; i < size(); i++) {
set(paramInt, i, paramArrayOfObject[i]);
}

return arrayOfObject;
}

public Object[] setCol(int paramInt, Object[] paramArrayOfObject) {
Object[] arrayOfObject = getCol(paramInt);

for (int i = 0; i < size(); i++) {
set(i, paramInt, paramArrayOfObject[i]);
}

return arrayOfObject;
}

public String toString()
{
String str1 = "";
for (int i = 0; i < this._datmatrix.length; i++) {
if (i < 9)
str1 = str1 + (i + 1) + ": ";
else
str1 = str1 + (i + 1) + ":";
for (int j = 0; j < this._datmatrix[i].length; j++) {
int k = (this._datmatrix[i][j] + "").length();
String str2 = " ".substring(k);
str1 = str1 + this._datmatrix[i][j] + str2;
}
str1 = str1 + "\n";
}
return str1;
}

public static void print(Object[][] paramArrayOfObject) {
for (int i = 0; i < paramArrayOfObject.length; i++) {
for (int j = 0; j < paramArrayOfObject[i].length; j++) {
int k = (paramArrayOfObject[i][j] + "").length();
String str = " ".substring(k);
System.out.print(paramArrayOfObject[i][j] + str);
}
System.out.print("\n");
}
}

public static void printArray(Object[] paramArrayOfObject) {
for (int i = 0; i < paramArrayOfObject.length; i++) {
int j = (paramArrayOfObject[i] + "").length();
String str = " ".substring(j);
System.out.print(paramArrayOfObject[i] + str);
}
System.out.print("\n");
}

public static void main(String[] paramArrayOfString) {
Matrix localMatrix1 = new Matrix(5);
Matrix localMatrix2 = new Matrix(5);
for (int i = 0; i < localMatrix1.size(); i++) {
for (int j = 0; j < localMatrix1.size(); j++) {
Integer localInteger1 = new Integer((int)(Math.random() * 20.0D));
localMatrix1.set(i, j, localInteger1);
localMatrix2.set(i, j, localInteger1);
}
}

System.out.println("\nDemonstrating equals method (should be true)\t" + localMatrix2.equals(localMatrix1) + "\n");

System.out.println("Demonstrating get method\n" + localMatrix1.get(0, 0) + "\n");
System.out.println("Demonstrating is empty method\n" + localMatrix1.isEmpty(1, 0) + "\n");
System.out.println("Demonstrating size method \n" + localMatrix1.size() + "\n");
System.out.println("Demonstrating toString method\n" + localMatrix1 + "\n");
localMatrix1.transpose();
System.out.println("Blop has been transposed\n" + localMatrix1 + "\n");

Object[][] arrayOfObject = new Object[4][4];
for (int j = 0; j < arrayOfObject.length; j++) {
for (int k = 0; k < arrayOfObject[j].length; k++) {
Integer localInteger2 = new Integer((int)(Math.random() * 20.0D));
arrayOfObject[j][k] = localInteger2;
}
}
System.out.println("\n\n**Swapping Rows Demo**");
print(arrayOfObject);
System.out.println("\nRows 1 and 2 have been Swapped \n");
swapRows(1, 2, arrayOfObject);
print(arrayOfObject);

System.out.println("\n**Swapping Columns Demo**");
print(arrayOfObject);
System.out.println("\n\nColumns 1 and 2 have been Swapped \n");
swapCols(1, 2, arrayOfObject);
print(arrayOfObject);

System.out.println("\n**Getting rows demo (from blop)**");
System.out.println(localMatrix1);
System.out.println("\nGetting row 1\n");
printArray(localMatrix1.getRow(1));

System.out.println("\n**Getting cols demo (from blop)**");
System.out.println(localMatrix1);
System.out.println("\nGetting col 1\n");
printArray(localMatrix1.getCol(1));

System.out.println("\n**Demonstrating set row method**");
System.out.println(localMatrix1);
System.out.println("\nSwitching row 1 of blop to 1st column of blop\n");
localMatrix1.setRow(1, localMatrix1.getCol(1));
System.out.println(localMatrix1 + "\n");

System.out.println("\n**Demonstrating set col method**");
System.out.println(localMatrix1);
System.out.println("\nSwitching col 1 of blop to 2nd row of blop\n");
localMatrix1.setCol(1, localMatrix1.getRow(2));
System.out.println(localMatrix1 + "\n");
}
}


+ 40
- 0
test/resources/app.py View File

@@ -0,0 +1,40 @@
"""
Module to contain all the project's Flask server plumbing.
"""

from flask import Flask
from flask import render_template, session

from bitshift import assets
# from bitshift.database import Database
# from bitshift.query import parse_query

app = Flask(__name__)
app.config.from_object("bitshift.config")

app_env = app.jinja_env
app_env.line_statement_prefix = "="
app_env.globals.update(assets=assets)

# database = Database()

@app.route("/")
def index():
    return render_template("index.html")

@app.route("/search/<query>")
def search(query):
    # tree = parse_query(query)
    # database.search(tree)
    pass

@app.route("/about")
def about():
    return render_template("about.html")

@app.route("/developers")
def developers():
    return render_template("developers.html")

if __name__ == "__main__":
    app.run(debug=True)
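The app_env.line_statement_prefix = "=" setting above is what turns the "= extends", "= block", and "= filter" lines in the templates into Jinja2 line statements rather than a custom syntax. A standalone illustration of the same Jinja2 feature (not project code; the template string here is made up):

# Demonstrates Jinja2 line statements with "=" as the prefix.
from jinja2 import Environment

env = Environment(line_statement_prefix="=")
template = env.from_string(
    "= if results\n"
    "Found {{ results | length }} result(s).\n"
    "= else\n"
    "No results.\n"
    "= endif\n"
)
print template.render(results=["a", "b"])  # prints roughly "Found 2 result(s)."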

+ 126
- 0
test/resources/parser.rb View File

@@ -0,0 +1,126 @@
require 'socket'
require 'ruby_parser'
require 'sexp_processor'

module Bitshift
class Parser
def initialize(source)
@source = source
end

def parse
parser = RubyParser.new
tree = parser.parse(@source)
puts tree.inspect
offset = tree.line - 1
processor = NodeVisitor.new offset
processor.process tree
return processor.symbols
end
end

class NodeVisitor < SexpProcessor
attr_accessor :symbols
attr_accessor :offset

def initialize(offset)
super()
@require_empty = false
@offset = offset

module_hash = Hash.new {|hash, key| hash[key] = Hash.new}
class_hash = module_hash.clone
function_hash = Hash.new {|hash, key| hash[key] = { calls: [] } }
var_hash = Hash.new {|hash, key| hash[key] = [] }

@symbols = {
modules: module_hash,
classes: class_hash,
functions: function_hash,
vars: var_hash
}
end

def block_position(exp)
pos = Hash.new
end_ln = (start_ln = exp.line - offset)
cur_exp = exp

while cur_exp.is_a? Sexp
end_ln = cur_exp.line - offset
cur_exp = cur_exp.last
break if cur_exp == nil
end

pos[:coord] = {
start_ln: start_ln,
end_ln: end_ln }
return pos
end

def statement_position(exp)
pos = Hash.new
end_ln = start_ln = exp.line - offset

pos[:coord] = {
start_ln: start_ln,
end_ln: end_ln }
return pos
end

def process_module(exp)
pos = block_position exp
exp.shift
name = exp.shift
symbols[:modules][name] = pos
exp.each_sexp {|s| process(s)}
return exp.clear
end

def process_class(exp)
pos = block_position exp
exp.shift
name = exp.shift
symbols[:classes][name] = pos
exp.each_sexp {|s| process(s)}
return exp.clear
end

def process_defn(exp)
pos = block_position exp
exp.shift
name = exp.shift
symbols[:functions][name][:declaration] = pos
exp.each_sexp {|s| process(s)}
return exp.clear
end

def process_call(exp)
pos = statement_position exp
exp.shift
exp.shift
name = exp.shift
symbols[:functions][name][:calls] << pos
exp.each_sexp {|s| process(s)}
return exp.clear
end

def process_iasgn(exp)
pos = statement_position exp
exp.shift
name = exp.shift
symbols[:vars][name] << pos
exp.each_sexp {|s| process(s)}
return exp.clear
end

def process_lasgn(exp)
pos = statement_position exp
exp.shift
name = exp.shift
symbols[:vars][name] << pos
exp.each_sexp {|s| process(s)}
return exp.clear
end
end
end

+ 76
- 0
test/test_query_parser.py View File

@@ -0,0 +1,76 @@
# -*- coding: utf-8 -*-

from __future__ import unicode_literals
import unittest

from bitshift.query import parse_query

TESTS = [
# Text
("test", "Tree(Text(String(u'test')))"),
("re:test", "Tree(Text(Regex(u'test')))"),

# Language
("language:python", "Tree(Language(Python))"),
("language:py", "Tree(Language(Python))"),
("l:r:r..y", "Tree(Language(Ruby))"),
(r'"lang:re:python|^c$"',
"Tree(BinaryOp(Language(C), OR, Language(Python)))"),

# Author
('"author:Ben Kurtovic"', "Tree(Author(String(u'Ben Kurtovic')))"),
(r"'a:re:b.*?\sk.*?'", r"Tree(Author(Regex(u'b.*?\\sk.*?')))"),

# Date
("'create:before:Jan 1, 2014'",
"Tree(Date(CREATE, BEFORE, 2014-01-01 00:00:00))"),
("'modify:after:2010-05-09 10:11:12'",
"Tree(Date(MODIFY, AFTER, 2010-05-09 10:11:12))"),

# Symbol
("sym:foobar", "Tree(Symbol(ALL, ALL, String(u'foobar')))"),
("func:foo_bar", "Tree(Symbol(ALL, FUNCTION, String(u'foo_bar')))"),
("func:foo_bar()", "Tree(Symbol(ALL, FUNCTION, String(u'foo_bar')))"),
("class:FooBar", "Tree(Symbol(ALL, CLASS, String(u'FooBar')))"),
("var:foobar", "Tree(Symbol(ALL, VARIABLE, String(u'foobar')))"),
("var:r:foobar", "Tree(Symbol(ALL, VARIABLE, Regex(u'foobar')))"),

# Composition
("(a and b) or (c and d)", ", ".join([
"Tree(BinaryOp(BinaryOp(Text(String(u'a'))", "AND",
"Text(String(u'b')))", "OR", "BinaryOp(Text(String(u'c'))", "AND",
"Text(String(u'd')))))"])),
("a and b or c and d", ", ".join([
"Tree(BinaryOp(BinaryOp(Text(String(u'a'))", "AND",
"Text(String(u'b')))", "OR", "BinaryOp(Text(String(u'c'))", "AND",
"Text(String(u'd')))))"])),
("a and b or c or d", ", ".join([
"Tree(BinaryOp(BinaryOp(Text(String(u'a'))", "AND",
"Text(String(u'b')))", "OR", "BinaryOp(Text(String(u'c'))", "OR",
"Text(String(u'd')))))"])),
("a and (b or c or d)", ", ".join([
"Tree(BinaryOp(Text(String(u'a'))", "AND",
"BinaryOp(Text(String(u'b'))", "OR", "BinaryOp(Text(String(u'c'))", "OR",
"Text(String(u'd'))))))"])),
("a not b", ", ".join([
"Tree(BinaryOp(Text(String(u'a'))", "AND", "UnaryOp(NOT",
"Text(String(u'b')))))"])),

# Unicode, Escaping
(r'lang:py "author:fo\\o \"bar\" baz\\"', ", ".join([
"Tree(BinaryOp(Language(Python)", "AND",
"Author(String(u'fo\\\\o \"bar\" baz\\\\'))))"])),
('"author:Ben Kurtović"', "Tree(Author(String(u'Ben Kurtovi\\u0107')))")
]

class TestQueryParser(unittest.TestCase):
"""Unit tests for the query parser in :py:mod:`bitshift.query`."""

def test_parse(self):
"""test full query parsing"""
for test, expected in TESTS:
self.assertEqual(expected, parse_query(test).serialize())


if __name__ == "__main__":
unittest.main(verbosity=2)
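The TESTS table above doubles as a quick reference for the query syntax (lang:, author:, func:, class:, var:, re:/r: for regexes, and/or/not for composition). Checking a query by hand, assuming bitshift is importable, is a one-liner; the exact serialized output is only a guess based on the cases above.

# Prints the serialized parse tree for a compound query; with the grammar
# exercised above this should come out close to:
# Tree(BinaryOp(Language(Python), AND, Symbol(ALL, FUNCTION, String(u'parse_query'))))
from bitshift.query import parse_query

print parse_query("lang:python and func:parse_query").serialize()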
