diff --git a/.gitignore b/.gitignore index 6a014f5..7e00121 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ .sass-cache .DS_Store +.my.cnf # github premade rules *.py[cod] diff --git a/README.md b/README.md index 3cb81a1..8ca31d7 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,8 @@ bitshift ======== -bitshift is a semantic search engine for source code. +bitshift is a semantic search engine for source code developed by Benjamin +Attal, Ben Kurtovic, and Severyn Kozak. Branches -------- @@ -13,6 +14,11 @@ Branches - `feature/*`: individual components of the project with untested, likely horribly broken code - branch off from and merge into `develop` when done +Style +----- +bitshift uses [SASS][SASS] for styling; compile the stylesheets to CSS with +`sass --watch static/sass/:static/css`. + Documentation ------------- @@ -24,3 +30,5 @@ new modules or packages, but *not* when adding functions or changing docstrings), run `sphinx-apidoc -fo docs/source/api bitshift` from the project root. Note that this will revert any custom changes made to the files in `docs/source/api`, so you might want to update them by hand instead. + +[SASS]: http://sass-lang.com/guide diff --git a/app.py b/app.py index c4083c9..b5e8b57 100644 --- a/app.py +++ b/app.py @@ -5,6 +5,8 @@ Module to contain all the project's Flask server plumbing. from flask import Flask from flask import render_template, session +from bitshift import assets +from bitshift.database import Database from bitshift.query import parse_query app = Flask(__name__) @@ -12,7 +14,9 @@ app.config.from_object("bitshift.config") app_env = app.jinja_env app_env.line_statement_prefix = "=" -app_env.globals.update(assets = assets) +app_env.globals.update(assets=assets) + +database = Database() @app.route("/") def index(): @@ -20,8 +24,8 @@ def index(): @app.route("/search/") def search(query): - ## tree = parse_query(query) - ## database.search(tree) + tree = parse_query(query) + database.search(tree) pass if __name__ == "__main__": diff --git a/bitshift/__init__.py b/bitshift/__init__.py index 9a18c9b..78ca5e9 100644 --- a/bitshift/__init__.py +++ b/bitshift/__init__.py @@ -1 +1 @@ -from . import assets, codelet, config, database, parser, query +from . import assets, codelet, config, database, parser, query, crawler diff --git a/bitshift/assets.py b/bitshift/assets.py index 90564d2..b4f597b 100644 --- a/bitshift/assets.py +++ b/bitshift/assets.py @@ -1,6 +1,5 @@ """ -.. module:: assets - :synopsis: Helper functions for use inside the project's Jinja templates. +:synopsis: Helper functions for use inside the project's Jinja templates. """ from flask import Markup @@ -16,8 +15,11 @@ def tag(filename): :param filename: The filename of the asset to create a tag for. + :type filename: str + :return: A string containing a `` tag for JS files, and a `` for CSS files. + :rtype: str """ file_ext = filename.split(".")[-1] diff --git a/bitshift/codelet.py b/bitshift/codelet.py index df81294..453ace0 100644 --- a/bitshift/codelet.py +++ b/bitshift/codelet.py @@ -1,13 +1,57 @@ __all__ = ["Codelet"] class Codelet(object): - ## object to store the following (it doesn't need to do anything with it): - ## author name, URL, date created/modified, language, source code itself - ## for VCS: project name, file in project - ## also: list of functions, etc (associations data) + """ + A source-code object with code metadata and composition analysis. 
-    ## DICTIONARY MAPPING STRINGS REPRESENTING ASSOCIATION TYPE WITH DICTIONARIES
-    ## MAPPING ASSOCIATION NAMES WITH TUPLES REPRESENTING THEIR PLACE IN THE FILE
-    ## STORED AS TWO INTEGERS REPRESENTING THE ROW AND THE COLUMN
+    :ivar name: (str) A suitable name for the codelet.
+    :ivar code: (str) A string containing the raw source code.
+    :ivar filename: (str, or None) The filename of the snippet.
+    :ivar language: (int, or None) The inferred language of `code`.
+    :ivar authors: (array of tuples (str, str or None)) An array of tuples
+        containing an author's name and profile URL (on the service the code
+        was pulled from).
+    :ivar code_url: (str) The url of the (page containing the) source code.
+    :ivar date_created: (:class:`datetime.datetime`, or None) The date the code
+        was published.
+    :ivar date_modified: (:class:`datetime.datetime`, or None) The date the
+        code was last modified.
+    :ivar rank: (float) A quantification of the source code's quality, as
+        per available ratings (stars, forks, upvotes, etc.).
+    """

-    ## {"functions": {"foo": (12, 13), "bar": (53, 3)}}
+    def __init__(self, name, code, filename, language, authors, code_url,
+                 date_created, date_modified, rank):
+        """
+        Create a Codelet instance.
+
+        :param name: see :attr:`self.name`
+        :param code: see :attr:`self.code`
+        :param filename: see :attr:`self.filename`
+        :param language: see :attr:`self.language`
+        :param authors: see :attr:`self.authors`
+        :param code_url: see :attr:`self.code_url`
+        :param date_created: see :attr:`self.date_created`
+        :param date_modified: see :attr:`self.date_modified`
+        :param rank: see :attr:`self.rank`
+
+        :type name: see :attr:`self.name`
+        :type code: see :attr:`self.code`
+        :type filename: see :attr:`self.filename`
+        :type language: see :attr:`self.language`
+        :type authors: see :attr:`self.authors`
+        :type code_url: see :attr:`self.code_url`
+        :type date_created: see :attr:`self.date_created`
+        :type date_modified: see :attr:`self.date_modified`
+        :type rank: see :attr:`self.rank`
+        """
+
+        self.name = name
+        self.code = code
+        self.filename = filename
+        self.language = language
+        self.authors = authors
+        self.code_url = code_url
+        self.date_created = date_created
+        self.date_modified = date_modified
+        self.rank = rank
diff --git a/bitshift/crawler/__init__.py b/bitshift/crawler/__init__.py
new file mode 100644
index 0000000..73b1c22
--- /dev/null
+++ b/bitshift/crawler/__init__.py
@@ -0,0 +1,55 @@
+"""
+:synopsis: Parent crawler module, which supervises all crawlers.
+
+Contains functions for initializing all subsidiary, threaded crawlers.
+"""
+
+import logging, logging.handlers, os, Queue
+
+from bitshift.crawler import crawler, indexer
+
+__all__ = ["crawl"]
+
+def crawl():
+    """
+    Initialize all crawlers (and indexers).
+
+    Start the:
+    1. GitHub crawler, :class:`crawler.GitHubCrawler`.
+    2. Bitbucket crawler, :class:`crawler.BitbucketCrawler`.
+    3. Git indexer, :class:`bitshift.crawler.indexer.GitIndexer`.
+ """ + + _configure_logging() + + MAX_URL_QUEUE_SIZE = 5e3 + + repo_clone_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE) + threads = [crawler.GitHubCrawler(repo_clone_queue), + crawler.BitbucketCrawler(repo_clone_queue), + indexer.GitIndexer(repo_clone_queue)] + + for thread in threads: + thread.start() + +def _configure_logging(): + LOG_FILE_DIR = "log" + + if not os.path.exists(LOG_FILE_DIR): + os.mkdir(LOG_FILE_DIR) + + logging.getLogger("requests").setLevel(logging.WARNING) + logging.getLogger("urllib3").setLevel(logging.WARNING) + + formatter = logging.Formatter( + fmt=("%(asctime)s %(levelname)s %(name)s %(funcName)s" + " %(message)s"), datefmt="%y-%m-%d %H:%M:%S") + + handler = logging.handlers.TimedRotatingFileHandler( + "%s/%s" % (LOG_FILE_DIR, "app.log"), when="H", interval=1, + backupCount=20) + handler.setFormatter(formatter) + + root_logger = logging.getLogger() + root_logger.addHandler(handler) + root_logger.setLevel(logging.NOTSET) diff --git a/bitshift/crawler/crawler.py b/bitshift/crawler/crawler.py new file mode 100644 index 0000000..9501bd0 --- /dev/null +++ b/bitshift/crawler/crawler.py @@ -0,0 +1,240 @@ +""" +:synopsis: Main crawler module, to oversee all site-specific crawlers. + +Contains all website/framework-specific Class crawlers. +""" + +import logging, requests, time, threading + +from bitshift.crawler import indexer + +from ..codelet import Codelet +from ..database import Database + +class GitHubCrawler(threading.Thread): + """ + Crawler that retrieves links to all of GitHub's public repositories. + + GitHubCrawler is a threaded singleton that queries GitHub's API for urls + to its public repositories, which it inserts into a :class:`Queue.Queue` + shared with :class:`indexer.GitIndexer`. + + :ivar clone_queue: (:class:`Queue.Queue`) Contains :class:`GitRepository` + with repository metadata retrieved by :class:`GitHubCrawler`, and other Git + crawlers, to be processed by :class:`indexer.GitIndexer`. + :ivar _logger: (:class:`logging.Logger`) A class-specific logger object. + """ + + AUTHENTICATION = { + "client_id" : "436cb884ae09be7f2a4e", + "client_secret" : "8deeefbc2439409c5b7a092fd086772fe8b1f24e" + } + + def __init__(self, clone_queue): + """ + Create an instance of the singleton `GitHubCrawler`. + + :param clone_queue: see :attr:`self.clone_queue` + + :type clone_queue: see :attr:`self.clone_queue` + """ + + self.clone_queue = clone_queue + self._logger = logging.getLogger("%s.%s" % + (__name__, self.__class__.__name__)) + self._logger.info("Starting.") + super(GitHubCrawler, self).__init__(name=self.__class__.__name__) + + def run(self): + """ + Query the GitHub API for data about every public repository. + + Pull all of GitHub's repositories by making calls to its API in a loop, + accessing a subsequent page of results via the "next" URL returned in an + API response header. Uses Severyn Kozak's (sevko) authentication + credentials. For every new repository, a :class:`GitRepository` is + inserted into :attr:`self.clone_queue`. + """ + + next_api_url = "https://api.github.com/repositories" + api_request_interval = 5e3 / 60 ** 2 + + while len(next_api_url) > 0: + start_time = time.time() + + try: + resp = requests.get(next_api_url, params=self.AUTHENTICATION) + except ConnectionError as excep: + self._logger.warning("API %s call failed: %s: %s", + next_api_url, excep.__class__.__name__, excep) + time.sleep(0.5) + continue + + queue_percent_full = (float(self.clone_queue.qsize()) / + self.clone_queue.maxsize) * 100 + self._logger.info("API call made. 
Queue size: %d/%d, %d%%." % + ((self.clone_queue.qsize(), self.clone_queue.maxsize, + queue_percent_full))) + + repo_names = [repo["full_name"] for repo in resp.json()] + repo_stars = self._get_repositories_stars(repo_names) + + for repo in resp.json(): + while self.clone_queue.full(): + time.sleep(1) + + self.clone_queue.put(indexer.GitRepository( + repo["html_url"], repo["full_name"].replace("/", ""), + "GitHub", repo_stars[repo["full_name"]])) + + if int(resp.headers["x-ratelimit-remaining"]) == 0: + time.sleep(int(resp.headers["x-ratelimit-reset"]) - + time.time()) + + next_api_url = resp.headers["link"].split(">")[0][1:] + + sleep_time = api_request_interval - (time.time() - start_time) + if sleep_time > 0: + time.sleep(sleep_time) + + def _get_repositories_stars(self, repo_names): + """ + Return the number of stargazers for several repositories. + + Queries the GitHub API for the number of stargazers for any given + repositories, and blocks if the query limit is exceeded. + + :param repo_names: An array of repository names, in + `username/repository_name` format. + + :type repo_names: str + + :return: A dictionary with repository name keys, and corresponding + stargazer count values. + + Example dictionary: + .. code-block:: python + { + "user/repository" : 100 + } + + :rtype: dictionary + """ + + API_URL = "https://api.github.com/search/repositories" + REPOS_PER_QUERY = 25 + + repo_stars = {} + for names in [repo_names[ind:ind + REPOS_PER_QUERY] for ind in + xrange(0, len(repo_names), REPOS_PER_QUERY)]: + query_url = "%s?q=%s" % (API_URL, + "+".join("repo:%s" % name for name in names)) + + params = self.AUTHENTICATION + resp = requests.get(query_url, + params=params, + headers={ + "Accept" : "application/vnd.github.preview" + }) + + if int(resp.headers["x-ratelimit-remaining"]) == 0: + sleep_time = int(resp.headers["x-ratelimit-reset"]) - \ + time.time() + 1 + if sleep_time > 0: + logging.info("API quota exceeded. Sleep time: %d." % + sleep_time) + time.sleep(sleep_time) + + for repo in resp.json()["items"]: + rank = float(repo["stargazers_count"]) / 1000 + repo_stars[repo["full_name"]] = rank if rank < 1.0 else 1.0 + + for name in repo_names: + if name not in repo_stars: + repo_stars[name] = 0.5 + + return repo_stars + +class BitbucketCrawler(threading.Thread): + """ + Crawler that retrieves links to all of Bitbucket's public repositories. + + BitbucketCrawler is a threaded singleton that queries Bitbucket's API for + urls to its public repositories, and inserts them as + :class:`indexer.GitRepository` into a :class:`Queue.Queue` shared with + :class:`indexer.GitIndexer`. + + :ivar clone_queue: (:class:`Queue.Queue`) The shared queue to insert + :class:`indexer.GitRepository` repository urls into. + :ivar _logger: (:class:`logging.Logger`) A class-specific logger object. + """ + + def __init__(self, clone_queue): + """ + Create an instance of the singleton `BitbucketCrawler`. + + :param clone_queue: see :attr:`self.clone_queue` + + :type clone_queue: see :attr:`self.clone_queue` + """ + + self.clone_queue = clone_queue + self._logger = logging.getLogger("%s.%s" % + (__name__, self.__class__.__name__)) + self._logger.info("Starting.") + super(BitbucketCrawler, self).__init__(name=self.__class__.__name__) + + def run(self): + """ + Query the Bitbucket API for data about every public repository. 
+
+        Query the Bitbucket API's "/repositories" endpoint and read its
+        paginated responses in a loop; any "git" repositories have their
+        clone-urls and names inserted into a :class:`indexer.GitRepository` in
+        :attr:`self.clone_queue`.
+        """
+
+        next_api_url = "https://api.bitbucket.org/2.0/repositories"
+
+        while True:
+            try:
+                response = requests.get(next_api_url).json()
+            except ConnectionError as exception:
+                time.sleep(0.5)
+                self._logger.warning("API %s call failed: %s: %s",
+                        next_api_url, exception.__class__.__name__, exception)
+                continue
+
+            queue_percent_full = (float(self.clone_queue.qsize()) /
+                    self.clone_queue.maxsize) * 100
+            self._logger.info("API call made. Queue size: %d/%d, %d%%." %
+                    ((self.clone_queue.qsize(), self.clone_queue.maxsize,
+                    queue_percent_full)))
+
+            for repo in response["values"]:
+                if repo["scm"] == "git":
+                    while self.clone_queue.full():
+                        time.sleep(1)
+
+                    clone_links = repo["links"]["clone"]
+                    clone_url = (clone_links[0]["href"] if
+                            clone_links[0]["name"] == "https" else
+                            clone_links[1]["href"])
+
+                    try:
+                        watchers = requests.get(
+                                repo["links"]["watchers"]["href"])
+                        rank = float(len(watchers.json()["values"])) / 100
+                    except ConnectionError as exception:
+                        time.sleep(0.5)
+                        self._logger.warning("API %s call failed: %s: %s",
+                                next_api_url, exception.__class__.__name__,
+                                exception)
+                        continue
+
+                    self.clone_queue.put(indexer.GitRepository(
+                            clone_url, repo["full_name"], "Bitbucket",
+                            rank if rank < 1.0 else 1.0))
+
+            next_api_url = response["next"]
+            time.sleep(0.2)
diff --git a/bitshift/crawler/indexer.py b/bitshift/crawler/indexer.py
new file mode 100644
index 0000000..c1c77ad
--- /dev/null
+++ b/bitshift/crawler/indexer.py
@@ -0,0 +1,489 @@
+"""
+:synopsis: Contains a singleton GitIndexer class, which clones and indexes git
+    repositories.
+"""
+
+import bs4, datetime, logging, os, Queue, re, shutil, string, subprocess, time,\
+    threading
+
+from ..database import Database
+from ..codelet import Codelet
+
+GIT_CLONE_DIR = "/tmp/bitshift"
+THREAD_QUEUE_SLEEP = 0.5
+
+class GitRepository(object):
+    """
+    A representation of a Git repository's metadata.
+
+    :ivar url: (str) The repository's url.
+    :ivar name: (str) The name of the repository.
+    :ivar framework_name: (str) The name of the online Git framework that the
+        repository belongs to (eg, GitHub, BitBucket).
+    :ivar rank: (float) The rank of the repository, as assigned by
+        :class:`crawler.GitHubCrawler`.
+    """
+
+    def __init__(self, url, name, framework_name, rank):
+        """
+        Create a GitRepository instance.
+
+        :param url: see :attr:`GitRepository.url`
+        :param name: see :attr:`GitRepository.name`
+        :param framework_name: see :attr:`GitRepository.framework_name`
+        :param rank: see :attr:`GitRepository.rank`
+
+        :type url: str
+        :type name: str
+        :type framework_name: str
+        :type rank: float
+        """
+
+        self.url = url
+        self.name = name
+        self.framework_name = framework_name
+        self.rank = rank
+
+class GitIndexer(threading.Thread):
+    """
+    A singleton Git repository indexer.
+
+    :class:`GitIndexer` indexes the repositories cloned by the
+    :class:`_GitCloner` singleton.
+
+    :ivar index_queue: (:class:`Queue.Queue`) A queue containing
+        :class:`GitRepository` objects for every new repository successfully
+        cloned by :class:`_GitCloner`, which are to be indexed.
+    :ivar git_cloner: (:class:`_GitCloner`) The corresponding repository cloner,
+        which feeds :class:`GitIndexer`.
+    :ivar _logger: (:class:`logging.Logger`) A class-specific logger object.
+ """ + + def __init__(self, clone_queue): + """ + Create an instance of the singleton `GitIndexer`. + + :param clone_queue: see :attr:`self.index_queue` + + :type index_queue: see :attr:`self.index_queue` + """ + + MAX_INDEX_QUEUE_SIZE = 10 + + self.index_queue = Queue.Queue(maxsize=MAX_INDEX_QUEUE_SIZE) + self.git_cloner = _GitCloner(clone_queue, self.index_queue) + self.git_cloner.start() + self._logger = logging.getLogger("%s.%s" % + (__name__, self.__class__.__name__)) + self._logger.info("Starting.") + + if not os.path.exists(GIT_CLONE_DIR): + os.makedirs(GIT_CLONE_DIR) + + super(GitIndexer, self).__init__(name=self.__class__.__name__) + + def run(self): + """ + Retrieve metadata about newly cloned repositories and index them. + + Blocks until new repositories appear in :attr:`self.index_queue`, then + retrieves one, and attempts indexing it. Should any errors occur, the + new repository will be discarded and the indexer will index the next in + the queue. + """ + + while True: + while self.index_queue.empty(): + time.sleep(THREAD_QUEUE_SLEEP) + + repo = self.index_queue.get() + self.index_queue.task_done() + try: + self._index_repository(repo) + except Exception as excep: + self._logger.warning("%s: %s.", excep.__class__.__name__, excep) + + def _index_repository(self, repo): + """ + Clone and index (create and insert Codeletes for) a Git repository. + + `git clone` the Git repository located at **repo.url**, call + `_insert_repository_codelets()`, then remove said repository. + + :param repo_url: The metadata of the repository to be indexed. + + :type repo_url: :class:`GitRepository` + """ + + with _ChangeDir("%s/%s" % (GIT_CLONE_DIR, repo.name)) as repository_dir: + try: + self._insert_repository_codelets(repo) + except Exception as excep: + self._logger.warning("%s: %s.", excep.__class__.__name__, excep) + + if os.path.isdir("%s/%s" % (GIT_CLONE_DIR, repo.name)): + shutil.rmtree("%s/%s" % (GIT_CLONE_DIR, repo.name)) + + def _insert_repository_codelets(self, repo): + """ + Create and insert a Codelet for the files inside a Git repository. + + Create a new Codelet, and insert it into the Database singleton, for + every file inside the current working directory's default branch + (usually *master*). + + :param repo_url: The metadata of the repository to be indexed. + + :type repo_url: :class:`GitRepository` + """ + + commits_meta = self._get_commits_metadata() + if commits_meta is None: + return + + for filename in commits_meta.keys(): + try: + with open(filename) as source_file: + source = self._decode(source_file.read()) + if source is None: + continue + except IOError as exception: + continue + + authors = [(self._decode(author), None) for author in \ + commits_meta[filename]["authors"]] + codelet = Codelet("%s:%s" % (repo.name, filename), source, filename, + None, authors, self._generate_file_url(filename, + repo.url, repo.framework_name), + commits_meta[filename]["time_created"], + commits_meta[filename]["time_last_modified"], + repo.rank) + + def _generate_file_url(self, filename, repo_url, framework_name): + """ + Return a url for a filename from a Git wrapper framework. + + :param filename: The path of the file. + :param repo_url: The url of the file's parent repository. + :param framework_name: The name of the framework the repository is from. + + :type filename: str + :type repo_url: str + :type framework_name: str + + :return: The file's full url on the given framework, if successfully + derived. + :rtype: str, or None + + .. 
warning::
+            Various Git subprocesses will occasionally fail, and, seeing as the
+            information they provide is a crucial component of some repository file
+            urls, None may be returned.
+        """
+
+        try:
+            if framework_name == "GitHub":
+                default_branch = subprocess.check_output("git branch"
+                        " --no-color", shell=True)[2:-1]
+                return ("%s/blob/%s/%s" % (repo_url, default_branch,
+                        filename)).replace("//", "/")
+            elif framework_name == "Bitbucket":
+                commit_hash = subprocess.check_output("git rev-parse HEAD",
+                        shell=True).replace("\n", "")
+                return ("%s/src/%s/%s" % (repo_url, commit_hash,
+                        filename)).replace("//", "/")
+        except subprocess.CalledProcessError as exception:
+            return None
+
+    def _get_git_commits(self):
+        """
+        Return the current working directory's formatted commit data.
+
+        Uses `git log` to generate metadata about every single file in the
+        repository's commit history.
+
+        :return: The author, timestamp, and names of all modified files of every
+            commit.
+            .. code-block:: python
+                sample_returned_array = [
+                    {
+                        "author" : (str) "author",
+                        "timestamp" : (`datetime.datetime`),
+                        "filenames" : (str array) ["file1", "file2"]
+                    }
+                ]
+        :rtype: array of dictionaries
+        """
+
+        git_log = subprocess.check_output(("git --no-pager log --name-only"
+                " --pretty=format:'%n%n%an%n%at' -z"), shell=True)
+
+        commits = []
+        for commit in git_log.split("\n\n"):
+            fields = commit.split("\n")
+            if len(fields) > 2:
+                commits.append({
+                    "author" : fields[0],
+                    "timestamp" : datetime.datetime.fromtimestamp(int(fields[1])),
+                    "filenames" : fields[2].split("\x00")[:-2]
+                })
+
+        return commits
+
+    def _get_tracked_files(self):
+        """
+        Return a list of the filenames of all valuable files in the Git repository.
+
+        Get a list of the filenames of the non-binary (Perl heuristics used for
+        filetype identification) files currently inside the current working
+        directory's Git repository.
+
+        :return: The filenames of all index-worthy non-binary files.
+        :rtype: str array
+        """
+
+        files = []
+        for dirname, subdir_names, filenames in os.walk("."):
+            for filename in filenames:
+                path = os.path.join(dirname, filename)
+                if self._is_ascii(path):
+                    files.append(path[2:])
+
+        return files
+
+    def _get_commits_metadata(self):
+        """
+        Return a dictionary containing every valuable tracked file's metadata.
+
+        :return: A dictionary with author names, time of creation, and time of last
+            modification for every filename key.
+            .. code-block:: python
+                sample_returned_dict = {
+                    "my_file" : {
+                        "authors" : (str array) ["author1", "author2"],
+                        "time_created" : (`datetime.datetime`),
+                        "time_last_modified" : (`datetime.datetime`)
+                    }
+                }
+        :rtype: dictionary of dictionaries
+        """
+
+        commits = self._get_git_commits()
+        tracked_files = self._get_tracked_files()
+
+        files_meta = {}
+        for commit in commits:
+            for filename in commit["filenames"]:
+                if filename not in tracked_files:
+                    continue
+
+                if filename not in files_meta.keys():
+                    files_meta[filename] = {
+                        "authors" : [commit["author"]],
+                        "time_last_modified" : commit["timestamp"],
+                        "time_created" : commit["timestamp"]
+                    }
+                else:
+                    if commit["author"] not in files_meta[filename]["authors"]:
+                        files_meta[filename]["authors"].append(commit["author"])
+                    files_meta[filename]["time_created"] = commit["timestamp"]
+
+        return files_meta
+
+    def _decode(self, raw):
+        """
+        Return a decoded raw string.
+
+        :param raw: The string to decode.
+
+        :type raw: str
+
+        :return: If the original encoding is successfully inferred, return the
+            decoded string.
+        :rtype: str, or None
+
+        .. warning::
+            The raw string's original encoding is identified by heuristics which
+            can, and occasionally will, fail. Decoding will then fail, and None
+            will be returned.
+        """
+
+        try:
+            encoding = bs4.BeautifulSoup(raw).original_encoding
+            return raw.decode(encoding) if encoding is not None else None
+
+        except (LookupError, UnicodeDecodeError, UserWarning) as exception:
+            return None
+
+    def _is_ascii(self, filename):
+        """
+        Heuristically determine whether a file is ASCII text or binary.
+
+        If a portion of the file contains null bytes, or the percentage of bytes
+        that aren't ASCII is greater than 30%, then the file is concluded to be
+        binary. This heuristic is used by the `file` utility, Perl's inbuilt `-T`
+        operator, and is the de-facto method for determining whether a
+        file is ASCII.
+
+        :param filename: The path of the file to test.
+
+        :type filename: str
+
+        :return: Whether the file is probably ASCII.
+        :rtype: Boolean
+        """
+
+        try:
+            with open(filename) as source:
+                file_snippet = source.read(512)
+
+                if not file_snippet:
+                    return True
+
+                ascii_characters = "".join(map(chr, range(32, 127)) +
+                        list("\n\r\t\b"))
+                null_trans = string.maketrans("", "")
+
+                if "\0" in file_snippet:
+                    return False
+
+                non_ascii = file_snippet.translate(null_trans, ascii_characters)
+                return not float(len(non_ascii)) / len(file_snippet) > 0.30
+
+        except IOError as exception:
+            return False
+
+class _GitCloner(threading.Thread):
+    """
+    A singleton Git repository cloner.
+
+    Clones the repositories crawled by :class:`crawler.GitHubCrawler` for
+    :class:`GitIndexer` to index.
+
+    :ivar clone_queue: (:class:`Queue.Queue`) see
+        :attr:`crawler.GitHubCrawler.clone_queue`.
+    :ivar index_queue: (:class:`Queue.Queue`) see
+        :attr:`GitIndexer.index_queue`.
+    :ivar _logger: (:class:`logging.Logger`) A class-specific logger object.
+    """
+
+    def __init__(self, clone_queue, index_queue):
+        """
+        Create an instance of the singleton :class:`_GitCloner`.
+
+        :param clone_queue: see :attr:`self.clone_queue`
+        :param index_queue: see :attr:`self.index_queue`
+
+        :type clone_queue: see :attr:`self.clone_queue`
+        :type index_queue: see :attr:`self.index_queue`
+        """
+
+        self.clone_queue = clone_queue
+        self.index_queue = index_queue
+        self._logger = logging.getLogger("%s.%s" %
+                (__name__, self.__class__.__name__))
+        self._logger.info("Starting.")
+        super(_GitCloner, self).__init__(name=self.__class__.__name__)
+
+    def run(self):
+        """
+        Retrieve metadata about newly crawled repositories and clone them.
+
+        Blocks until new :class:`GitRepository` appear in
+        :attr:`self.clone_queue`, then attempts cloning them. If
+        successful, the cloned repository is added to :attr:`self.index_queue`
+        for the `GitIndexer` to index; otherwise, it is discarded.
+        """
+
+        while True:
+            while self.clone_queue.empty():
+                time.sleep(THREAD_QUEUE_SLEEP)
+            repo = self.clone_queue.get()
+            self.clone_queue.task_done()
+
+            try:
+                self._clone_repository(repo)
+            except Exception as exception:
+                pass
+
+    def _clone_repository(self, repo):
+        """
+        Attempt cloning a Git repository.
+
+        :param repo: Metadata about the repository to clone.
+
+        :type repo: :class:`GitRepository`
+        """
+
+        GIT_CLONE_TIMEOUT = 500
+
+        queue_percent_full = (float(self.index_queue.qsize()) /
+                self.index_queue.maxsize) * 100
+
+        exit_code = None
+        command = ("perl -e 'alarm shift @ARGV; exec @ARGV' %d git clone"
+                " --single-branch %s %s/%s || pkill -f git")
+
+        command_attempt = 0
+        while exit_code is None:
+            try:
+                exit_code = subprocess.call(command % (GIT_CLONE_TIMEOUT,
+                        repo.url, GIT_CLONE_DIR, repo.name), shell=True)
+            except Exception as exception:
+                time.sleep(1)
+                command_attempt += 1
+                if command_attempt == 20:
+                    break
+                else:
+                    continue
+            else:
+                break
+
+        if exit_code != 0:
+            if os.path.isdir("%s/%s" % (GIT_CLONE_DIR, repo.name)):
+                shutil.rmtree("%s/%s" % (GIT_CLONE_DIR, repo.name))
+            return
+
+        while self.index_queue.full():
+            time.sleep(THREAD_QUEUE_SLEEP)
+
+        self.index_queue.put(repo)
+
+class _ChangeDir(object):
+    """
+    A wrapper class for os.chdir(), to map onto `with` and handle exceptions.
+
+    :ivar new_path: (str) The path to change the current directory to.
+    :ivar old_path: (str) The path of the directory to return to.
+    """
+
+    def __init__(self, new_path):
+        """
+        Create a _ChangeDir instance.
+
+        :param new_path: The directory to enter.
+
+        :type new_path: str
+        """
+
+        self.new_path = new_path
+
+    def __enter__(self):
+        """
+        Change the current working-directory to **new_path**.
+        """
+
+        self.old_path = os.getcwd()
+        os.chdir(self.new_path)
+
+    def __exit__(self, *exception):
+        """
+        Change the current working-directory to **old_path**.
+
+        :param exception: Various exception arguments passed by `with`.
+
+        :type exception: varargs
+        """
+
+        os.chdir(self.old_path)
diff --git a/bitshift/database.py b/bitshift/database.py
deleted file mode 100644
index b8995ee..0000000
--- a/bitshift/database.py
+++ /dev/null
@@ -1,18 +0,0 @@
-"""
-Module with classes and functions to handle communication with the MySQL
-database backend, which manages the search index.
-"""
-
-import oursql
-
-class Database(object):
-    """Represents the MySQL database."""
-
-    def __init__(self):
-        pass
-
-    def _connect(self):
-        pass
-
-    def _create(self):
-        pass
diff --git a/bitshift/database/__init__.py b/bitshift/database/__init__.py
new file mode 100644
index 0000000..75f39da
--- /dev/null
+++ b/bitshift/database/__init__.py
@@ -0,0 +1,153 @@
+"""
+Subpackage with classes and functions to handle communication with the MySQL
+database backend, which manages the search index.
+"""
+
+import os
+
+import mmh3
+import oursql
+
+from .migration import VERSION, MIGRATIONS
+
+__all__ = ["Database"]
+
+class Database(object):
+    """Represents the MySQL database."""
+
+    def __init__(self, migrate=False):
+        self._conn = self._connect()
+        self._check_version(migrate)
+
+    def _connect(self):
+        """Establish a connection to the database."""
+        root = os.path.dirname(os.path.abspath(__file__))
+        default_file = os.path.join(root, ".my.cnf")
+        return oursql.connect(db="bitshift", read_default_file=default_file,
+                              autoping=True, autoreconnect=True)
+
+    def _migrate(self, cursor, current):
+        """Migrate the database to the latest schema version."""
+        for version in xrange(current, VERSION):
+            print "Migrating to %d..." % (version + 1)
+            for query in MIGRATIONS[version - 1]:
+                cursor.execute(query)
+            cursor.execute("UPDATE version SET version = ?", (version + 1,))
+
+    def _check_version(self, migrate):
+        """Check the database schema version and respond accordingly.
+
+        If the schema is out of date, migrate if *migrate* is True, else raise
+        an exception.
+ """ + with self._conn.cursor() as cursor: + cursor.execute("SELECT version FROM version") + version = cursor.fetchone()[0] + if version < VERSION: + if migrate: + self._migrate(cursor, version) + else: + err = "Database schema out of date. " \ + "Run `python -m bitshift.database.migration`." + raise RuntimeError(err) + + def _get_codelets_from_ids(self, cursor, ids): + """Return a list of Codelet objects given a list of codelet IDs.""" + raise NotImplementedError() ## TODO + + def _decompose_url(self, cursor, url): + """Break up a URL into an origin (with a URL base) and a suffix.""" + query = """SELECT origin_id, SUBSTR(?, LENGTH(origin_url_base)) + FROM origins + WHERE origin_url_base IS NOT NULL + AND ? LIKE CONCAT(origin_url_base, "%")""" + + cursor.execute(query, (url, url)) + result = cursor.fetchone() + return result if result else (1, url) + + def _insert_symbols(self, cursor, code_id, sym_type, symbols): + """Insert a list of symbols of a given type into the database.""" + sym_types = ["functions", "classes", "variables"] + query1 = "INSERT INTO symbols VALUES (DEFAULT, ?, ?, ?)" + query2 = """INSERT INTO symbol_locations VALUES + (DEFAULT, ?, ?, ?, ?, ?, ?)""" + + for (name, decls, uses) in symbols: + cursor.execute(query1, (code_id, sym_types.index(sym_type), name)) + sym_id = cursor.lastrowid + params = ([tuple([sym_id, 0] + list(loc)) for loc in decls] + + [tuple([sym_id, 1] + list(loc)) for loc in uses]) + cursor.executemany(query2, params) + + def close(self): + """Disconnect from the database.""" + self._conn.close() + + def search(self, query, page=1): + """ + Search the database for a query and return the *n*\ th page of results. + + :param query: The query to search for. + :type query: :py:class:`~.query.tree.Tree` + :param page: The result page to display. + :type page: int + + :return: The total number of results, and the *n*\ th page of results. + :rtype: 2-tuple of (long, list of :py:class:`.Codelet`\ s) + """ + query1 = """SELECT cdata_codelet, cache_count_mnt, cache_count_exp + FROM cache + INNER JOIN cache_data ON cache_id = cdata_cache + WHERE cache_id = ?""" + query2 = "INSERT INTO cache VALUES (?, ?, ?, DEFAULT)" + query3 = "INSERT INTO cache_data VALUES (?, ?)" + + cache_id = mmh3.hash64(str(page) + ":" + query.serialize())[0] + + with self._conn.cursor() as cursor: + cursor.execute(query1, (cache_id,)) + results = cursor.fetchall() + if results: # Cache hit + num_results = results[0][1] * (10 ** results[0][2]) + ids = [res[0] for res in results] + else: # Cache miss + ## TODO: build and execute search query + results = cursor.fetchall() + ids = NotImplemented ## TODO: extract ids from results + num_results = NotImplemented ## TODO: num if results else 0 + num_exp = max(len(str(num_results)) - 3, 0) + num_results = int(round(num_results, -num_exp)) + num_mnt = num_results / (10 ** num_exp) + cursor.execute(query2, (cache_id, num_mnt, num_exp)) + cursor.executemany(query3, [(cache_id, c_id) for c_id in ids]) + return (num_results, self._get_codelets_from_ids(cursor, ids)) + + def insert(self, codelet): + """ + Insert a codelet into the database. + + :param codelet: The codelet to insert. + :type codelet: :py:class:`.Codelet` + """ + query1 = """INSERT INTO code VALUES (?, ?, ?) 
+ ON DUPLICATE KEY UPDATE code_id=code_id""" + query2 = """INSERT INTO codelets VALUES + (DEFAULT, ?, ?, ?, ?, ?, ?, ?)""" + query3 = "INSERT INTO authors VALUES (DEFAULT, ?, ?, ?)" + + hash_key = str(codelet.language) + ":" + codelet.code.encode("utf8") + code_id = mmh3.hash64(hash_key)[0] + + with self._conn.cursor() as cursor: + cursor.execute(query1, (code_id, codelet.language, codelet.code)) + if cursor.rowcount == 1: + for sym_type, symbols in codelet.symbols.iteritems(): + self._insert_symbols(cursor, code_id, sym_type, symbols) + origin, url = self._decompose_url(cursor, codelet.url) + cursor.execute(query2, (codelet.name, code_id, origin, url, + codelet.rank, codelet.date_created, + codelet.date_modified)) + codelet_id = cursor.lastrowid + authors = [(codelet_id, a[0], a[1]) for a in codelet.authors] + cursor.executemany(query3, authors) diff --git a/bitshift/database/migration.py b/bitshift/database/migration.py new file mode 100644 index 0000000..24f744a --- /dev/null +++ b/bitshift/database/migration.py @@ -0,0 +1,97 @@ +""" +Contains information about database schema versions, and SQL queries to update +between them. +""" + +VERSION = 6 + +MIGRATIONS = [ + # 1 -> 2 + [ + """ALTER TABLE `codelets` + DROP FOREIGN KEY `codelets_ibfk_1`""", + """ALTER TABLE `code` + DROP KEY `code_hash`, + DROP COLUMN `code_hash`, + MODIFY COLUMN `code_id` BIGINT NOT NULL""", + """ALTER TABLE `codelets` + MODIFY COLUMN `codelet_code_id` BIGINT NOT NULL, + ADD KEY (`codelet_lang`), + ADD CONSTRAINT `codelets_ibfk_1` FOREIGN KEY (`codelet_code_id`) + REFERENCES `code` (`code_id`) + ON DELETE RESTRICT ON UPDATE CASCADE""", + """ALTER TABLE `symbols` + ADD COLUMN `symbol_end_row` INT UNSIGNED NOT NULL, + ADD COLUMN `symbol_end_col` INT UNSIGNED NOT NULL""" + ], + # 2 -> 3 + [ + """ALTER TABLE `symbols` + DROP FOREIGN KEY `symbols_ibfk_1`, + CHANGE COLUMN `symbol_codelet` `symbol_code` BIGINT NOT NULL, + ADD CONSTRAINT `symbols_ibfk_1` FOREIGN KEY (`symbol_code`) + REFERENCES `code` (`code_id`) + ON DELETE CASCADE ON UPDATE CASCADE""" + ], + # 3 -> 4 + [ + """ALTER TABLE `symbols` + DROP COLUMN `symbol_row`, + DROP COLUMN `symbol_col`, + DROP COLUMN `symbol_end_row`, + DROP COLUMN `symbol_end_col`""", + """CREATE TABLE `symbol_locations` ( + `sloc_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, + `sloc_symbol` BIGINT UNSIGNED NOT NULL, + `sloc_type` TINYINT UNSIGNED NOT NULL, + `sloc_row` INT UNSIGNED NOT NULL, + `sloc_col` INT UNSIGNED NOT NULL, + `sloc_end_row` INT UNSIGNED NOT NULL, + `sloc_end_col` INT UNSIGNED NOT NULL, + PRIMARY KEY (`sloc_id`), + FOREIGN KEY (`sloc_symbol`) + REFERENCES `symbols` (`symbol_id`) + ON DELETE CASCADE ON UPDATE CASCADE + ) ENGINE=InnoDB""" + ], + # 4 -> 5 + [ + """ALTER TABLE `origins` + MODIFY COLUMN `origin_name` VARCHAR(64) DEFAULT NULL, + MODIFY COLUMN `origin_url` VARCHAR(512) DEFAULT NULL, + MODIFY COLUMN `origin_url_base` VARCHAR(512) DEFAULT NULL""" + ], + # 5 -> 6 + [ + """ALTER TABLE `code` + ADD COLUMN `code_lang` SMALLINT UNSIGNED DEFAULT NULL + AFTER `code_id`, + ADD KEY (`code_lang`)""", + """ALTER TABLE `codelets` + DROP KEY `codelet_lang`, + DROP COLUMN `codelet_lang`""", + """ALTER TABLE `cache_data` + DROP FOREIGN KEY `cache_data_ibfk_1`""", + """ALTER TABLE `cache` + MODIFY COLUMN `cache_id` BIGINT NOT NULL, + DROP COLUMN `cache_hash`, + DROP COLUMN `cache_last_used`, + MODIFY COLUMN `cache_count_mnt` SMALLINT UNSIGNED NOT NULL""", + """ALTER TABLE `cache_data` + MODIFY COLUMN `cdata_cache` BIGINT NOT NULL, + ADD PRIMARY KEY (`cdata_cache`, 
`cdata_codelet`), + ADD CONSTRAINT `cache_data_ibfk_1` FOREIGN KEY (`cdata_codelet`) + REFERENCES `codelets` (`codelet_id`) + ON DELETE CASCADE ON UPDATE CASCADE""", + """CREATE EVENT `flush_cache` + ON SCHEDULE EVERY 1 HOUR + DO + DELETE FROM `cache` + WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY);""" + ] +] + +if __name__ == "__main__": + from . import Database + + Database(migrate=True).close() diff --git a/bitshift/database/schema.sql b/bitshift/database/schema.sql new file mode 100644 index 0000000..8634416 --- /dev/null +++ b/bitshift/database/schema.sql @@ -0,0 +1,114 @@ +-- Schema version 6 + +CREATE DATABASE `bitshift` DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci; +USE `bitshift`; + +CREATE TABLE `version` ( + `version` INT UNSIGNED NOT NULL +) ENGINE=InnoDB; +INSERT INTO `version` VALUES (6); + +CREATE TABLE `origins` ( + `origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT, + `origin_name` VARCHAR(64) DEFAULT NULL, + `origin_url` VARCHAR(512) DEFAULT NULL, + `origin_url_base` VARCHAR(512) DEFAULT NULL, + `origin_image` BLOB DEFAULT NULL, + PRIMARY KEY (`origin_id`) +) ENGINE=InnoDB; +INSERT INTO `origins` VALUES (1, NULL, NULL, NULL, NULL); + +CREATE TABLE `code` ( + `code_id` BIGINT NOT NULL, + `code_lang` SMALLINT UNSIGNED DEFAULT NULL, + `code_code` MEDIUMTEXT NOT NULL, + PRIMARY KEY (`code_id`), + KEY (`code_lang`), + FULLTEXT KEY (`code_code`) +) ENGINE=InnoDB; + +CREATE TABLE `codelets` ( + `codelet_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, + `codelet_name` VARCHAR(300) NOT NULL, + `codelet_code_id` BIGINT NOT NULL, + `codelet_origin` TINYINT UNSIGNED NOT NULL, + `codelet_url` VARCHAR(512) NOT NULL, + `codelet_rank` FLOAT NOT NULL, + `codelet_date_created` DATETIME DEFAULT NULL, + `codelet_date_modified` DATETIME DEFAULT NULL, + PRIMARY KEY (`codelet_id`), + FULLTEXT KEY (`codelet_name`), + KEY (`codelet_rank`), + KEY (`codelet_date_created`), + KEY (`codelet_date_modified`), + FOREIGN KEY (`codelet_code_id`) + REFERENCES `code` (`code_id`) + ON DELETE RESTRICT ON UPDATE CASCADE, + FOREIGN KEY (`codelet_origin`) + REFERENCES `origins` (`origin_id`) + ON DELETE RESTRICT ON UPDATE CASCADE +) ENGINE=InnoDB; + +CREATE TABLE `authors` ( + `author_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, + `author_codelet` BIGINT UNSIGNED NOT NULL, + `author_name` VARCHAR(128) NOT NULL, + `author_url` VARCHAR(512) DEFAULT NULL, + PRIMARY KEY (`author_id`), + FULLTEXT KEY (`author_name`), + FOREIGN KEY (`author_codelet`) + REFERENCES `codelets` (`codelet_id`) + ON DELETE CASCADE ON UPDATE CASCADE +) ENGINE=InnoDB; + +CREATE TABLE `symbols` ( + `symbol_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, + `symbol_code` BIGINT NOT NULL, + `symbol_type` TINYINT UNSIGNED NOT NULL, + `symbol_name` VARCHAR(512) NOT NULL, + PRIMARY KEY (`symbol_id`), + KEY (`symbol_type`, `symbol_name`(32)), + FOREIGN KEY (`symbol_code`) + REFERENCES `code` (`code_id`) + ON DELETE CASCADE ON UPDATE CASCADE +) ENGINE=InnoDB; + +CREATE TABLE `symbol_locations` ( + `sloc_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, + `sloc_symbol` BIGINT UNSIGNED NOT NULL, + `sloc_type` TINYINT UNSIGNED NOT NULL, + `sloc_row` INT UNSIGNED NOT NULL, + `sloc_col` INT UNSIGNED NOT NULL, + `sloc_end_row` INT UNSIGNED NOT NULL, + `sloc_end_col` INT UNSIGNED NOT NULL, + PRIMARY KEY (`sloc_id`), + FOREIGN KEY (`sloc_symbol`) + REFERENCES `symbols` (`symbol_id`) + ON DELETE CASCADE ON UPDATE CASCADE +) ENGINE=InnoDB; + +CREATE TABLE `cache` ( + `cache_id` BIGINT NOT NULL, + `cache_count_mnt` SMALLINT UNSIGNED NOT NULL, + 
`cache_count_exp` TINYINT UNSIGNED NOT NULL, + `cache_created` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (`cache_id`) +) ENGINE=InnoDB; + +CREATE TABLE `cache_data` ( + `cdata_cache` BIGINT NOT NULL, + `cdata_codelet` BIGINT UNSIGNED NOT NULL, + PRIMARY KEY (`cdata_cache`, `cdata_codelet`), + FOREIGN KEY (`cdata_cache`) + REFERENCES `cache` (`cache_id`) + ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY (`cdata_codelet`) + REFERENCES `codelets` (`codelet_id`) + ON DELETE CASCADE ON UPDATE CASCADE +) ENGINE=InnoDB; + +CREATE EVENT `flush_cache` + ON SCHEDULE EVERY 1 HOUR + DO + DELETE FROM `cache` + WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY); diff --git a/bitshift/query/__init__.py b/bitshift/query/__init__.py index bc70cde..5498b62 100644 --- a/bitshift/query/__init__.py +++ b/bitshift/query/__init__.py @@ -22,4 +22,6 @@ def parse_query(query): # gets a string, returns a Tree + # TODO: note: resultant Trees should be normalized so that "foo OR bar" + # and "bar OR foo" result in equivalent trees pass diff --git a/docs/source/api/bitshift.query.rst b/docs/source/api/bitshift.query.rst new file mode 100644 index 0000000..35b39a6 --- /dev/null +++ b/docs/source/api/bitshift.query.rst @@ -0,0 +1,11 @@ +query Package +============= + +:mod:`query` Package +-------------------- + +.. automodule:: bitshift.query + :members: + :undoc-members: + :show-inheritance: + diff --git a/docs/source/api/bitshift.rst b/docs/source/api/bitshift.rst index a5f0898..1b1c703 100644 --- a/docs/source/api/bitshift.rst +++ b/docs/source/api/bitshift.rst @@ -1,30 +1,51 @@ -bitshift package +bitshift Package ================ -Submodules ----------- +:mod:`bitshift` Package +----------------------- -bitshift.assets module ----------------------- +.. automodule:: bitshift.__init__ + :members: + :undoc-members: + :show-inheritance: + +:mod:`assets` Module +-------------------- .. automodule:: bitshift.assets :members: :undoc-members: :show-inheritance: -bitshift.config module ----------------------- +:mod:`codelet` Module +--------------------- -.. automodule:: bitshift.config +.. automodule:: bitshift.codelet :members: :undoc-members: :show-inheritance: +:mod:`config` Module +-------------------- -Module contents ---------------- +.. automodule:: bitshift.config + :members: + :undoc-members: + :show-inheritance: + +:mod:`database` Module +---------------------- -.. automodule:: bitshift +.. automodule:: bitshift.database :members: :undoc-members: :show-inheritance: + +Subpackages +----------- + +.. toctree:: + + bitshift.parser + bitshift.query + diff --git a/setup.py b/setup.py index 0ec5f77..47508e9 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,9 @@ setup( name = "bitshift", version = "0.1", packages = find_packages(), - install_requires = ["Flask>=0.10.1", "pygments>=1.6"], + install_requires = [ + "Flask>=0.10.1", "pygments>=1.6", "requests>=2.2.0", + "beautifulsoup4>=3.2.1", "oursql>=0.9.3.1", "mmh3>=2.3"], author = "Benjamin Attal, Ben Kurtovic, Severyn Kozak", license = "MIT", url = "https://github.com/earwig/bitshift"
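
For reviewers trying the patch locally, here is a minimal, hypothetical driver sketch (not part of the patch) showing how the pieces it introduces fit together; it assumes the `bitshift` schema from `schema.sql` already exists and that `bitshift/database/.my.cnf` holds valid MySQL credentials:

    # demo_crawl.py -- illustrative only, not included in this patch
    from bitshift.crawler import crawl
    from bitshift.database import Database

    # Opening the database with migrate=True upgrades an out-of-date schema;
    # without it, an outdated schema raises RuntimeError.
    db = Database(migrate=True)
    db.close()

    # Spawns the GitHubCrawler, BitbucketCrawler, and GitIndexer threads,
    # which keep running (and logging to log/app.log) after crawl() returns.
    crawl()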