
Merge branch 'develop'. Version 1.0.

Merge the latest version of `develop`: bitshift v1.0 (beta).
tags/v1.0^0
Severyn Kozak committed 10 years ago
commit 061454a9c4
100 changed files with 6957 additions and 38 deletions
  1. +18 -1  .gitignore
  2. +2 -2  LICENSE
  3. +41 -1  README.md
  4. +53 -7  app.py
  5. +8 -1  bitshift/__init__.py
  6. +31 -7  bitshift/assets.py
  7. +103 -0  bitshift/codelet.py
  8. +0 -0
  9. +94 -0  bitshift/crawler/crawl.py
  10. +243 -0  bitshift/crawler/crawler.py
  11. +348 -0  bitshift/crawler/indexer.py
  12. +239 -0  bitshift/database/__init__.py
  13. +147 -0  bitshift/database/migration.py
  14. +141 -0  bitshift/database/schema.sql
  15. +22 -0  bitshift/languages.py
  16. +368 -0  bitshift/languages.yml
  17. +90 -0  bitshift/parser/__init__.py
  18. +106 -0  bitshift/parser/c.py
  19. +217 -0  bitshift/parser/python.py
  20. +320 -0  bitshift/query/__init__.py
  21. +297 -0  bitshift/query/nodes.py
  22. +84 -0  bitshift/query/tree.py
  23. +177 -0  docs/Makefile
  24. +27 -0  docs/source/api/bitshift.crawler.rst
  25. +19 -0  docs/source/api/bitshift.database.rst
  26. +11 -0  docs/source/api/bitshift.query.rst
  27. +45 -0  docs/source/api/bitshift.rst
  28. +7 -0  docs/source/api/modules.rst
  29. +268 -0  docs/source/conf.py
  30. +20 -0  docs/source/index.rst
  31. +9 -0  gunicorn.cfg
  32. +72 -0  parsers/java/pom.xml
  33. +35 -0  parsers/java/src/main/java/com/bitshift/parsing/Parse.java
  34. +214 -0  parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java
  35. +71 -0  parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java
  36. +177 -0  parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java
  37. +17 -0  parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java
  38. +4 -0  parsers/ruby/Gemfile
  39. +6 -0  parsers/ruby/Rakefile
  40. +137 -0  parsers/ruby/lib/parser.rb
  41. +14 -0  setup.py
  42. +65 -0  static/css/lib/github.css
  43. +64 -0  static/css/lib/highlight.css
  44. BIN
  45. BIN
  46. BIN
  47. BIN
  48. BIN
  49. BIN
  50. BIN
  51. BIN
  52. BIN
  53. BIN
  54. BIN
  55. BIN
  56. BIN
  57. BIN
  58. BIN
  59. BIN
  60. BIN
  61. +7 -0  static/css/lib/jqueryui.custom.min.css
  62. +0 -4  static/css/main.css
  63. BIN
  64. BIN
  65. +1 -0  static/google10335120a3066831.html
  66. BIN
  67. BIN
  68. BIN
  69. BIN
  70. BIN
  71. BIN
  72. BIN
  73. +19 -0  static/js/about.js
  74. +175 -0  static/js/index.advanced-search-form.js
  75. +447 -0  static/js/index.js
  76. +7 -0  static/js/lib/jquery-ui.min.js
  77. +4 -0  static/js/lib/jquery.min.js
  78. +6 -0  static/js/main.js
  79. +3 -0  static/robots.txt
  80. +18 -0  static/sass/_logo.sass
  81. +20 -2  static/sass/_mixins.sass
  82. +12 -0  static/sass/_variables.sass
  83. +139 -0  static/sass/about.sass
  84. +63 -0  static/sass/docs.sass
  85. +19 -0  static/sass/error404.sass
  86. +443 -0  static/sass/index.sass
  87. +50 -3  static/sass/main.sass
  88. +16 -0  static/sitemap.xml
  89. +92 -0  templates/about.html
  90. +282 -0  templates/docs.html
  91. +26 -0  templates/error404.html
  92. +114 -2  templates/index.html
  93. +28 -8  templates/layout.html
  94. +0 -0
  95. +19 -0  test/find_function_def.py
  96. +56 -0  test/parser_test.py
  97. +218 -0  test/resources/Matrix.java
  98. +40 -0  test/resources/app.py
  99. +126 -0  test/resources/parser.rb
  100. +76 -0  test/test_query_parser.py

.gitignore (+18 -1)

@@ -1,4 +1,10 @@
static/css/*
!lib

*.swp
.sass-cache
.DS_Store
.my.cnf

# github premade rules
*.py[cod]
@@ -18,7 +24,6 @@ var
sdist
develop-eggs
.installed.cfg
lib
lib64
__pycache__

@@ -37,3 +42,15 @@ nosetests.xml
.mr.developer.cfg
.project
.pydevproject

# Maven
target

# Ruby
!parsers/ruby/lib

# Ctags
*/tags
logs
Gemfile.lock
parsing.jar

LICENSE (+2 -2)

@@ -1,6 +1,6 @@
The MIT License (MIT)

Copyright (c) 2014 Ben Kurtovic
Copyright (c) 2014 Benjamin Attal, Ben Kurtovic, Severyn Kozak

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
SOFTWARE.

README.md (+41 -1)

@@ -1,4 +1,44 @@
bitshift
========

bitshift is an online code snippet exchange.
bitshift is a semantic search engine for source code developed by Benjamin
Attal, Ben Kurtovic, and Severyn Kozak. This README is intended for developers
only. For a user overview of the project:

* read our [about page](http://bitshift.it/)
* watch our [demo video](https://vimeo.com/98697078)

Branches
--------

- `master`: working, tested, version-numbered code - no direct commits; should
only accept merges from `develop` when ready to release
- `develop`: integration branch with unreleased but mostly functional code -
direct commits allowed but should be minor
- `feature/*`: individual components of the project with untested, likely
horribly broken code - branch off from and merge into `develop` when done

Style
-----
bitshift uses [SASS][SASS] for styling; compile the stylesheets to CSS with
`sass --watch static/sass/:static/css`.

Documentation
-------------

To build documentation, run `make html` from the `docs` subdirectory. You can
then browse from `docs/build/html/index.html`.

To automatically update the API documentation structure (necessary when adding
new modules or packages, but *not* when adding functions or changing
docstrings), run `sphinx-apidoc -fo docs/source/api bitshift` from the project
root. Note that this will revert any custom changes made to the files in
`docs/source/api`, so you might want to update them by hand instead.

[SASS]: http://sass-lang.com/guide

Releasing
---------

- Update `__version__` in `bitshift/__init__.py`, `version` in `setup.py`, and
`version` and `release` in `docs/conf.py`.

app.py (+53 -7)

@@ -2,21 +2,67 @@
Module to contain all the project's Flask server plumbing.
"""

from flask import Flask
from flask import render_template, session
from json import dumps

from bitshift import *
from flask import Flask, make_response, render_template, request

app = Flask(__name__)
from bitshift import assets
from bitshift.database import Database
from bitshift.languages import LANGS
from bitshift.query import parse_query, QueryParseException

app = Flask(__name__, static_folder="static", static_url_path="")
app.config.from_object("bitshift.config")

app_env = app.jinja_env
app_env.line_statement_prefix = "="
app_env.globals.update(assets = assets)
app_env.globals.update(assets=assets)

database = Database()

@app.route("/")
def index():
return render_template("index.html")
return render_template("index.html", autocomplete_languages=LANGS)

@app.route("/search.json")
def search():
def reply(json):
resp = make_response(dumps(json))
resp.mimetype = "application/json"
return resp

query = request.args.get("q")
if not query:
return reply({"error": "No query given"})
try:
tree = parse_query(query)
except QueryParseException as exc:
return reply({"error": exc.args[0]})

page = request.args.get("p", 1)
try:
page = int(page)
except ValueError:
return reply({"error": u"Invalid page number: %s" % page})

highlight = request.args.get("hl", "0")
highlight = highlight.lower() not in ["0", "false", "no"]

count, codelets = database.search(tree, page)
results = [clt.serialize(highlight) for clt in codelets]
return reply({"count": count, "results": results})

@app.route("/about")
def about():
return render_template("about.html")

@app.route("/docs")
def docs():
return render_template("docs.html")

@app.errorhandler(404)
def error404(error):
return render_template("error404.html"), 404

if __name__ == "__main__":
app.run()
app.run(debug=True)
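As a quick illustration of the `/search.json` endpoint added above: it takes a query string `q`, an optional page number `p`, and an optional highlight flag `hl`, and returns either an error object or a result count plus serialized codelets. A minimal client sketch, assuming the development server started by `app.run(debug=True)` is listening on the default `http://127.0.0.1:5000`; the query string itself is just an example:

    import requests

    resp = requests.get("http://127.0.0.1:5000/search.json",
                        params={"q": "quicksort", "p": 1, "hl": 1})
    data = resp.json()
    if "error" in data:
        print(data["error"])
    else:
        print("%d results" % data["count"])
        for result in data["results"]:
            # each result is Codelet.serialize() output
            print("%s <%s>" % (result["name"], result["url"]))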

bitshift/__init__.py (+8 -1)

@@ -1 +1,8 @@
__all__ = ["config", "assets"]
# -*- coding: utf-8 -*-

__author__ = "Benjamin Attal, Ben Kurtovic, Severyn Kozak"
__copyright__ = "Copyright (c) 2014 Benjamin Attal, Ben Kurtovic, Severyn Kozak"
__license__ = "MIT License"
__version__ = "0.1.dev"

from . import assets, codelet, config, crawler, database, parser, query

bitshift/assets.py (+31 -7)

@@ -1,22 +1,46 @@
"""
Module contains helper functions to be used inside the project's Jinja
templates.
:synopsis: Helper functions for use inside the project's Jinja templates.
"""

import re

from flask import Markup

ASSET_HTML_TEMPLATES = {
'css': "<link rel='stylesheet' type='text/css' href='/static/css/%s'>",
'js': "<script src='/static/js/%s'></script>"
'css': "<link rel='stylesheet' type='text/css' href='/css/%s'>",
'js': "<script src='/js/%s'></script>"
}

def tag(filename):
"""
Return HTML tag for asset named filename.
Generate an HTML tag for a CSS/JS asset, based on its file extension.

:param filename: The filename of the asset to create a tag for.

:type filename: str

Return either a <script> or <link> tag to the file named filename,
based on its extension.
:return: A string containing a `<script>` tag for JS files, and a `<link>`
for CSS files.
:rtype: str
"""

file_ext = filename.split(".")[-1]
return Markup(ASSET_HTML_TEMPLATES[file_ext] % filename)

def syntax_highlight(msg):
"""
Inserts HTML `<span>` elements into a string, for symbol/word styling.

Args:
msg : (str) A message.
"""

msg.replace("<", "&;lt")
msg.replace(">", "&;gt")

font_size = 16.0 / len(msg)
msg = re.sub('([!()"%])', '<span class="dark">\\1</span>', msg)
msg = re.sub('([:.;,])', '<span class="red">\\1</span>', msg)
msg = msg.replace("404", '<span class="red">404</span>')
return "<span class='light' style='font-size: %fem'>%s</span>" % (
font_size, msg)
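For reference, a short sketch of what `assets.tag()` produces with the templates above (in the real app it is called through the `assets` global registered in `app.py`); the filenames are only examples, and a configured checkout is assumed so `bitshift` imports cleanly:

    from bitshift import assets

    print(assets.tag("main.css"))
    # <link rel='stylesheet' type='text/css' href='/css/main.css'>
    print(assets.tag("main.js"))
    # <script src='/js/main.js'></script>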

bitshift/codelet.py (+103 -0)

@@ -0,0 +1,103 @@
from operator import concat

from pygments import highlight
from pygments.lexers import find_lexer_class, get_lexer_by_name
from pygments.formatters.html import HtmlFormatter

from .languages import LANGS

__all__ = ["Codelet"]

class Codelet(object):
"""
A source-code object with code metadata and composition analysis.

:ivar name: (str) A suitable name for the codelet.
:ivar code: (str) A string containing the raw source code.
:ivar filename: (str, or None) The filename of the snippet.
:ivar language: (int, or None) The inferred language of `code`.
:ivar authors: (array of tuples (str, str or None)) An array of tuples
containing an author's name and profile URL (on the service the code
was pulled from).
:ivar url: (str) The url of the (page containing the) source code.
:ivar date_created: (:class:`datetime.datetime`, or None) The date the code
was published.
:ivar date_modified: (:class:`datetime.datetime`, or None) The date the
code was last modified.
:ivar rank: (float) A quantification of the source code's quality, as
per available ratings (stars, forks, upvotes, etc.).
:ivar symbols: (dict) Dictionary containing dictionaries of functions,
classes, variable definitions, etc.
:ivar origin: (tuple) 2-tuple of (site_name, site_url), as added by the
database.
"""

def __init__(self, name, code, filename, language, authors, url,
date_created, date_modified, rank, symbols=None, origin=None):
"""
Create a Codelet instance.

:param name: see :attr:`self.name`
:param code: see :attr:`self.code`
:param filename: see :attr:`self.filename`
:param language: see :attr:`self.language`
:param authors: see :attr:`self.authors`
:param url: see :attr:`self.url`
:param date_created: see :attr:`self.date_created`
:param date_modified: see :attr:`self.date_modified`
:param rank: see :attr:`self.rank`
:param symbols: see :attr:`self.symbols`
:param origin: see :attr:`self.origin`

:type name: see :attr:`self.name`
:type code: see :attr:`self.code`
:type filename: see :attr:`self.filename`
:type language: see :attr:`self.language`
:type authors: see :attr:`self.authors`
:type url: see :attr:`self.url`
:type date_created: see :attr:`self.date_created`
:type date_modified: see :attr:`self.date_modified`
:type rank: see :attr:`self.rank`
:type symbols: see :attr:`self.symbols`
:type origin: see :attr:`self.origin`
"""

self.name = name
self.code = code
self.filename = filename
self.language = language
self.authors = authors
self.url = url
self.date_created = date_created
self.date_modified = date_modified
self.rank = rank
self.symbols = symbols or {}
self.origin = origin or (None, None)

def serialize(self, highlight_code=False):
"""
Convert the codelet into a dictionary that can be sent as JSON.

:param highlight_code: Whether to return code as pygments-highlighted
HTML or as plain source.
:type highlight_code: bool

:return: The codelet as a dictionary.
:rtype: dict
"""
lang = LANGS[self.language]
code = self.code
if highlight_code:
lexer = find_lexer_class(lang)() or get_lexer_by_name("text")
symbols = reduce(concat, self.symbols.values(), [])
lines = reduce(concat, [[loc[0] for loc in sym[1] + sym[2]]
for sym in symbols], [])
formatter = HtmlFormatter(linenos=True, hl_lines=lines)
code = highlight(code, lexer, formatter)

return {
"name": self.name, "code": code, "lang": lang,
"authors": self.authors, "url": self.url,
"created": self.date_created.isoformat(),
"modified": self.date_modified.isoformat(), "origin": self.origin
}
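A minimal sketch of constructing and serializing a `Codelet`, assuming a configured checkout so that `bitshift` and its dependencies import cleanly; all of the metadata values below are hypothetical, and `language` is an index into `LANGS` as the rest of the codebase expects:

    from datetime import datetime
    from bitshift.codelet import Codelet
    from bitshift.languages import LANGS

    codelet = Codelet(
        name="user/repo: app.py",              # hypothetical repository/file
        code="def main():\n    pass\n",
        filename="app.py",
        language=LANGS.index("Python"),
        authors=[("Jane Doe", None)],
        url="https://github.com/user/repo/blob/master/app.py",
        date_created=datetime(2014, 1, 1),
        date_modified=datetime(2014, 6, 1),
        rank=0.5)

    print(codelet.serialize())   # pass highlight_code=True for Pygments HTML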

bitshift/crawler/crawl.py (+94 -0)

@@ -0,0 +1,94 @@
"""
:synopsis: Parent crawler module, which supervises all crawlers.

Contains functions for initializing all subsidiary, threaded crawlers.
"""

import logging
import logging.handlers
import os
import Queue
import sys
import time
from threading import Event

from .crawler import GitHubCrawler, BitbucketCrawler
from .indexer import GitIndexer, GitRepository

__all__ = ["crawl"]

MAX_URL_QUEUE_SIZE = 5e3

def crawl():
"""
Initialize all crawlers (and indexers).

Start the:
1. GitHub crawler, :class:`crawler.GitHubCrawler`.
2. Bitbucket crawler, :class:`crawler.BitbucketCrawler`.
3. Git indexer, :class:`bitshift.crawler.indexer.GitIndexer`.
"""

_configure_logging()
time.sleep(5)

repo_clone_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE)
run_event = Event()
run_event.set()
threads = [GitIndexer(repo_clone_queue, run_event)]

if sys.argv[1:]:
names = sys.argv[1:]
ranks = GitHubCrawler.get_ranks(names)
for name in names:
repo = GitRepository("https://github.com/" + name, name, "GitHub",
ranks[name])
repo_clone_queue.put(repo)
else:
threads += [GitHubCrawler(repo_clone_queue, run_event),
BitbucketCrawler(repo_clone_queue, run_event)]

for thread in threads:
thread.start()

try:
while 1:
time.sleep(0.1)
except KeyboardInterrupt:
run_event.clear()
with repo_clone_queue.mutex:
repo_clone_queue.queue.clear()
for thread in threads:
thread.join()

def _configure_logging():
# This isn't ideal, since it means the bitshift python package must be kept
# inside the app, but it works for now:
root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
log_dir = os.path.join(root, "logs")

if not os.path.exists(log_dir):
os.mkdir(log_dir)

logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)

formatter = logging.Formatter(
fmt=("%(asctime)s %(levelname)s %(name)s %(message)s"),
datefmt="%y-%m-%d %H:%M:%S")

file_handler = logging.handlers.TimedRotatingFileHandler(
"%s/%s" % (log_dir, "app.log"), when="H", interval=1,
backupCount=20)
stream_handler = logging.StreamHandler()
file_handler.setFormatter(formatter)
stream_handler.setFormatter(formatter)

root_logger = logging.getLogger()
root_logger.handlers = []
root_logger.addHandler(file_handler)
root_logger.addHandler(stream_handler)
root_logger.setLevel(logging.NOTSET)

if __name__ == "__main__":
crawl()

bitshift/crawler/crawler.py (+243 -0)

@@ -0,0 +1,243 @@
"""
:synopsis: Main crawler module, to oversee all site-specific crawlers.

Contains all website/framework-specific crawler classes.
"""

import logging
import math
import time
import threading

import requests

from . import indexer

class GitHubCrawler(threading.Thread):
"""
Crawler that retrieves links to all of GitHub's public repositories.

GitHubCrawler is a threaded singleton that queries GitHub's API for urls
to its public repositories, which it inserts into a :class:`Queue.Queue`
shared with :class:`indexer.GitIndexer`.

:ivar clone_queue: (:class:`Queue.Queue`) Contains :class:`GitRepository`
with repository metadata retrieved by :class:`GitHubCrawler`, and other Git
crawlers, to be processed by :class:`indexer.GitIndexer`.
:ivar _logger: (:class:`logging.Logger`) A class-specific logger object.
"""

AUTHENTICATION = {
"client_id" : "436cb884ae09be7f2a4e",
"client_secret" : "8deeefbc2439409c5b7a092fd086772fe8b1f24e"
}

def __init__(self, clone_queue, run_event):
"""
Create an instance of the singleton `GitHubCrawler`.

:param clone_queue: see :attr:`self.clone_queue`

:type clone_queue: see :attr:`self.clone_queue`
"""

self.clone_queue = clone_queue
self.run_event = run_event
self._logger = logging.getLogger("%s.%s" %
(__name__, self.__class__.__name__))
self._logger.info("Starting.")
super(GitHubCrawler, self).__init__(name=self.__class__.__name__)

def run(self):
"""
Query the GitHub API for data about every public repository.

Pull all of GitHub's repositories by making calls to its API in a loop,
accessing a subsequent page of results via the "next" URL returned in an
API response header. Uses Severyn Kozak's (sevko) authentication
credentials. For every new repository, a :class:`GitRepository` is
inserted into :attr:`self.clone_queue`.
"""

next_api_url = "https://api.github.com/repositories"
api_request_interval = 5e3 / 60 ** 2

while next_api_url and self.run_event.is_set():
start_time = time.time()

try:
resp = requests.get(next_api_url, params=self.AUTHENTICATION)
except requests.ConnectionError:
self._logger.exception("API %s call failed:" % next_api_url)
time.sleep(0.5)
continue

queue_percent_full = (float(self.clone_queue.qsize()) /
self.clone_queue.maxsize) * 100
self._logger.info("API call made. Queue size: %d/%d, %d%%." %
((self.clone_queue.qsize(), self.clone_queue.maxsize,
queue_percent_full)))

repo_names = [repo["full_name"] for repo in resp.json()]
repo_ranks = self.get_ranks(repo_names)

for repo in resp.json():
while self.clone_queue.full():
time.sleep(1)

self.clone_queue.put(indexer.GitRepository(
repo["html_url"], repo["full_name"], "GitHub",
repo_ranks[repo["full_name"]]))

if int(resp.headers["x-ratelimit-remaining"]) == 0:
time.sleep(int(resp.headers["x-ratelimit-reset"]) -
time.time())

next_api_url = resp.headers["link"].split(">")[0][1:]

sleep_time = api_request_interval - (time.time() - start_time)
if sleep_time > 0:
time.sleep(sleep_time)

@classmethod
def get_ranks(cls, repo_names):
"""
Return the ranks for several repositories.

Queries the GitHub API for the number of stargazers for any given
repositories, and blocks if the query limit is exceeded. The rank is
calculated using these numbers.

:param repo_names: An array of repository names, in
`username/repository_name` format.

:type repo_names: list of str

:return: A dictionary mapping repository names to ranks.

Example dictionary:
.. code-block:: python
{
"user/repository" : 0.2564949357461537
}

:rtype: dictionary
"""

API_URL = "https://api.github.com/search/repositories"
REPOS_PER_QUERY = 25

repo_ranks = {}
for names in [repo_names[ind:ind + REPOS_PER_QUERY] for ind in
xrange(0, len(repo_names), REPOS_PER_QUERY)]:
query_url = "%s?q=%s" % (API_URL,
"+".join("repo:%s" % name for name in names))

params = cls.AUTHENTICATION
resp = requests.get(query_url,
params=params,
headers={
"Accept" : "application/vnd.github.preview"
})

if int(resp.headers["x-ratelimit-remaining"]) == 0:
sleep_time = int(resp.headers["x-ratelimit-reset"]) - \
time.time() + 1
if sleep_time > 0:
logging.info("API quota exceeded. Sleep time: %d." %
sleep_time)
time.sleep(sleep_time)

for repo in resp.json()["items"]:
stars = repo["stargazers_count"]
rank = min(math.log(max(stars, 1), 5000), 1.0)
repo_ranks[repo["full_name"]] = rank

for name in repo_names:
if name not in repo_ranks:
repo_ranks[name] = 0.1

return repo_ranks

class BitbucketCrawler(threading.Thread):
"""
Crawler that retrieves links to all of Bitbucket's public repositories.

BitbucketCrawler is a threaded singleton that queries Bitbucket's API for
urls to its public repositories, and inserts them as
:class:`indexer.GitRepository` into a :class:`Queue.Queue` shared with
:class:`indexer.GitIndexer`.

:ivar clone_queue: (:class:`Queue.Queue`) The shared queue to insert
:class:`indexer.GitRepository` repository urls into.
:ivar _logger: (:class:`logging.Logger`) A class-specific logger object.
"""

def __init__(self, clone_queue, run_event):
"""
Create an instance of the singleton `BitbucketCrawler`.

:param clone_queue: see :attr:`self.clone_queue`

:type clone_queue: see :attr:`self.clone_queue`
"""

self.clone_queue = clone_queue
self.run_event = run_event
self._logger = logging.getLogger("%s.%s" %
(__name__, self.__class__.__name__))
self._logger.info("Starting.")
super(BitbucketCrawler, self).__init__(name=self.__class__.__name__)

def run(self):
"""
Query the Bitbucket API for data about every public repository.

Query the Bitbucket API's "/repositories" endpoint and read its
paginated responses in a loop; any "git" repositories have their
clone-urls and names inserted into a :class:`indexer.GitRepository` in
:attr:`self.clone_queue`.
"""

next_api_url = "https://api.bitbucket.org/2.0/repositories"

while self.run_event.is_set():
try:
response = requests.get(next_api_url).json()
except requests.ConnectionError:
self._logger.exception("API %s call failed:", next_api_url)
time.sleep(0.5)
continue

queue_percent_full = (float(self.clone_queue.qsize()) /
self.clone_queue.maxsize) * 100
self._logger.info("API call made. Queue size: %d/%d, %d%%." %
((self.clone_queue.qsize(), self.clone_queue.maxsize,
queue_percent_full)))

for repo in response["values"]:
if repo["scm"] == "git":
while self.clone_queue.full():
time.sleep(1)

clone_links = repo["links"]["clone"]
clone_url = (clone_links[0]["href"] if
clone_links[0]["name"] == "https" else
clone_links[1]["href"])

try:
watchers = requests.get(
repo["links"]["watchers"]["href"])
num = len(watchers.json()["values"])
rank = min(math.log(max(num, 1), 500), 1.0)
except requests.ConnectionError:
err = "API %s call failed:" % next_api_url
self._logger.exception(err)
time.sleep(0.5)
continue

self.clone_queue.put(indexer.GitRepository(
clone_url, repo["full_name"], "Bitbucket"), rank)

next_api_url = response["next"]
time.sleep(0.2)
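Both crawlers map popularity counts onto a rank in [0, 1] with a saturating logarithm (base 5000 for GitHub stargazers, base 500 for Bitbucket watchers). A standalone sketch of the GitHub formula from `get_ranks()`, with illustrative star counts:

    import math

    def star_rank(stars):
        # Logarithmic in the star count, clamped to 1.0 at >= 5000 stars.
        return min(math.log(max(stars, 1), 5000), 1.0)

    for stars in (1, 10, 100, 1000, 5000, 50000):
        print("%6d stars -> rank %.3f" % (stars, star_rank(stars)))
    # 1 -> 0.000, 10 -> 0.270, 100 -> 0.541, 1000 -> 0.811, 5000+ -> 1.000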

bitshift/crawler/indexer.py (+348 -0)

@@ -0,0 +1,348 @@
"""
:synopsis: Contains a singleton GitIndexer class, which clones and indexes git
repositories.
"""

from datetime import datetime
import logging
import os
import Queue
import shutil
import string
import time
import threading

from bs4 import UnicodeDammit
import git

from ..database import Database
from ..parser import parse, UnsupportedFileError
from ..codelet import Codelet

GIT_CLONE_DIR = "/tmp/bitshift"
THREAD_QUEUE_SLEEP = 0.5
MAX_INDEX_QUEUE_SIZE = 10

class GitRepository(object):
"""
A representation of a Git repository's metadata.

:ivar url: (str) The repository's url.
:ivar name: (str) The name of the repository.
:ivar framework_name: (str) The name of the online Git framework that the
repository belongs to (eg, GitHub, BitBucket).
:ivar rank: (float) The rank of the repository, as assigned by
:class:`crawler.GitHubCrawler`.
:ivar path: (str) The repository's on-disk directory path.
:ivar repo: (git.Repo) A git.Repo representation of the repository.
"""

def __init__(self, url, name, framework_name, rank):
"""
Create a GitRepository instance.

:param url: see :attr:`GitRepository.url`
:param name: see :attr:`GitRepository.name`
:param framework_name: see :attr:`GitRepository.framework_name`
:param rank: see :attr:`GitRepository.rank`

:type url: str
:type name: str
:type framework_name: str
:type rank: float
"""

self.url = url
self.name = name
self.framework_name = framework_name
self.rank = rank
dirname = name.replace("/", "-") + "-" + str(int(time.time()))
self.path = os.path.join(GIT_CLONE_DIR, dirname)
self.repo = None

class GitIndexer(threading.Thread):
"""
A singleton Git repository indexer.

:class:`GitIndexer` indexes the repositories cloned by the
:class:`_GitCloner` singleton.

:ivar index_queue: (:class:`Queue.Queue`) A queue containing
:class:`GitRepository` objects for every new repository successfully
cloned by :class:`_GitCloner`, which are to be indexed.
:ivar git_cloner: (:class:`_GitCloner`) The corresponding repository
cloner, which feeds :class:`GitIndexer`.
:ivar _logger: (:class:`logging.Logger`) A class-specific logger object.
"""

def __init__(self, clone_queue, run_event):
"""
Create an instance of the singleton `GitIndexer`.

:param clone_queue: see :attr:`self.index_queue`

:type index_queue: see :attr:`self.index_queue`
"""

self.index_queue = Queue.Queue(maxsize=MAX_INDEX_QUEUE_SIZE)
self.run_event = run_event
self.git_cloner = _GitCloner(clone_queue, self.index_queue, run_event)
self.git_cloner.start()
self.database = Database()
self._logger = logging.getLogger("%s.%s" %
(__name__, self.__class__.__name__))
self._logger.info("Starting.")

if not os.path.exists(GIT_CLONE_DIR):
os.makedirs(GIT_CLONE_DIR)

super(GitIndexer, self).__init__(name=self.__class__.__name__)

def run(self):
"""
Retrieve metadata about newly cloned repositories and index them.

Blocks until new repositories appear in :attr:`self.index_queue`, then
retrieves one, and attempts indexing it. Should any errors occur, the
new repository will be discarded and the indexer will index the next in
the queue.
"""

while True:
while self.index_queue.empty() and self.run_event.is_set():
time.sleep(THREAD_QUEUE_SLEEP)
if not self.run_event.is_set():
break

repo = self.index_queue.get()
self.index_queue.task_done()
self._index_repository(repo)

def _index_repository(self, repo):
"""
Clone and index (create and insert Codelets for) a Git repository.

`git clone` the Git repository located at **repo.url**, call
`_insert_repository_codelets()`, then remove said repository.

:param repo: The metadata of the repository to be indexed.
:type repo: :class:`GitRepository`
"""

self._logger.info(u"Indexing repo: %s", repo.name)
try:
self._insert_repository_codelets(repo)
except Exception:
self._logger.exception("Exception raised while indexing:")
finally:
if os.path.isdir(repo.path):
shutil.rmtree(repo.path)

def _insert_repository_codelets(self, repo):
"""
Create and insert a Codelet for the files inside a Git repository.

Create a new Codelet, and insert it into the Database singleton, for
every file inside the current working directory's default branch
(usually *master*).

:param repo_url: The metadata of the repository to be indexed.

:type repo_url: :class:`GitRepository`
"""

file_meta = self._get_file_metadata(repo.repo)
if file_meta is None:
return

for filename, data in file_meta.iteritems():
name = ("%s: %s" % (repo.name, filename)).encode("utf8")
authors = [(author, None) for author in data["authors"]]
encoded_source = data["blob"].data_stream.read()
source = UnicodeDammit(encoded_source).unicode_markup
url = self._generate_file_url(filename, repo)
codelet = Codelet(name, source, filename, None, authors, url,
data["time_created"], data["time_last_modified"],
repo.rank)
self._logger.debug("Indexing file: %s", codelet.name)
try:
parse(codelet)
except UnsupportedFileError:
continue
except Exception:
self._logger.exception("Exception raised while parsing:")
self.database.insert(codelet)

def _generate_file_url(self, filename, repo):
"""
Return a url for a filename from a Git wrapper framework.

:param filename: The path of the file.
:param repo: The git repo.

:type filename: str
:type repo: :class:`GitRepository`

:return: The file's full url on the given framework, if successfully
derived.
:rtype: str, or None
"""

if repo.framework_name == "GitHub":
default_branch = repo.repo.active_branch.name
parts = [repo.url, "blob", default_branch, filename]
elif repo.framework_name == "Bitbucket":
try:
commit_hash = repo.repo.head.commit.hexsha
except ValueError: # No commits
return None
parts = [repo.url, "src", commit_hash, filename]
return "/".join(s.strip("/") for s in parts)

def _get_file_metadata(self, repo):
"""
Return a dictionary containing every valuable tracked file's metadata.

:return: A dictionary with author names, time of creation, and time of
last modification for every filename key.
.. code-block:: python
sample_returned_dict = {
"my_file" : {
"blob": (GitPython Blob) <object>,
"authors" : (str list) ["author1", "author2"],
"time_created" : (`datetime.datetime`) <object>,
"time_last_modified" : (`datetime.datetime`) <object>
}
}
:rtype: dictionary of dictionaries
"""
try:
tree = repo.head.commit.tree
except ValueError: # No commits
return {}

files = {}
self._logger.debug("Building file metadata")
for item in tree.traverse():
if item.type != "blob" or not self._is_ascii(item.data_stream):
continue
log = repo.git.log("--follow", '--format=%an %ct', "--", item.path)
lines = log.splitlines()
authors = {line.rsplit(" ", 1)[0].decode("utf8") for line in lines}
last_mod = int(lines[0].rsplit(" ", 1)[1])
created = int(lines[-1].rsplit(" ", 1)[1])

files[item.path] = {
"blob": item,
"authors" : authors,
"time_last_modified": datetime.fromtimestamp(last_mod),
"time_created": datetime.fromtimestamp(created)
}

return files

def _is_ascii(self, source):
"""
Heuristically determine whether a file is ASCII text or binary.

If a portion of the file contains null bytes, or the percentage of bytes
that aren't ASCII is greater than 30%, then the file is concluded to be
binary. This heuristic is used by the `file` utility, Perl's inbuilt `-T`
operator, and is the de-facto method for determining whether a
file is ASCII.

:param source: The file object to test.

:type source: `file`

:return: Whether the file is probably ASCII.
:rtype: Boolean
"""

file_snippet = source.read(512)

if not file_snippet:
return True

ascii_characters = "".join(map(chr, range(32, 127)) +
list("\n\r\t\b"))
null_trans = string.maketrans("", "")

if "\0" in file_snippet:
return False

non_ascii = file_snippet.translate(null_trans, ascii_characters)
return not float(len(non_ascii)) / len(file_snippet) > 0.30

class _GitCloner(threading.Thread):
"""
A singleton Git repository cloner.

Clones the repositories crawled by :class:`crawler.GitHubCrawler` for
:class:`GitIndexer` to index.

:ivar clone_queue: (:class:`Queue.Queue`) see
:attr:`crawler.GitHubCrawler.clone_queue`.
:ivar index_queue: (:class:`Queue.Queue`) see
:attr:`GitIndexer.index_queue`.
:ivar _logger: (:class:`logging.Logger`) A class-specific logger object.
"""

def __init__(self, clone_queue, index_queue, run_event):
"""
Create an instance of the singleton :class:`_GitCloner`.

:param clone_queue: see :attr:`self.clone_queue`
:param index_queue: see :attr:`self.index_queue`

:type clone_queue: see :attr:`self.clone_queue`
:type index_queue: see :attr:`self.index_queue`
"""

self.clone_queue = clone_queue
self.index_queue = index_queue
self.run_event = run_event
self._logger = logging.getLogger("%s.%s" %
(__name__, self.__class__.__name__))
self._logger.info("Starting.")
super(_GitCloner, self).__init__(name=self.__class__.__name__)

def run(self):
"""
Retrieve metadata about newly crawled repositories and clone them.

Blocks until new :class:`GitRepository` appear in
:attr:`self.clone_queue`, then attempts cloning them. If
successful, the cloned repository is added to :attr:`self.index_queue`
for the `GitIndexer` to index; otherwise, it is discarded.
"""

while True:
while self.clone_queue.empty() and self.run_event.is_set():
time.sleep(THREAD_QUEUE_SLEEP)
if not self.run_event.is_set():
break
repo = self.clone_queue.get()
self.clone_queue.task_done()

try:
self._clone_repository(repo)
except Exception:
self._logger.exception("Exception raised while cloning:")

def _clone_repository(self, repo):
"""
Attempt cloning a Git repository.

:param repo: Metadata about the repository to clone.

:type repo: :class:`GitRepository`
"""

self._logger.info("Cloning repo: %s", repo.url)
repo.repo = git.Repo.clone_from(repo.url, to_path=repo.path, bare=True,
single_branch=True)
while self.index_queue.full() and self.run_event.is_set():
time.sleep(THREAD_QUEUE_SLEEP)
if self.run_event.is_set():
self.index_queue.put(repo)
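The text-vs-binary check in `GitIndexer._is_ascii()` can be read in isolation as follows; this is a Python 2 sketch (matching the codebase, hence `string.maketrans` and `str.translate` with a deletion table):

    import string

    def looks_like_text(data):
        # Reject the sample if it contains NUL bytes, or if more than 30% of
        # its first 512 bytes fall outside printable ASCII plus \n \r \t \b.
        sample = data[:512]
        if not sample:
            return True
        if "\0" in sample:
            return False
        allowed = "".join(map(chr, range(32, 127))) + "\n\r\t\b"
        non_ascii = sample.translate(string.maketrans("", ""), allowed)
        return len(non_ascii) / float(len(sample)) <= 0.30

    print(looks_like_text("int main() { return 0; }"))   # True
    print(looks_like_text("\x00\x89PNG\r\n"))            # False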

bitshift/database/__init__.py (+239 -0)

@@ -0,0 +1,239 @@
"""
Subpackage with classes and functions to handle communication with the MySQL
database backend, which manages the search index.
"""

import codecs
import os

import mmh3
import oursql

from .migration import VERSION, MIGRATIONS
from ..codelet import Codelet
from ..query.nodes import (String, Regex, Text, Language, Author, Date, Symbol,
BinaryOp, UnaryOp)

__all__ = ["Database"]

class Database(object):
"""Represents the MySQL database."""

def __init__(self, migrate=False):
self._conn = self._connect()
self._check_version(migrate)

def _connect(self):
"""Establish a connection to the database."""
try:
codecs.lookup("utf8mb4")
except LookupError:
utf8 = codecs.lookup("utf8")
codecs.register(lambda name: utf8 if name == "utf8mb4" else None)

root = os.path.dirname(os.path.abspath(__file__))
default_file = os.path.join(root, ".my.cnf")
return oursql.connect(
db="bitshift", read_default_file=default_file, autoping=True,
autoreconnect=True, charset="utf8mb4")

def _migrate(self, cursor, current):
"""Migrate the database to the latest schema version."""
for version in xrange(current, VERSION):
print "Migrating to %d..." % (version + 1)
for query in MIGRATIONS[version - 1]:
cursor.execute(query)
cursor.execute("UPDATE version SET version = ?", (version + 1,))

def _check_version(self, migrate):
"""Check the database schema version and respond accordingly.

If the schema is out of date, migrate if *migrate* is True, else raise
an exception.
"""
with self._conn.cursor() as cursor:
cursor.execute("SELECT version FROM version")
version = cursor.fetchone()[0]
if version < VERSION:
if migrate:
self._migrate(cursor, version)
else:
err = "Database schema out of date. " \
"Run `python -m bitshift.database.migration`."
raise RuntimeError(err)

def _search_with_query(self, cursor, tree, page):
"""Execute an SQL query based on a query tree, and return results.

The returned data is a 2-tuple of (list of codelet IDs, estimated
number of total results).
"""
query, args = tree.build_query(page)
cursor.execute(query, args)
ids = [cid for cid, _ in cursor.fetchall()]
num_results = len(ids) # TODO: This is not entirely correct
return ids, num_results

def _get_authors_for_codelet(self, cursor, codelet_id):
"""Return a list of authors for a given codelet."""
query = """SELECT author_name, author_url
FROM authors
WHERE author_codelet = ?"""

cursor.execute(query, (codelet_id,))
return cursor.fetchall()

def _get_symbols_for_code(self, cursor, code_id, tree):
"""Return a list of symbols for a given codelet."""
query = """SELECT symbol_type, symbol_name, sloc_type, sloc_row,
sloc_col, sloc_end_row, sloc_end_col
FROM symbols
INNER JOIN symbol_locations ON sloc_symbol = symbol_id
WHERE symbol_code = ? AND (%s)"""

conds, args = [], [code_id]
for node in tree.walk(Symbol):
node_cond, node_args, _, _ = node.parameterize(set())
conds.append(node_cond)
args += node_args
if not conds:
return {}
cond = " OR ".join(conds)

symbols = {type_: {} for type_ in Symbol.TYPES}
cursor.execute(query % cond, tuple(args))
for type_, name, loc_type, row, col, erow, ecol in cursor.fetchall():
sdict = symbols[Symbol.TYPES[type_]]
if name not in sdict:
sdict[name] = ([], [])
sdict[name][loc_type].append((row, col, erow, ecol))
for type_, sdict in symbols.items():
symbols[type_] = [(n, d, u) for n, (d, u) in sdict.iteritems()]
return symbols

def _get_codelets_from_ids(self, cursor, ids, tree):
"""Return a list of Codelet objects given a list of codelet IDs."""
query = """SELECT *
FROM codelets
INNER JOIN code ON codelet_code_id = code_id
INNER JOIN origins ON codelet_origin = origin_id
WHERE codelet_id = ?"""

with self._conn.cursor(oursql.DictCursor) as dict_cursor:
for codelet_id in ids:
dict_cursor.execute(query, (codelet_id,))
row = dict_cursor.fetchall()[0]
code_id = row["code_id"]
if row["origin_url_base"]:
url = row["origin_url_base"] + row["codelet_url"]
else:
url = row["codelet_url"]
origin = (row["origin_name"], row["origin_url"])
authors = self._get_authors_for_codelet(cursor, codelet_id)
symbols = self._get_symbols_for_code(cursor, code_id, tree)
yield Codelet(
row["codelet_name"], row["code_code"], None,
row["code_lang"], authors, url,
row["codelet_date_created"], row["codelet_date_modified"],
row["codelet_rank"], symbols, origin)

def _decompose_url(self, cursor, url):
"""Break up a URL into an origin (with a URL base) and a suffix."""
query = """SELECT origin_id, SUBSTR(?, LENGTH(origin_url_base) + 1)
FROM origins
WHERE origin_url_base IS NOT NULL
AND ? LIKE CONCAT(origin_url_base, "%")"""

cursor.execute(query, (url, url))
result = cursor.fetchone()
return result if result else (1, url)

def _insert_symbols(self, cursor, code_id, sym_type, symbols):
"""Insert a list of symbols of a given type into the database."""
query1 = "INSERT INTO symbols VALUES (DEFAULT, ?, ?, ?)"
query2 = """INSERT INTO symbol_locations VALUES
(DEFAULT, ?, ?, ?, ?, ?, ?)"""
build = lambda id, L, typ: [tuple([id, typ] + list(loc)) for loc in L]

type_id = Symbol.TYPES.index(sym_type)
for (name, defs, uses) in symbols:
cursor.execute(query1, (code_id, type_id, name))
sym_id = cursor.lastrowid
params = (build(sym_id, defs, Symbol.DEFINE) +
build(sym_id, uses, Symbol.USE))
cursor.executemany(query2, params)

def close(self):
"""Disconnect from the database."""
self._conn.close()

def search(self, tree, page=1):
"""
Search the database for a query and return the *n*\ th page of results.

:param tree: The query to search for.
:type tree: :py:class:`~.query.tree.Tree`
:param page: The result page to display.
:type page: int

:return: The total number of results, and the *n*\ th page of results.
:rtype: 2-tuple of (long, list of :py:class:`.Codelet`\ s)
"""
query1 = "SELECT 1 FROM cache WHERE cache_id = ?"
query2 = """SELECT cdata_codelet, cache_count_mnt, cache_count_exp
FROM cache
INNER JOIN cache_data ON cache_id = cdata_cache
WHERE cache_id = ?
ORDER BY cdata_index ASC"""
query3 = "INSERT INTO cache VALUES (?, ?, ?, DEFAULT)"
query4 = "INSERT INTO cache_data VALUES (?, ?, ?)"

cache_id = mmh3.hash64(str(page) + ":" + tree.serialize())[0]

with self._conn.cursor() as cursor:
cursor.execute(query1, (cache_id,))
cache_hit = cursor.fetchall()
if cache_hit:
cursor.execute(query2, (cache_id,))
rows = cursor.fetchall()
num_results = rows[0][1] * (10 ** rows[0][2]) if rows else 0
ids = [row[0] for row in rows]
else:
ids, num_results = self._search_with_query(cursor, tree, page)
num_exp = max(len(str(num_results)) - 3, 0)
num_results = int(round(num_results, -num_exp))
num_mnt = num_results / (10 ** num_exp)
cursor.execute(query3, (cache_id, num_mnt, num_exp))
cdata = [(cache_id, c_id, i) for i, c_id in enumerate(ids)]
cursor.executemany(query4, cdata)
codelet_gen = self._get_codelets_from_ids(cursor, ids, tree)
return (num_results, list(codelet_gen))

def insert(self, codelet):
"""
Insert a codelet into the database.

:param codelet: The codelet to insert.
:type codelet: :py:class:`.Codelet`
"""
query1 = """INSERT INTO code VALUES (?, ?, ?)
ON DUPLICATE KEY UPDATE code_id=code_id"""
query2 = """INSERT INTO codelets VALUES
(DEFAULT, ?, ?, ?, ?, ?, ?, ?)"""
query3 = "INSERT INTO authors VALUES (DEFAULT, ?, ?, ?)"

hash_key = str(codelet.language) + ":" + codelet.code.encode("utf8")
code_id = mmh3.hash64(hash_key)[0]

with self._conn.cursor() as cursor:
cursor.execute(query1, (code_id, codelet.language, codelet.code))
if cursor.rowcount == 1:
for sym_type, symbols in codelet.symbols.iteritems():
self._insert_symbols(cursor, code_id, sym_type, symbols)
origin, url = self._decompose_url(cursor, codelet.url)
cursor.execute(query2, (codelet.name, code_id, origin, url,
codelet.rank, codelet.date_created,
codelet.date_modified))
codelet_id = cursor.lastrowid
authors = [(codelet_id, a[0], a[1]) for a in codelet.authors]
cursor.executemany(query3, authors)
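One subtle detail in `search()` is how the estimated result count is cached: it is rounded to roughly three significant digits and stored as a (mantissa, exponent) pair in `cache_count_mnt`/`cache_count_exp`, then reconstituted as `mnt * 10 ** exp` on a cache hit. A small sketch of that rounding, with made-up counts:

    def encode_count(num_results):
        # Mirrors the arithmetic in Database.search() before caching.
        num_exp = max(len(str(num_results)) - 3, 0)
        num_results = int(round(num_results, -num_exp))
        num_mnt = num_results // (10 ** num_exp)
        return num_mnt, num_exp

    print(encode_count(1234567))   # (123, 4) -- reconstituted as 1230000
    print(encode_count(42))        # (42, 0)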

bitshift/database/migration.py (+147 -0)

@@ -0,0 +1,147 @@
"""
Contains information about database schema versions, and SQL queries to update
between them.
"""

VERSION = 12

MIGRATIONS = [
# 1 -> 2
[
"""ALTER TABLE `codelets`
DROP FOREIGN KEY `codelets_ibfk_1`""",
"""ALTER TABLE `code`
DROP KEY `code_hash`,
DROP COLUMN `code_hash`,
MODIFY COLUMN `code_id` BIGINT NOT NULL""",
"""ALTER TABLE `codelets`
MODIFY COLUMN `codelet_code_id` BIGINT NOT NULL,
ADD KEY (`codelet_lang`),
ADD CONSTRAINT `codelets_ibfk_1` FOREIGN KEY (`codelet_code_id`)
REFERENCES `code` (`code_id`)
ON DELETE RESTRICT ON UPDATE CASCADE""",
"""ALTER TABLE `symbols`
ADD COLUMN `symbol_end_row` INT UNSIGNED NOT NULL,
ADD COLUMN `symbol_end_col` INT UNSIGNED NOT NULL"""
],
# 2 -> 3
[
"""ALTER TABLE `symbols`
DROP FOREIGN KEY `symbols_ibfk_1`,
CHANGE COLUMN `symbol_codelet` `symbol_code` BIGINT NOT NULL,
ADD CONSTRAINT `symbols_ibfk_1` FOREIGN KEY (`symbol_code`)
REFERENCES `code` (`code_id`)
ON DELETE CASCADE ON UPDATE CASCADE"""
],
# 3 -> 4
[
"""ALTER TABLE `symbols`
DROP COLUMN `symbol_row`,
DROP COLUMN `symbol_col`,
DROP COLUMN `symbol_end_row`,
DROP COLUMN `symbol_end_col`""",
"""CREATE TABLE `symbol_locations` (
`sloc_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
`sloc_symbol` BIGINT UNSIGNED NOT NULL,
`sloc_type` TINYINT UNSIGNED NOT NULL,
`sloc_row` INT UNSIGNED NOT NULL,
`sloc_col` INT UNSIGNED NOT NULL,
`sloc_end_row` INT UNSIGNED NOT NULL,
`sloc_end_col` INT UNSIGNED NOT NULL,
PRIMARY KEY (`sloc_id`),
FOREIGN KEY (`sloc_symbol`)
REFERENCES `symbols` (`symbol_id`)
ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB"""
],
# 4 -> 5
[
"""ALTER TABLE `origins`
MODIFY COLUMN `origin_name` VARCHAR(64) DEFAULT NULL,
MODIFY COLUMN `origin_url` VARCHAR(512) DEFAULT NULL,
MODIFY COLUMN `origin_url_base` VARCHAR(512) DEFAULT NULL"""
],
# 5 -> 6
[
"""ALTER TABLE `code`
ADD COLUMN `code_lang` SMALLINT UNSIGNED DEFAULT NULL
AFTER `code_id`,
ADD KEY (`code_lang`)""",
"""ALTER TABLE `codelets`
DROP KEY `codelet_lang`,
DROP COLUMN `codelet_lang`""",
"""ALTER TABLE `cache_data`
DROP FOREIGN KEY `cache_data_ibfk_1`""",
"""ALTER TABLE `cache`
MODIFY COLUMN `cache_id` BIGINT NOT NULL,
DROP COLUMN `cache_hash`,
DROP COLUMN `cache_last_used`,
MODIFY COLUMN `cache_count_mnt` SMALLINT UNSIGNED NOT NULL""",
"""ALTER TABLE `cache_data`
MODIFY COLUMN `cdata_cache` BIGINT NOT NULL,
ADD PRIMARY KEY (`cdata_cache`, `cdata_codelet`),
ADD CONSTRAINT `cache_data_ibfk_1` FOREIGN KEY (`cdata_codelet`)
REFERENCES `codelets` (`codelet_id`)
ON DELETE CASCADE ON UPDATE CASCADE""",
"""CREATE EVENT `flush_cache`
ON SCHEDULE EVERY 1 HOUR
DO
DELETE FROM `cache`
WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY);"""
],
# 6 -> 7
[
"""DELETE FROM `cache`""",
"""ALTER TABLE `cache_data`
ADD COLUMN `cdata_index` TINYINT UNSIGNED NOT NULL
AFTER `cdata_codelet`"""
],
# 7 -> 8
[
"""ALTER TABLE `origins`
DROP COLUMN `origin_image`"""
],
# 8 -> 9
[
"""DELIMITER //
CREATE PROCEDURE `empty_database`()
BEGIN
DELETE FROM `codelets`;
DELETE FROM `code`;
DELETE FROM `cache`;
ALTER TABLE `codelets` AUTO_INCREMENT = 1;
ALTER TABLE `authors` AUTO_INCREMENT = 1;
ALTER TABLE `symbols` AUTO_INCREMENT = 1;
ALTER TABLE `symbol_locations` AUTO_INCREMENT = 1;
END//
DELIMITER ;"""
],
# 9 -> 10
[
"""ALTER TABLE `symbol_locations`
MODIFY COLUMN `sloc_col` INT UNSIGNED DEFAULT NULL,
MODIFY COLUMN `sloc_end_row` INT UNSIGNED DEFAULT NULL,
MODIFY COLUMN `sloc_end_col` INT UNSIGNED DEFAULT NULL"""
],
# 10 -> 11
[
"""ALTER DATABASE `bitshift`
CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci"""
],
# 11 -> 12
[
"""CREATE TABLE `stopwords` (
`value` varchar(18) NOT NULL DEFAULT ""
) ENGINE=InnoDB DEFAULT CHARSET=utf8""",
"""INSERT INTO `stopwords` VALUES
("a"), ("about"), ("an"), ("are"), ("as"), ("at"), ("be"), ("by"),
("how"), ("i"), ("it"), ("la"), ("of"), ("on"), ("that"), ("the"),
("to"), ("und"), ("was"), ("what"), ("when"), ("where"), ("who"),
("will")"""
]
]

if __name__ == "__main__":
from . import Database

Database(migrate=True).close()
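`Database._check_version()` compares the stored schema version against `VERSION` and, when run with `migrate=True` (as in the `__main__` block above), replays each pending list of statements in order. A sketch of which steps would run for a database currently at version 10, assuming a configured checkout so the package imports cleanly:

    from bitshift.database.migration import MIGRATIONS, VERSION

    current = 10
    for version in range(current, VERSION):
        # MIGRATIONS[version - 1] holds the "version -> version + 1" statements.
        step = MIGRATIONS[version - 1]
        print("migrating to %d (%d statement(s))" % (version + 1, len(step)))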

bitshift/database/schema.sql (+141 -0)

@@ -0,0 +1,141 @@
-- Schema version 12

CREATE DATABASE `bitshift`
DEFAULT CHARACTER SET utf8mb4
COLLATE utf8mb4_unicode_ci;
USE `bitshift`;

CREATE TABLE `version` (
`version` INT UNSIGNED NOT NULL
) ENGINE=InnoDB;
INSERT INTO `version` VALUES (12);

CREATE TABLE `stopwords`
LIKE information_schema.innodb_ft_default_stopword
ENGINE=InnoDB;

CREATE TABLE `stopwords` (
`value` varchar(18) NOT NULL DEFAULT ""
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
INSERT INTO `stopwords` VALUES
("a"), ("about"), ("an"), ("are"), ("as"), ("at"), ("be"), ("by"), ("how"),
("i"), ("it"), ("la"), ("of"), ("on"), ("that"), ("the"), ("to"), ("und"),
("was"), ("what"), ("when"), ("where"), ("who"), ("will");

CREATE TABLE `origins` (
`origin_id` TINYINT UNSIGNED NOT NULL AUTO_INCREMENT,
`origin_name` VARCHAR(64) DEFAULT NULL,
`origin_url` VARCHAR(512) DEFAULT NULL,
`origin_url_base` VARCHAR(512) DEFAULT NULL,
PRIMARY KEY (`origin_id`)
) ENGINE=InnoDB;
INSERT INTO `origins` VALUES (1, NULL, NULL, NULL);

CREATE TABLE `code` (
`code_id` BIGINT NOT NULL,
`code_lang` SMALLINT UNSIGNED DEFAULT NULL,
`code_code` MEDIUMTEXT NOT NULL,
PRIMARY KEY (`code_id`),
KEY (`code_lang`),
FULLTEXT KEY (`code_code`)
) ENGINE=InnoDB;

CREATE TABLE `codelets` (
`codelet_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
`codelet_name` VARCHAR(300) NOT NULL,
`codelet_code_id` BIGINT NOT NULL,
`codelet_origin` TINYINT UNSIGNED NOT NULL,
`codelet_url` VARCHAR(512) NOT NULL,
`codelet_rank` FLOAT NOT NULL,
`codelet_date_created` DATETIME DEFAULT NULL,
`codelet_date_modified` DATETIME DEFAULT NULL,
PRIMARY KEY (`codelet_id`),
FULLTEXT KEY (`codelet_name`),
KEY (`codelet_rank`),
KEY (`codelet_date_created`),
KEY (`codelet_date_modified`),
FOREIGN KEY (`codelet_code_id`)
REFERENCES `code` (`code_id`)
ON DELETE RESTRICT ON UPDATE CASCADE,
FOREIGN KEY (`codelet_origin`)
REFERENCES `origins` (`origin_id`)
ON DELETE RESTRICT ON UPDATE CASCADE
) ENGINE=InnoDB;

CREATE TABLE `authors` (
`author_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
`author_codelet` BIGINT UNSIGNED NOT NULL,
`author_name` VARCHAR(128) NOT NULL,
`author_url` VARCHAR(512) DEFAULT NULL,
PRIMARY KEY (`author_id`),
FULLTEXT KEY (`author_name`),
FOREIGN KEY (`author_codelet`)
REFERENCES `codelets` (`codelet_id`)
ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

CREATE TABLE `symbols` (
`symbol_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
`symbol_code` BIGINT NOT NULL,
`symbol_type` TINYINT UNSIGNED NOT NULL,
`symbol_name` VARCHAR(512) NOT NULL,
PRIMARY KEY (`symbol_id`),
KEY (`symbol_type`, `symbol_name`(32)),
FOREIGN KEY (`symbol_code`)
REFERENCES `code` (`code_id`)
ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

CREATE TABLE `symbol_locations` (
`sloc_id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
`sloc_symbol` BIGINT UNSIGNED NOT NULL,
`sloc_type` TINYINT UNSIGNED NOT NULL,
`sloc_row` INT UNSIGNED NOT NULL,
`sloc_col` INT UNSIGNED DEFAULT NULL,
`sloc_end_row` INT UNSIGNED DEFAULT NULL,
`sloc_end_col` INT UNSIGNED DEFAULT NULL,
PRIMARY KEY (`sloc_id`),
FOREIGN KEY (`sloc_symbol`)
REFERENCES `symbols` (`symbol_id`)
ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

CREATE TABLE `cache` (
`cache_id` BIGINT NOT NULL,
`cache_count_mnt` SMALLINT UNSIGNED NOT NULL,
`cache_count_exp` TINYINT UNSIGNED NOT NULL,
`cache_created` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`cache_id`)
) ENGINE=InnoDB;

CREATE TABLE `cache_data` (
`cdata_cache` BIGINT NOT NULL,
`cdata_codelet` BIGINT UNSIGNED NOT NULL,
`cdata_index` TINYINT UNSIGNED NOT NULL,
PRIMARY KEY (`cdata_cache`, `cdata_codelet`),
FOREIGN KEY (`cdata_cache`)
REFERENCES `cache` (`cache_id`)
ON DELETE CASCADE ON UPDATE CASCADE,
FOREIGN KEY (`cdata_codelet`)
REFERENCES `codelets` (`codelet_id`)
ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB;

DELIMITER //
CREATE PROCEDURE `empty_database`()
BEGIN
DELETE FROM `codelets`;
DELETE FROM `code`;
DELETE FROM `cache`;
ALTER TABLE `codelets` AUTO_INCREMENT = 1;
ALTER TABLE `authors` AUTO_INCREMENT = 1;
ALTER TABLE `symbols` AUTO_INCREMENT = 1;
ALTER TABLE `symbol_locations` AUTO_INCREMENT = 1;
END//
DELIMITER ;

CREATE EVENT `flush_cache`
ON SCHEDULE EVERY 1 HOUR
DO
DELETE FROM `cache`
WHERE `cache_created` < DATE_SUB(NOW(), INTERVAL 1 DAY);

bitshift/languages.py (+22 -0)

@@ -0,0 +1,22 @@
from os import path

import yaml

__all__ = ["LANGS", "LANGS_ALL"]

def _load_langs():
filename = path.join(path.dirname(__file__), "languages.yml")
with open(filename) as fp:
data = yaml.load(fp)["languages"]
langs = [(it.keys()[0] if isinstance(it, dict) else it).encode("utf8")
for it in data]
all_langs = {}
for i, lang in enumerate(data):
if isinstance(lang, dict):
for val in lang.values()[0]:
all_langs[val] = i
else:
all_langs[lang] = i
return langs, all_langs

LANGS, LANGS_ALL = _load_langs()
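In practice `LANGS` is the list of canonical language names from `languages.yml`, and `LANGS_ALL` maps every Pygments lexer name (including the grouped aliases) to the index of its canonical language. A short sketch, again assuming the package imports cleanly:

    from bitshift.languages import LANGS, LANGS_ALL

    idx = LANGS_ALL["Python 3"]        # aliases collapse onto one language...
    print(LANGS[idx])                  # ...so this prints "Python"
    print(LANGS_ALL["NumPy"] == idx)   # True, per the grouping in languages.yml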

bitshift/languages.yml (+368 -0)

@@ -0,0 +1,368 @@
# A list of programming languages supported by bitshift:

languages:
# With parsers:
- Python:
- Python
- Python 3
- Python 3.0 Traceback
- Python console session
- Python Traceback
- NumPy
- C
- Java
- Ruby:
- Ruby
- Ruby irb session

# Without parsers:
- ABAP
- APL
- ActionScript:
- ActionScript
- ActionScript 3
- ANTLR:
- ANTLR
- ANTLR With ActionScript Target
- ANTLR With CPP Target
- "ANTLR With C# Target"
- ANTLR With Java Target
- ANTLR With ObjectiveC Target
- ANTLR With Perl Target
- ANTLR With Python Target
- ANTLR With Ruby Target
- Ada
- Agda:
- Agda
- Literate Agda
- Alloy
- AmbientTalk
- ApacheConf
- AppleScript
- AspectJ
- aspx-cs
- aspx-vb
- Asymptote
- autohotkey
- AutoIt
- Awk
- BBCode
- BUGS
- Bash:
- Bash
- Bash Session
- Batchfile
- Befunge
- BlitzBasic:
- BlitzBasic
- BlitzMax
- Boo
- Brainfuck
- Bro
- "C#"
- C++
- ca65
- CBM BASIC V2
- Ceylon
- CFEngine3
- cfstatement
- ChaiScript
- Chapel
- Cheetah
- Cirru
- Clay
- Clojure:
- Clojure
- ClojureScript
- CMake
- COBOL:
- COBOL
- COBOLFree
- CoffeeScript
- Coldfusion CFC
- Coldfusion HTML
- Common Lisp
- Coq
- Croc
- Cryptol:
- Cryptol
- Literate Cryptol
- CSS:
- CSS
- CSS+Django/Jinja
- CSS+Genshi Text
- CSS+Lasso
- CSS+Mako
- CSS+Mako
- CSS+Myghty
- CSS+PHP
- CSS+Ruby
- CSS+Smarty
- CUDA
- Cypher
- Cython
- D
- Darcs Patch
- Dart
- Debian Control file
- Debian Sourcelist
- Delphi
- dg
- Diff
- Django/Jinja
- Docker
- DTD
- Duel
- Dylan:
- Dylan
- Dylan session
- DylanLID
- EBNF
- eC
- ECL
- Eiffel
- Elixir:
- Elixir
- Elixir iex session
- Embedded Ragel
- ERB:
- ERB
- RHTML
- Erlang:
- Erlang
- Erlang erl session
- Evoque
- Factor
- Fancy
- Fantom
- Felix
- Fortran
- FoxPro
- FSharp
- GAP
- GAS
- Genshi
- Genshi Text
- Gettext Catalog
- Gherkin
- GLSL
- Gnuplot
- Go
- Golo
- GoodData-CL
- Gosu
- Gosu Template
- Groff
- Groovy
- Haml
- Handlebars
- Haskell:
- Haskell
- Literate Haskell
- Haxe
- HTML:
- HTML
- HTML+Cheetah
- HTML+Django/Jinja
- HTML+Evoque
- HTML+Genshi
- HTML+Lasso
- HTML+Mako
- HTML+Mako
- HTML+Myghty
- HTML+PHP
- HTML+Smarty
- HTML+Velocity
- Hxml
- Hy
- Hybris
- IDL
- Idris:
- Idris
- Literate Idris
- Igor
- Inform 6:
- Inform 6
- Inform 6 template
- Inform 7
- INI
- Io
- Ioke
- Jade
- JAGS
- Jasmin
- Java Server Page
- JavaScript:
- JavaScript
- JavaScript+Cheetah
- JavaScript+Django/Jinja
- JavaScript+Genshi Text
- JavaScript+Lasso
- JavaScript+Mak
- JavaScript+Mako
- JavaScript+Myghty
- JavaScript+PHP
- JavaScript+Ruby
- JavaScript+Smarty
- JSON
- Julia:
- Julia
- Julia console
- Kal
- Kconfig
- Koka
- Kotlin
- Lasso
- Lighttpd configuration file
- Limbo
- LiveScript
- LLVM
- Logos
- Logtalk
- LSL
- Lua
- Makefile
- Makefile
- Base Makefile
- Mako
- MAQL
- Mask
- Mason
- Mathematica
- Matlab:
- Matlab
- Matlab session
- MiniD
- Modelica
- Modula-2
- Monkey
- MOOCode
- MoonScript
- MQL
- Mscgen
- MuPAD
- MXML
- Myghty
- NASM
- Nemerle
- nesC
- NewLisp
- Newspeak
- Nginx configuration file
- Nimrod
- Nix
- NSIS
- Objective-C
- Objective-C++
- Objective-J
- OCaml
- Octave
- Ooc
- Opa
- OpenEdge ABL
- Pan
- Pawn
- Perl:
- Perl
- Perl6
- PHP
- Pig
- Pike
- PostScript
- POVRay
- PowerShell
- Prolog
- Properties
- Protocol Buffer
- Puppet
- PyPy Log
- QBasic
- QML
- Racket
- Ragel:
- Ragel
- Ragel in C Host
- Ragel in CPP Host
- Ragel in D Host
- Ragel in Java Host
- Ragel in Objective C Host
- Ragel in Ruby Host
- RConsole
- Rd
- REBOL
- Red
- Redcode
- reg
- reStructuredText
- Rexx
- RobotFramework
- RPMSpec
- RQL
- RSL
- Rust
- S
- Sass:
- Sass
- SCSS
- Scala
- Scalate Server Page
- Scaml
- Scheme
- Scilab
- Shell Session
- Slim
- Smali
- Smalltalk
- Smarty
- Snobol
- SourcePawn
- SPARQL
- SQL:
- SQL
- MySQL
- PL/pgSQL
- PostgreSQL console (psql)
- PostgreSQL SQL dialect
- sqlite3con

- SquidConf
- Stan
- Standard ML
- SWIG
- systemverilog
- Tcl
- Tcsh
- Tea
- TeX
- Todotxt
- Treetop
- TypeScript
- UrbiScript
- Vala
- VB.net
- VCTreeStatus
- Velocity
- verilog
- VGL
- vhdl
- VimL
- XML:
- XML
- XML+Cheetah
- XML+Django/Jinja
- XML+Evoque
- XML+Lasso
- XML+Mako
- XML+Mako
- XML+Myghty
- XML+PHP
- XML+Ruby
- XML+Smarty
- XML+Velocity
- XQuery
- XSLT
- Xtend
- YAML:
- YAML
- YAML+Jinja
- Zephir

bitshift/parser/__init__.py (+90 -0)

@@ -0,0 +1,90 @@
import json
import subprocess

from os import path
from pygments import lexers as pgl, util

from ..languages import LANGS, LANGS_ALL
from .python import parse_py

__all__ = ["parse", "UnsupportedFileError", "start_parse_servers"]

# TODO: Change these
PARSER_COMMANDS = {
'Java': ['java', '-cp',
path.join(path.dirname(__file__), "../../parsers/java/parsing.jar"),
'com.bitshift.parsing.Parse'],
'Ruby': ['rake', '-f',
path.join(path.dirname(__file__), "../../parsers/ruby/Rakefile"),
'parse']
}

class UnsupportedFileError(Exception):
pass

def _lang(codelet):
"""
Private function to identify the language of a codelet.

:param codelet: The codelet object to be identified.

:type code: Codelet

.. todo::
Modify function to incorporate tags from stackoverflow.
"""

try:
if codelet.filename:
lex = pgl.guess_lexer_for_filename(codelet.filename, codelet.code)
else:
lex = pgl.guess_lexer(codelet.code)
return LANGS_ALL[lex.name]
except (util.ClassNotFound, KeyError):
raise UnsupportedFileError(codelet.filename)

def parse_via_proc(codelet):
proc = subprocess.Popen(PARSER_COMMANDS[LANGS[codelet.language]],
stdin=subprocess.PIPE, stdout=subprocess.PIPE)

data = proc.communicate(codelet.code)[0]
symbols = json.loads(data)
return symbols

PARSERS = {
"Python": parse_py,
"Java": parse_via_proc,
"Ruby": parse_via_proc,
}

def parse(codelet):
"""
Dispatches the codelet to the correct parser based on its language.
It is the job of the respective parsers to accumulate data about the
code and to convert it into a string representing a python dict.
The codelet is then given the dict as its 'symbols' field.

:param codelet: The codelet object to be parsed.

:type code: Codelet
"""
lang = _lang(codelet)
lang_string = LANGS[lang]
codelet.language = lang

def loc_helper(l):
for i in l:
if i == -1:
yield None
else:
yield i

if lang_string in PARSERS:
symbols = PARSERS[lang_string](codelet)
symbols = {
key: [(name,
[tuple(loc_helper(loc)) for loc in syms[name]["assignments"]],
[tuple(loc_helper(loc)) for loc in syms[name]["uses"]])
for name in syms]
for key, syms in symbols.iteritems()}
codelet.symbols = symbols
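Putting the pieces together, a rough usage sketch (assuming a configured checkout so the package and its Python parser import cleanly); the exact contents of `symbols` depend on the per-language parsers, so the comments below only indicate the shape:

    from bitshift.codelet import Codelet
    from bitshift.parser import parse, UnsupportedFileError

    codelet = Codelet("example", "def add(a, b):\n    return a + b\n",
                      "example.py", None, [], "http://example.com/example.py",
                      None, None, 0.0)
    try:
        parse(codelet)    # guesses the language, then fills codelet.symbols
    except UnsupportedFileError:
        pass

    print(codelet.language)        # index into LANGS
    print(codelet.symbols.keys())  # e.g. ["functions", "vars", ...], each a
                                   # list of (name, definition locs, use locs)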

bitshift/parser/c.py (+106 -0)

@@ -0,0 +1,106 @@
from pycparser import c_parser, c_ast

class _TreeCutter(c_ast.NodeVisitor):
"""
Local node visitor for C abstract syntax trees.

:ivar accum: (dict) Information on variables, functions, and structs
accumulated from an abstract syntax tree.

:ivar cache: (dict or None) Information stored about parent nodes. Added
to accum when node reaches the lowest possible level.

.. todo::
Add visit function for c_ast.ID to record all uses of a variable.

Use self.cache to store extra information about variables.
"""

def __init__(self):
"""
Create a _TreeCutter instance.
"""

self.accum = {'vars': {}, 'functions': {}, 'structs': {}}
self.cache = None

def start_n_end(self, node):
pass

def visit_FuncDecl(self, node):
"""
Visits FuncDecl nodes in a tree. Adds relevant data about them to accum
after visiting all of its children as well.

:param node: The current node.

:type node: c_ast.FuncDecl

.. todo::
Add other relevant information about functions like parameters and
return type.
"""

self.cache['group'] = 'functions'
self.cache['meta']['end_ln'] = node.coord.line
self.cache['meta']['end_col'] = node.coord.column

self.generic_visit(node)

def visit_Struct(self, node):
"""
Visits Struct nodes in a tree. Adds relevant data about them to accum
after visiting all of its children as well.

:param node: The current node.

:type node: c_ast.Struct

.. todo::
Find other relevant information to add about structs.
"""

self.cache['group'] = 'structs'
self.cache['meta']['end_ln'] = node.coord.line
self.cache['meta']['end_col'] = node.coord.column

self.generic_visit(node)

def visit_Decl(self, node):
"""
Visits Decl nodes in a tree. Adds relevant data about them to accum
after visiting all of its children as well.

:param node: The current node.

:type node: c_ast.Decl
"""

self.cache = {'group': 'vars', 'meta': {}}

self.cache['meta']['start_ln'] = node.coord.line
self.cache['meta']['start_col'] = node.coord.column
self.cache['meta']['end_ln'] = node.coord.line
self.cache['meta']['end_col'] = node.coord.column

self.generic_visit(node)

self.accum[self.cache['group']][node.name] = self.cache['meta']
self.cache = None

def parse_c(codelet):
"""
Adds a 'symbols' field to the codelet after parsing the C code.

:param codelet: The codelet object to be parsed.

:type codelet: Codelet

.. todo::
Preprocess c code so that no ParseErrors are thrown.
"""

tree = c_parser.CParser().parse(codelet.code)
cutter = _TreeCutter()
cutter.visit(tree)
codelet.symbols = cutter.accum
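
A minimal sketch of running parse_c above on a trivial snippet, assuming pycparser is installed; FakeCodelet is again a hypothetical stand-in with only the attributes parse_c touches.

from bitshift.parser.c import parse_c

class FakeCodelet(object):
    """Hypothetical stand-in; parse_c only reads .code and sets .symbols."""
    def __init__(self, code):
        self.code = code
        self.symbols = None

codelet = FakeCodelet("int counter = 0;")
parse_c(codelet)
print(codelet.symbols)   # {'vars': {'counter': {...}}, 'functions': {}, 'structs': {}}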

+ 217
- 0
bitshift/parser/python.py View File

@@ -0,0 +1,217 @@
import ast
import re

encoding_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", re.UNICODE)

class _TreeWalker(ast.NodeVisitor):
"""
Local node visitor for Python abstract syntax trees.

:ivar symbols: (dict) Information on variables, functions, and classes
accumulated from an abstract syntax tree.

:ivar cache: (dict or None) Information stored about parent nodes. Added
to symbols when node reaches the lowest possible level.

.. todo::
Add visit function for ast.Name to record all uses of a variable.

Use self.cache to store extra information about nodes.
"""

def __init__(self):
"""
Create a _TreeWalker instance.
"""

self.symbols = {'vars': {}, 'functions': {}, 'classes': {}}
self.cache = []

def clear_cache(self):
self.cache = []

def block_position(self, node):
"""
Helper function to get the start and end lines of an AST node.

:param node: The node.

:type node: ast.FunctionDef or ast.ClassDef or ast.Module
"""

start_line, start_col = node.lineno, node.col_offset
temp_node = node

while 'body' in temp_node.__dict__:
temp_node = temp_node.body[-1]

end_line, end_col = temp_node.lineno, temp_node.col_offset

if start_line == end_line:
return [start_line, start_col, end_line, -1]

return [start_line, start_col, end_line, end_col]

def visit_Assign(self, node):
"""
Visits Assign nodes in a tree. Adds relevant data about them to symbols.

:param node: The current node.

:type node: ast.Assign

.. todo::
Add value and type metadata to symbols.
"""

pos = self.block_position(node)

for t in node.targets:
self.visit(t)

for name in self.cache:
if not self.symbols['vars'].has_key(name):
self.symbols['vars'][name] = {'assignments': [], 'uses': []}

self.symbols['vars'][name]['assignments'].append(pos)

self.clear_cache()
self.visit(node.value)

for name in self.cache:
if not self.symbols['vars'].has_key(name):
self.symbols['vars'][name] = {'assignments': [], 'uses': []}

self.symbols['vars'][name]['uses'].append(pos)

self.clear_cache()

def visit_FunctionDef(self, node):
"""
Visits FunctionDef nodes in a tree. Adds relevant data about them to symbols.

:param node: The current node.

:type node: ast.FunctionDef

.. todo::
Add arguments and decorators metadata to symbols.
"""

pos = self.block_position(node)

if not self.symbols['functions'].has_key(node.name):
self.symbols['functions'][node.name] = {'assignments': [], 'uses': []}

self.symbols['functions'][node.name]['assignments'].append(pos)

self.generic_visit(node)

def visit_Call(self, node):
"""
Visits Call nodes in a tree. Adds relevant data about them to the
functions section of symbols.

:param node: The current node.

:type node: ast.Call

.. todo::
Add arguments and decorators metadata to symbols.
"""

pos = self.block_position(node)

self.visit(node.func)
if not self.cache:
return
name = self.cache.pop()

if not self.symbols['functions'].has_key(name):
self.symbols['functions'][name] = {'assignments': [], 'uses': []}

self.symbols['functions'][name]['uses'].append(pos)

for name in self.cache:
if not self.symbols['vars'].has_key(name):
self.symbols['vars'][name] = {'assignments': [], 'uses': []}

self.symbols['vars'][name]['uses'].append(pos)

self.clear_cache()

for a in node.args:
self.visit(a)

for name in self.cache:
if not self.symbols['vars'].has_key(name):
self.symbols['vars'][name] = {'assignments': [], 'uses': []}

self.symbols['vars'][name]['uses'].append(pos)

self.clear_cache()

def visit_ClassDef(self, node):
"""
Visits ClassDef nodes in a tree. Adds relevant data about them to symbols.

:param node: The current node.

:type node: ast.ClassDef

.. todo::
Add arguments, inherits, and decorators metadata to symbols.
"""

pos = self.block_position(node)

if node.name not in self.symbols['classes']:
self.symbols['classes'][node.name] = {'assignments': [], 'uses': []}
self.symbols['classes'][node.name]['assignments'].append(pos)

self.generic_visit(node)

def visit_Name(self, node):
self.cache.append(node.id)

def visit_Attribute(self, node):
self.visit(node.value)
self.cache.append(node.attr)

def visit_Import(self, node):
pos = self.block_position(node)
# look through aliases

def parse_py(codelet):
"""
Parses the codelet's Python code and returns a dict of symbols.

:param codelet: The codelet object to be parsed.

:type codelet: Codelet
"""

def strip_encoding(lines):
"""Strips the encoding line from a file, which breaks the parser."""
it = iter(lines)
try:
first = next(it)
if not encoding_re.match(first):
yield first
second = next(it)
if not encoding_re.match(second):
yield second
except StopIteration:
return
for line in it:
yield line

try:
tree = ast.parse("\n".join(strip_encoding(codelet.code.splitlines())))
except SyntaxError:
## TODO: add some logging here?
return {}

walker = _TreeWalker()
walker.visit(tree)
return walker.symbols
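
A minimal sketch of the walker above on a small snippet; FakeCodelet is a hypothetical stand-in, since parse_py only reads the .code attribute.

from bitshift.parser.python import parse_py

class FakeCodelet(object):
    """Hypothetical stand-in; parse_py only reads .code."""
    def __init__(self, code):
        self.code = code

source = "def add(a, b):\n    total = a + b\n    return total\n"
symbols = parse_py(FakeCodelet(source))
print(symbols['functions'])   # 'add' recorded with its definition position
print(symbols['vars'])        # assignment/use positions for the names seen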

+ 320
- 0
bitshift/query/__init__.py View File

@@ -0,0 +1,320 @@
"""
This subpackage contains code to parse search queries received from the
frontend into trees that can be used by the database backend.
"""

from __future__ import unicode_literals
from re import IGNORECASE, search
from sys import maxsize

from dateutil.parser import parse as parse_date

from .nodes import (String, Regex, Text, Language, Author, Date, Symbol,
BinaryOp, UnaryOp)
from .tree import Tree
from ..languages import LANGS

__all__ = ["QueryParseException", "parse_query"]

class QueryParseException(Exception):
"""Raised by parse_query() when a query is invalid."""
pass


class _QueryParser(object):
"""Wrapper class with methods to parse queries. Used as a singleton."""

def __init__(self):
self._prefixes = {
self._parse_language: ["l", "lang", "language"],
self._parse_author: ["a", "author"],
self._parse_modified: ["m", "mod", "modified", "modify"],
self._parse_created: ["cr", "create", "created"],
self._parse_symbol: ["s", "sym", "symb", "symbol"],
self._parse_function: ["f", "fn", "fun", "func", "function",
"meth", "method"],
self._parse_class: ["cl", "class", "clss"],
self._parse_variable: ["v", "var", "variable"],
self._parse_namespace: ["n", "ns", "namespace", "module"],
self._parse_interface: ["in", "inter", "interface", "implements"],
self._parse_import: ["im", "imp", "import", "include", "require",
"imports", "requires"]
}

def _scan_query(self, query, markers):
"""Scan a query (sub)string for the first occurance of some markers.

Returns a 2-tuple of (first_marker_found, marker_index).
"""
def is_escaped(query, index):
"""Return whether a query marker is backslash-escaped."""
return (index > 0 and query[index - 1] == "\\" and
(index < 2 or query[index - 2] != "\\"))

best_marker, best_index = None, maxsize
for marker in markers:
index = query.find(marker)
if is_escaped(query, index):
_, new_index = self._scan_query(query[index + 1:], marker)
index += new_index + 1
if index >= 0 and index < best_index:
best_marker, best_index = marker, index
return best_marker, best_index

def _split_query(self, query, markers, parens=False):
"""Split a query string into a nested list of query terms.

Returns a list of terms and/or nested sublists of terms. Each term and
sublist is guaranteed to be non-empty.
"""
query = query.lstrip()
if not query:
return []
marker, index = self._scan_query(query, markers)
if not marker:
return [query]
nest = [query[:index]] if index > 0 else []
after = query[index + 1:]

if marker == " ":
nest += self._split_query(after, markers, parens)
elif marker in ('"', "'"):
close_marker, close_index = self._scan_query(after, marker)
if close_marker:
if close_index > 0:
nest.append(after[:close_index])
after = after[close_index + 1:]
nest += self._split_query(after, markers, parens)
elif after:
nest.append(after)
elif marker == "(":
inner, after = self._split_query(after, markers, True), []
if inner and isinstance(inner[-1], tuple):
after = self._split_query(inner.pop()[0], markers, parens)
if inner:
nest.append(inner)
if after:
nest += after
elif marker == ")":
if parens:
nest.append((after,))
else:
nest += self._split_query(after, markers)
return nest

def _parse_literal(self, literal):
"""Parse part of a search query into a string or regular expression."""
if literal.startswith(("r:", "re:", "regex:", "regexp:")):
arg = literal.split(":", 1)[1]
if not arg:
err = 'Incomplete query term: "%s"' % literal
raise QueryParseException(err)
return Regex(arg)
return String(literal)

def _parse_language(self, term):
"""Parse part of a query into a language node and return it."""
term = self._parse_literal(term)
if isinstance(term, Regex):
langs = [i for i, lang in enumerate(LANGS)
if search(term.regex, lang, IGNORECASE)]
if not langs:
err = 'No languages found for regex: "%s"' % term.regex
raise QueryParseException(err)
node = Language(langs.pop())
while langs:
node = BinaryOp(Language(langs.pop()), BinaryOp.OR, node)
return node

needle = term.string.lower()
for i, lang in enumerate(LANGS):
if lang.lower() == needle:
return Language(i)
for i, lang in enumerate(LANGS):
if lang.lower().startswith(needle):
return Language(i)
err = 'No languages found for string: "%s"' % term.string
raise QueryParseException(err)

def _parse_author(self, term):
"""Parse part of a query into an author node and return it."""
return Author(self._parse_literal(term))

def _parse_date(self, term, type_):
"""Parse part of a query into a date node and return it."""
if ":" not in term:
err = "A date relationship is required " \
'("before:<date>" or "after:<date>"): "%s"'
raise QueryParseException(err % term)
relstr, dtstr = term.split(":", 1)
if relstr.lower() in ("before", "b"):
relation = Date.BEFORE
elif relstr.lower() in ("after", "a"):
relation = Date.AFTER
else:
err = 'Bad date relationship (should be "before" or "after"): "%s"'
raise QueryParseException(err % relstr)
try:
dt = parse_date(dtstr)
except (TypeError, ValueError):
raise QueryParseException('Bad date/time string: "%s"' % dtstr)
return Date(type_, relation, dt)

def _parse_modified(self, term):
"""Parse part of a query into a date modified node and return it."""
return self._parse_date(term, Date.MODIFY)

def _parse_created(self, term):
"""Parse part of a query into a date created node and return it."""
return self._parse_date(term, Date.CREATE)

def _parse_symbol(self, term, stype=Symbol.ALL):
"""Parse part of a query into a symbol node and return it."""
defines = ("a:", "assign:", "assignment:", "d:", "def:", "definition:",
"decl:", "declare:", "declaration:")
uses = ("u:", "use:", "c:", "call:")
if term.startswith(defines) or term.startswith(uses):
context = Symbol.DEFINE if term.startswith(defines) else Symbol.USE
term_part = term.split(":", 1)[1]
if not term_part:
raise QueryParseException('Incomplete query term: "%s"' % term)
term = term_part
else:
context = Symbol.ALL
literal = self._parse_literal(term)
if isinstance(literal, String):
make_symbol = lambda lit: Symbol(context, stype, String(lit))
symbols = self._split_query(literal.string, " \"'")
node = make_symbol(symbols.pop())
while symbols:
node = BinaryOp(make_symbol(symbols.pop()), BinaryOp.OR, node)
return node
return Symbol(context, stype, literal)

def _parse_function(self, term):
"""Parse part of a query into a function node and return it."""
return self._parse_symbol(term, Symbol.FUNCTION)

def _parse_class(self, term):
"""Parse part of a query into a class node and return it."""
return self._parse_symbol(term, Symbol.CLASS)

def _parse_variable(self, term):
"""Parse part of a query into a variable node and return it."""
return self._parse_symbol(term, Symbol.VARIABLE)

def _parse_namespace(self, term):
"""Parse part of a query into a namespace node and return it."""
return self._parse_symbol(term, Symbol.NAMESPACE)

def _parse_interface(self, term):
"""Parse part of a query into a interface node and return it."""
return self._parse_symbol(term, Symbol.INTERFACE)

def _parse_import(self, term):
"""Parse part of a query into a import node and return it."""
return self._parse_symbol(term, Symbol.IMPORT)

def _parse_term(self, term):
"""Parse a query term into a tree node and return it."""
term = term.replace('\\"', '"').replace("\\\\", "\\")
if ":" in term and not term[0] == ":":
prefix, arg = term.split(":", 1)
invert = prefix.lower() == "not"
if invert:
prefix, arg = arg.split(":", 1)
if not arg:
raise QueryParseException('Incomplete query term: "%s"' % term)
for meth, prefixes in self._prefixes.iteritems():
if prefix.lower() in prefixes:
if invert:
return UnaryOp(UnaryOp.NOT, meth(arg))
return meth(arg)
return Text(self._parse_literal(term))

def _parse_boolean_operators(self, nest):
"""Parse boolean operators in a nested query list."""
op_lookup = {
"and": BinaryOp.AND,
"or": BinaryOp.OR,
"not": UnaryOp.NOT
}
for i, term in enumerate(nest):
if isinstance(term, list):
self._parse_boolean_operators(term)
else:
nest[i] = op_lookup.get(term.lower(), term)

def _parse_nest(self, nest):
"""Recursively parse a nested list of search query terms."""
def parse_binary_op(op):
"""Parse a binary operator in a nested query list."""
index = nest.index(op)
if index == 0 or index == len(nest) - 1:
err = "Invalid query: '%s' given without argument."
raise QueryParseException(err % BinaryOp.OPS[op])
left = self._parse_nest(nest[:index])
right = self._parse_nest(nest[index + 1:])
return BinaryOp(left, op, right)

if not nest:
err = "Error while parsing query: empty nest detected."
raise QueryParseException(err)
elif BinaryOp.OR in nest:
return parse_binary_op(BinaryOp.OR)
elif BinaryOp.AND in nest:
return parse_binary_op(BinaryOp.AND)
elif UnaryOp.NOT in nest:
index = nest.index(UnaryOp.NOT)
if index == len(nest) - 1:
err = "Invalid query: '%s' given without argument."
raise QueryParseException(err % UnaryOp.OPS[UnaryOp.NOT])
right = UnaryOp(UnaryOp.NOT, self._parse_nest(nest[index + 1:]))
if index > 0:
left = self._parse_nest(nest[:index])
return BinaryOp(left, BinaryOp.AND, right)
return right
elif len(nest) > 1:
left, right = self._parse_term(nest[0]), self._parse_nest(nest[1:])
return BinaryOp(left, BinaryOp.AND, right)
elif isinstance(nest[0], list):
return self._parse_nest(nest[0])
else:
return self._parse_term(nest[0])

def _balance_tree(self, node):
"""Auto-balance a tree using a string sorting function."""
if isinstance(node, BinaryOp):
self._balance_tree(node.left)
self._balance_tree(node.right)
if node.right.sortkey() < node.left.sortkey():
node.left, node.right = node.right, node.left
elif isinstance(node, UnaryOp):
self._balance_tree(node.node)

def parse(self, query):
"""
Parse a search query.

The result is normalized with a sorting function so that
``"foo OR bar"`` and ``"bar OR foo"`` result in the same tree. This is
important for caching purposes.

:param query: The query to be converted.
:type query: str

:return: A tree storing the data in the query.
:rtype: :py:class:`~.query.tree.Tree`

:raises: :py:class:`.QueryParseException`
"""
nest = self._split_query(query.rstrip(), " \"'()")
if not nest:
raise QueryParseException('Empty query: "%s"' % query)
self._parse_boolean_operators(nest)
root = self._parse_nest(nest)
self._balance_tree(root)
return Tree(root)


parse_query = _QueryParser().parse
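
A minimal sketch of the parser's entry point, assuming the package (and its languages.yml data) is importable; the query string is only an example.

from bitshift.query import parse_query, QueryParseException

try:
    tree = parse_query('lang:python "binary search"')
    print(tree)              # repr of the normalized query tree
    print(tree.serialize())  # the string form used as a cache key
except QueryParseException as err:
    print(err)               # raised for empty or malformed queries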

+ 297
- 0
bitshift/query/nodes.py View File

@@ -0,0 +1,297 @@
from ..languages import LANGS

__all__ = ["String", "Regex", "Text", "Language", "Author", "Date", "Symbol",
"BinaryOp", "UnaryOp"]

class _Node(object):
"""Represents a single node in a query tree.

Generally speaking, a node is a constraint applied to the database. Thus,
a :py:class:`~.Language` node represents a constraint where only codelets
of a specific language are selected.
"""

def _null_regex(self, expr):
"""Implements a regex search with support for a null expression."""
return "IF(ISNULL(%s), 0, %s REGEXP ?)" % (expr, expr)

def sortkey(self):
"""Return a string sort key for the node."""
return ""

def parameterize(self, tables):
"""Parameterize the node.

Returns a 4-tuple of (conditional string, parameter list, rank list,
should-we-rank boolean). If the rank list is empty, then it is assumed
to contain the conditional string.
"""
return "", [], [], False


class _Literal(object):
"""Represents a literal component of a search query, present at the leaves.

A literal might be a string or a regular expression.
"""
pass


class String(_Literal):
"""Represents a string literal."""

def __init__(self, string):
"""
:type string: unicode
"""
self.string = string

def __repr__(self):
return "String({0!r})".format(self.string)

def sortkey(self):
return self.string


class Regex(_Literal):
"""Represents a regular expression literal."""

def __init__(self, regex):
"""
:type regex: unicode
"""
self.regex = regex

def __repr__(self):
return "Regex({0!r})".format(self.regex)

def sortkey(self):
return self.regex


class Text(_Node):
"""Represents a text node.

Searches in codelet names (full-text search), symbols (equality), and
source code (full-text search).
"""

def __init__(self, text):
"""
:type text: :py:class:`._Literal`
"""
self.text = text

def __repr__(self):
return "Text({0})".format(self.text)

def sortkey(self):
return self.text.sortkey()

def parameterize(self, tables):
tables |= {"code", "symbols"}
if isinstance(self.text, Regex):
ranks = ["(codelet_name REGEXP ?)", "(code_code REGEXP ?)",
self._null_regex("symbol_name")]
text = self.text.regex
else:
ranks = ["(MATCH(codelet_name) AGAINST (? IN BOOLEAN MODE))",
"(MATCH(code_code) AGAINST (? IN BOOLEAN MODE))",
"(symbol_name <=> ?)"]
text = self.text.string
cond = "(" + " OR ".join(ranks) + ")"
return cond, [text] * 3, ranks, True


class Language(_Node):
"""Represents a language node.

Searches in the code_lang field.
"""

def __init__(self, lang):
"""
:type lang: int
"""
self.lang = lang

def __repr__(self):
return "Language({0})".format(LANGS[self.lang])

def sortkey(self):
return LANGS[self.lang]

def parameterize(self, tables):
tables |= {"code"}
return "(code_lang <=> ?)", [self.lang], [], False


class Author(_Node):
"""Represents a author node.

Searches in the author_name field (full-text search).
"""

def __init__(self, name):
"""
:type name: :py:class:`_Literal`
"""
self.name = name

def __repr__(self):
return "Author({0})".format(self.name)

def sortkey(self):
return self.name.sortkey()

def parameterize(self, tables):
tables |= {"authors"}
if isinstance(self.name, Regex):
cond = self._null_regex("author_name")
return cond, [self.name.regex], [], False
cond = "(MATCH(author_name) AGAINST (? IN BOOLEAN MODE))"
return cond, [self.name.string], [], True


class Date(_Node):
"""Represents a date node.

Searches in the codelet_date_created or codelet_date_modified fields.
"""
CREATE = 1
MODIFY = 2

BEFORE = 1
AFTER = 2

def __init__(self, type_, relation, date):
"""
:type type_: int (``CREATE`` or ``MODIFY``)
:type relation: int (``BEFORE``, ``AFTER``)
:type date: datetime.datetime
"""
self.type = type_
self.relation = relation
self.date = date

def __repr__(self):
types = {self.CREATE: "CREATE", self.MODIFY: "MODIFY"}
relations = {self.BEFORE: "BEFORE", self.AFTER: "AFTER"}
tm = "Date({0}, {1}, {2})"
return tm.format(types[self.type], relations[self.relation], self.date)

def sortkey(self):
return self.date.strftime("%Y%m%d%H%M%S")

def parameterize(self, tables):
column = {self.CREATE: "codelet_date_created",
self.MODIFY: "codelet_date_modified"}[self.type]
op = {self.BEFORE: "<=", self.AFTER: ">="}[self.relation]
cond = "IF(ISNULL(%s), 0, %s %s ?)" % (column, column, op)
return cond, [self.date], [], False


class Symbol(_Node):
"""Represents a symbol node.

Searches in symbol_type and symbol_name.
"""
ALL = -1
DEFINE = 0
USE = 1

FUNCTION = 0
CLASS = 1
VARIABLE = 2
NAMESPACE = 3
INTERFACE = 4
IMPORT = 5
TYPES = ["functions", "classes", "vars", "namespaces", "interfaces",
"imports"]
TYPE_REPR = ["FUNCTION", "CLASS", "VARIABLE", "NAMESPACE", "INTERFACE",
"IMPORT"]

def __init__(self, context, type_, name):
"""
:type context: int (``DEFINE`` or ``USE``)
:type type_: int (``ALL``, ``FUNCTION``, ``CLASS``, etc.)
:type name: :py:class:`._Literal`
"""
self.context = context
self.type = type_
self.name = name

def __repr__(self):
context = ["DEFINE", "USE", "ALL"][self.context]
type_ = self.TYPE_REPR[self.type] if self.type >= 0 else "ALL"
return "Symbol({0}, {1}, {2})".format(context, type_, self.name)

def sortkey(self):
return self.name.sortkey()

def parameterize(self, tables):
tables |= {"code", "symbols"}
if isinstance(self.name, Regex):
cond, name = self._null_regex("symbol_name"), self.name.regex
else:
cond, name = "symbol_name <=> ?", self.name.string
if self.type == self.ALL:
types = ", ".join(str(typ) for typ in xrange(len(self.TYPES)))
part = " AND IF(ISNULL(symbol_type), 0, symbol_type IN (%s))"
cond += part % types
if self.type != self.ALL:
cond += " AND symbol_type <=> %d" % self.type
if self.context != self.ALL:
tables |= {"symbol_locations"}
cond += " AND sloc_type <=> %d" % self.context
return "(" + cond + ")", [name], [], False


class BinaryOp(_Node):
"""Represents a relationship between two nodes: ``and``, ``or``."""
AND = object()
OR = object()
OPS = {AND: "AND", OR: "OR"}

def __init__(self, left, op, right):
self.left = left
self.op = op
self.right = right

def __repr__(self):
tmpl = "BinaryOp({0}, {1}, {2})"
return tmpl.format(self.left, self.OPS[self.op], self.right)

def sortkey(self):
return self.left.sortkey() + self.right.sortkey()

def parameterize(self, tables):
lcond, largs, lranks, need_lranks = self.left.parameterize(tables)
rcond, rargs, rranks, need_rranks = self.right.parameterize(tables)
lranks, rranks = lranks or [lcond], rranks or [rcond]
op = self.OPS[self.op]
cond = "(" + lcond + " " + op + " " + rcond + ")"
need_ranks = need_lranks or need_rranks or self.op == self.OR
return cond, largs + rargs, lranks + rranks, need_ranks


class UnaryOp(_Node):
"""Represents a transformation applied to one node: ``not``."""
NOT = object()
OPS = {NOT: "NOT"}

def __init__(self, op, node):
self.op = op
self.node = node

def __repr__(self):
return "UnaryOp({0}, {1})".format(self.OPS[self.op], self.node)

def sortkey(self):
return self.node.sortkey()

def parameterize(self, tables):
cond, args, ranks, need_ranks = self.node.parameterize(tables)
new_cond = "(" + self.OPS[self.op] + " " + cond + ")"
ranks = ranks or [cond]
return new_cond, args, ranks, need_ranks
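
A minimal sketch of how the nodes above turn into SQL fragments; the language index 0 is arbitrary here, real indices come from LANGS.

from bitshift.query.nodes import String, Text, Language, BinaryOp

tables = set()
node = BinaryOp(Language(0), BinaryOp.AND, Text(String(u"quicksort")))
cond, args, ranks, need_ranks = node.parameterize(tables)

print(cond)     # parameterized WHERE fragment: ((code_lang <=> ?) AND (...))
print(args)     # parameters to bind: the language index plus the search text
print(tables)   # tables the final query must join, e.g. code and symbols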

+ 84
- 0
bitshift/query/tree.py View File

@@ -0,0 +1,84 @@
from . import nodes

__all__ = ["Tree"]

QUERY_TEMPLATE = """SELECT codelet_id, MAX(codelet_rank%s) AS score
FROM codelets %s
WHERE %s
GROUP BY codelet_id
ORDER BY score DESC
LIMIT %d OFFSET %d""".replace("\n", " ")

class Tree(object):
"""Represents a query tree."""

def __init__(self, root):
self._root = root

def __repr__(self):
return "Tree({0})".format(self._root)

@property
def root(self):
"""The root node of the tree."""
return self._root

def sortkey(self):
"""Return a string sort key for the query tree."""
return self._root.sortkey()

def serialize(self):
"""Create a string representation of the query for caching.

:return: Query string representation.
:rtype: str
"""
return repr(self)

def walk(self, node_type=None):
"""Walk through the query tree, returning nodes of a specific type."""
pending = [self._root]
while pending:
node = pending.pop()
if not node_type or isinstance(node, node_type):
yield node
if isinstance(node, nodes.UnaryOp):
pending.append(node.node)
elif isinstance(node, nodes.BinaryOp):
pending.extend([node.left, node.right])

def build_query(self, page=1, page_size=10):
"""Convert the query tree into a parameterized SQL SELECT statement.

:param page: The page number to get results for.
:type page: int
:param page_size: The number of results per page.
:type page_size: int

:return: SQL query data.
:rtype: 2-tuple of (SQL statement string, query parameter tuple)
"""
def get_table_joins(tables):
joins = [
("INNER", "code", "codelet_code_id", "code_id"),
("LEFT", "authors", "author_codelet", "codelet_id"),
("LEFT", "symbols", "symbol_code", "code_id"),
("LEFT", "symbol_locations", "sloc_symbol", "symbol_id")
]
tmpl = "%s JOIN %s ON %s = %s"
for args in joins:
if args[1] in tables:
yield tmpl % args

tables = set()
cond, arglist, ranks, need_ranks = self._root.parameterize(tables)
ranks = ranks or [cond]
if need_ranks:
score = " + ((%s) / %d)" % (" + ".join(ranks), len(ranks))
else:
score = ""
joins = " ".join(get_table_joins(tables))
offset = (page - 1) * page_size

query = QUERY_TEMPLATE % (score, joins, cond, page_size, offset)
return query, tuple(arglist * 2 if need_ranks else arglist)
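
A minimal sketch tying the query package together, from raw query string to executable SQL; no database connection is needed just to build the statement.

from bitshift.query import parse_query

tree = parse_query('lang:python "hash table"')
sql, params = tree.build_query(page=1, page_size=10)

print(sql)      # SELECT codelet_id, MAX(codelet_rank ...) ... LIMIT 10 OFFSET 0
print(params)   # parameter tuple to bind when executing the statement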

+ 177
- 0
docs/Makefile View File

@@ -0,0 +1,177 @@
# Makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = build

# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif

# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source

.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext

help:
@echo "Please use \`make <target>' where <target> is one of"
@echo " html to make standalone HTML files"
@echo " dirhtml to make HTML files named index.html in directories"
@echo " singlehtml to make a single large HTML file"
@echo " pickle to make pickle files"
@echo " json to make JSON files"
@echo " htmlhelp to make HTML files and a HTML help project"
@echo " qthelp to make HTML files and a qthelp project"
@echo " devhelp to make HTML files and a Devhelp project"
@echo " epub to make an epub"
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
@echo " latexpdf to make LaTeX files and run them through pdflatex"
@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
@echo " text to make text files"
@echo " man to make manual pages"
@echo " texinfo to make Texinfo files"
@echo " info to make Texinfo files and run them through makeinfo"
@echo " gettext to make PO message catalogs"
@echo " changes to make an overview of all changed/added/deprecated items"
@echo " xml to make Docutils-native XML files"
@echo " pseudoxml to make pseudoxml-XML files for display purposes"
@echo " linkcheck to check all external links for integrity"
@echo " doctest to run all doctests embedded in the documentation (if enabled)"

clean:
rm -rf $(BUILDDIR)/*

html:
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

singlehtml:
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
@echo
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

pickle:
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
@echo
@echo "Build finished; now you can process the pickle files."

json:
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
@echo
@echo "Build finished; now you can process the JSON files."

htmlhelp:
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
@echo
@echo "Build finished; now you can run HTML Help Workshop with the" \
".hhp project file in $(BUILDDIR)/htmlhelp."

qthelp:
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
@echo
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/bitshift.qhcp"
@echo "To view the help file:"
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/bitshift.qhc"

devhelp:
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
@echo
@echo "Build finished."
@echo "To view the help file:"
@echo "# mkdir -p $$HOME/.local/share/devhelp/bitshift"
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/bitshift"
@echo "# devhelp"

epub:
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
@echo
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

latex:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
@echo "Run \`make' in that directory to run these through (pdf)latex" \
"(use \`make latexpdf' here to do that automatically)."

latexpdf:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through pdflatex..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

latexpdfja:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through platex and dvipdfmx..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

text:
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
@echo
@echo "Build finished. The text files are in $(BUILDDIR)/text."

man:
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
@echo
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

texinfo:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
@echo "Run \`make' in that directory to run these through makeinfo" \
"(use \`make info' here to do that automatically)."

info:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo "Running Texinfo files through makeinfo..."
make -C $(BUILDDIR)/texinfo info
@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

gettext:
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
@echo
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

changes:
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
@echo
@echo "The overview file is in $(BUILDDIR)/changes."

linkcheck:
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
@echo
@echo "Link check complete; look for any errors in the above output " \
"or in $(BUILDDIR)/linkcheck/output.txt."

doctest:
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
@echo "Testing of doctests in the sources finished, look at the " \
"results in $(BUILDDIR)/doctest/output.txt."

xml:
$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
@echo
@echo "Build finished. The XML files are in $(BUILDDIR)/xml."

pseudoxml:
$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
@echo
@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."

+ 27
- 0
docs/source/api/bitshift.crawler.rst View File

@@ -0,0 +1,27 @@
crawler Package
===============

:mod:`crawler` Package
----------------------

.. automodule:: bitshift.crawler
:members:
:undoc-members:
:show-inheritance:

:mod:`crawler` Module
---------------------

.. automodule:: bitshift.crawler.crawler
:members:
:undoc-members:
:show-inheritance:

:mod:`indexer` Module
---------------------

.. automodule:: bitshift.crawler.indexer
:members:
:undoc-members:
:show-inheritance:


+ 19
- 0
docs/source/api/bitshift.database.rst View File

@@ -0,0 +1,19 @@
database Package
================

:mod:`database` Package
-----------------------

.. automodule:: bitshift.database
:members:
:undoc-members:
:show-inheritance:

:mod:`migration` Module
-----------------------

.. automodule:: bitshift.database.migration
:members:
:undoc-members:
:show-inheritance:


+ 11
- 0
docs/source/api/bitshift.query.rst View File

@@ -0,0 +1,11 @@
query Package
=============

:mod:`query` Package
--------------------

.. automodule:: bitshift.query
:members:
:undoc-members:
:show-inheritance:


+ 45
- 0
docs/source/api/bitshift.rst View File

@@ -0,0 +1,45 @@
bitshift Package
================

:mod:`bitshift` Package
-----------------------

.. automodule:: bitshift.__init__
:members:
:undoc-members:
:show-inheritance:

:mod:`assets` Module
--------------------

.. automodule:: bitshift.assets
:members:
:undoc-members:
:show-inheritance:

:mod:`codelet` Module
---------------------

.. automodule:: bitshift.codelet
:members:
:undoc-members:
:show-inheritance:

:mod:`config` Module
--------------------

.. automodule:: bitshift.config
:members:
:undoc-members:
:show-inheritance:

Subpackages
-----------

.. toctree::

bitshift.crawler
bitshift.database
bitshift.parser
bitshift.query


+ 7
- 0
docs/source/api/modules.rst View File

@@ -0,0 +1,7 @@
bitshift
========

.. toctree::
:maxdepth: 4

bitshift

+ 268
- 0
docs/source/conf.py View File

@@ -0,0 +1,268 @@
# -*- coding: utf-8 -*-
#
# bitshift documentation build configuration file, created by
# sphinx-quickstart on Mon Apr 7 21:09:45 2014.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

import sys
import os

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, os.path.abspath('../..'))

# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.intersphinx',
'sphinx.ext.coverage',
'sphinx.ext.mathjax',
'sphinx.ext.viewcode',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix of source filenames.
source_suffix = '.rst'

# The encoding of source files.
#source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = u'bitshift'
copyright = u'2014, Benjamin Attal, Ben Kurtovic, Severyn Kozak'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '0.1'
# The full version, including alpha/beta/rc tags.
release = '0.1.dev'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = []

# The reST default role (used for this markup: `text`) to use for all
# documents.
#default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []

# If true, keep warnings as "system message" paragraphs in the built documents.
#keep_warnings = False


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'nature'

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []

# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None

# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
#html_extra_path = []

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}

# If false, no module index is generated.
#html_domain_indices = True

# If false, no index is generated.
#html_use_index = True

# If true, the index is split into individual pages for each letter.
#html_split_index = False

# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None

# Output file base name for HTML help builder.
htmlhelp_basename = 'bitshiftdoc'


# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
#'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
('index', 'bitshift.tex', u'bitshift Documentation',
u'Benjamin Attal, Ben Kurtovic, Severyn Kozak', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False

# If true, show page references after internal links.
#latex_show_pagerefs = False

# If true, show URL addresses after external links.
#latex_show_urls = False

# Documents to append as an appendix to all manuals.
#latex_appendices = []

# If false, no module index is generated.
#latex_domain_indices = True


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
('index', 'bitshift', u'bitshift Documentation',
[u'Benjamin Attal, Ben Kurtovic, Severyn Kozak'], 1)
]

# If true, show URL addresses after external links.
#man_show_urls = False


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
('index', 'bitshift', u'bitshift Documentation',
u'Benjamin Attal, Ben Kurtovic, Severyn Kozak', 'bitshift', 'A semantic search engine for source code.',
'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
#texinfo_appendices = []

# If false, no module index is generated.
#texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'

# If true, do not generate a @detailmenu in the "Top" node's menu.
#texinfo_no_detailmenu = False


# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {'http://docs.python.org/': None}

+ 20
- 0
docs/source/index.rst View File

@@ -0,0 +1,20 @@
bitshift
========

**bitshift** is a semantic search engine for source code.

Contents:

.. toctree::
:maxdepth: 2

API Reference <api/modules>


Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`


+ 9
- 0
gunicorn.cfg View File

@@ -0,0 +1,9 @@
# Configuration file for Gunicorn
# http://docs.gunicorn.org/en/latest/configure.html

bind = ["unix:/tmp/gunicorn.sock"]
workers = 4

accesslog = "logs/access.log"
errorlog = "logs/error.log"
loglevel = "info"

+ 72
- 0
parsers/java/pom.xml View File

@@ -0,0 +1,72 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>com.bitshift.parsing</groupId>
<artifactId>parsing</artifactId>
<packaging>jar</packaging>
<version>1.0-SNAPSHOT</version>
<name>parsing</name>
<url>http://maven.apache.org</url>

<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
</dependency>
<dependency>
<groupId>org.eclipse.jdt</groupId>
<artifactId>org.eclipse.jdt.core</artifactId>
<version>3.7.1</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>17.0</version>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>1.2.1</version>
<configuration>
<mainClass>com.bitshift.parsing.Parse</mainClass>
<arguments>
</arguments>
</configuration>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.4</version>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
<configuration>
<archive>
<manifest>
<addClasspath>true</addClasspath>
<mainClass>com.bitshift.parsing.Parse</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<outputDirectory>${project.basedir}</outputDirectory>
<finalName>${project.artifactId}</finalName>
<appendAssemblyId>false</appendAssemblyId>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>

</project>

+ 35
- 0
parsers/java/src/main/java/com/bitshift/parsing/Parse.java View File

@@ -0,0 +1,35 @@
package com.bitshift.parsing;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;

import com.bitshift.parsing.parsers.JavaParser;

public class Parse {

public static void main(String[] args) {
try {
BufferedReader br = new BufferedReader(
new InputStreamReader(System.in));

String str = "";
StringBuilder source = new StringBuilder();
while ((str = br.readLine()) != null) {
source.append(str + "\n");
}

String symbols = (new JavaParser(source.toString())).parse();
BufferedWriter bw = new BufferedWriter(
new OutputStreamWriter(System.out));

bw.write(symbols);
bw.flush();
} catch (IOException e) {

}
}

}
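
For reference, a sketch of driving this entry point the way parse_via_proc in bitshift/parser/__init__.py does, piping source over stdin and reading the symbol map from stdout. It assumes the jar has been built with Maven into parsers/java/parsing.jar and that the script runs from the repository root.

import json
import subprocess

# Mirrors PARSER_COMMANDS['Java']; the relative path assumes the repo root as cwd.
cmd = ["java", "-cp", "parsers/java/parsing.jar", "com.bitshift.parsing.Parse"]

source = "class Hello { void greet() { } }"
proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
output = proc.communicate(source)[0]
print(json.loads(output))   # symbol map emitted by JavaParser/JavaSymbols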

+ 214
- 0
parsers/java/src/main/java/com/bitshift/parsing/parsers/JavaParser.java View File

@@ -0,0 +1,214 @@
package com.bitshift.parsing.parsers;

import java.util.HashMap;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.util.Stack;
import java.util.Arrays;

import com.google.common.base.Joiner;

import org.eclipse.jdt.core.JavaCore;
import org.eclipse.jdt.core.dom.AST;
import org.eclipse.jdt.core.dom.ASTNode;
import org.eclipse.jdt.core.dom.ASTParser;
import org.eclipse.jdt.core.dom.ASTVisitor;
import org.eclipse.jdt.core.dom.CompilationUnit;
import org.eclipse.jdt.core.dom.ClassInstanceCreation;
import org.eclipse.jdt.core.dom.ImportDeclaration;
import org.eclipse.jdt.core.dom.MethodDeclaration;
import org.eclipse.jdt.core.dom.MethodInvocation;
import org.eclipse.jdt.core.dom.Name;
import org.eclipse.jdt.core.dom.PackageDeclaration;
import org.eclipse.jdt.core.dom.QualifiedName;
import org.eclipse.jdt.core.dom.SimpleName;
import org.eclipse.jdt.core.dom.Statement;
import org.eclipse.jdt.core.dom.TypeDeclaration;
import org.eclipse.jdt.core.dom.VariableDeclarationFragment;

import com.bitshift.parsing.symbols.Symbols;
import com.bitshift.parsing.symbols.JavaSymbols;

/*TODO: Work on parsing partial java code.*/
public class JavaParser {
private String source;

public JavaParser(String source) {
this.source = source;
}

private Symbols genSymbols() {
ASTParser parser = ASTParser.newParser(AST.JLS3);
parser.setSource(this.source.toCharArray());

Map options = JavaCore.getOptions();
parser.setCompilerOptions(options);

CompilationUnit root = (CompilationUnit) parser.createAST(null);

NodeVisitor visitor = new NodeVisitor(root);
root.accept(visitor);

return visitor.symbols;
}

public String parse() {
JavaSymbols symbols = (JavaSymbols) this.genSymbols();
return symbols.toString();
}

class NodeVisitor extends ASTVisitor {

protected CompilationUnit root;
protected JavaSymbols symbols;
private Stack<HashMap<String, Object>> _cache;

public NodeVisitor(CompilationUnit root) {
this.root = root;
this.symbols = new JavaSymbols();
this._cache = new Stack<HashMap<String, Object>>();
}

public ArrayList<Integer> blockPosition(ASTNode node) {
int sl = this.root.getLineNumber(node.getStartPosition());
int sc = this.root.getColumnNumber(node.getStartPosition()) + 1;
int el = this.root.getLineNumber(node.getStartPosition()
+ node.getLength() - 1);
int ec = this.root.getColumnNumber(node.getStartPosition()
+ node.getLength() - 1) + 1;

return Symbols.createCoord(sl, sc, el, ec);
}

public boolean visit(MethodDeclaration node) {
HashMap<String, Object> data = new HashMap<String, Object>();
Name nameObj = node.getName();
String name = nameObj.isQualifiedName() ?
((QualifiedName) nameObj).getFullyQualifiedName() :
((SimpleName) nameObj).getIdentifier();

data.put("coord", this.blockPosition(node));
data.put("name", name);
this._cache.push(data);
return true;
}

public void endVisit(MethodDeclaration node) {
HashMap<String, Object> data = this._cache.pop();
String name = (String)data.remove("name");
this.symbols.insertMethodDeclaration("\"" + name + "\"", data);
}

public boolean visit(MethodInvocation node) {
HashMap<String, Object> data = new HashMap<String, Object>();
Name nameObj = node.getName();
String name = nameObj.isQualifiedName() ?
((QualifiedName) nameObj).getFullyQualifiedName() :
((SimpleName) nameObj).getIdentifier();

data.put("coord", this.blockPosition(node));
data.put("name", name);
this._cache.push(data);
return true;
}

public void endVisit(MethodInvocation node) {
HashMap<String, Object> data = this._cache.pop();
String name = (String)data.remove("name");
this.symbols.insertMethodInvocation("\"" + name + "\"", data);
}

public boolean visit(PackageDeclaration node) {
HashMap<String, Object> data = new HashMap<String, Object>();
this._cache.push(data);
return true;
}

public void endVisit(PackageDeclaration node) {
HashMap<String, Object> data = this._cache.pop();
String name = (String)data.remove("name");
this.symbols.setPackage(name);
}

public boolean visit(TypeDeclaration node) {
HashMap<String, Object> data = new HashMap<String, Object>();

data.put("coord", this.blockPosition(node));
this._cache.push(data);
return true;
}

public void endVisit(TypeDeclaration node) {
HashMap<String, Object> data = this._cache.pop();
String name = (String)data.remove("name");

if (node.isInterface()) {
this.symbols.insertInterfaceDeclaration("\"" + name + "\"", data);
} else {
this.symbols.insertClassDeclaration("\"" + name + "\"", data);
}
}

public boolean visit(VariableDeclarationFragment node) {
HashMap<String, Object> data = new HashMap<String, Object>();

data.put("coord", this.blockPosition(node));
this._cache.push(data);
return true;
}

public void endVisit(VariableDeclarationFragment node) {
HashMap<String, Object> data = this._cache.pop();
String name = (String)data.remove("name");
this.symbols.insertVariableDeclaration("\"" + name + "\"", data);
}

public boolean visit(QualifiedName node) {
if (!this._cache.empty()) {
HashMap<String, Object> data = this._cache.pop();

if(!data.containsKey("name")) {
String name = node.getFullyQualifiedName();
data.put("name", name);
}

this._cache.push(data);
}
return true;
}

public boolean visit(SimpleName node) {
if (!this._cache.empty()) {
HashMap<String, Object> data = this._cache.pop();

if(!data.containsKey("name")) {
String name = node.getIdentifier();
data.put("name", name);
}

this._cache.push(data);
}
return true;
}

public boolean visit(ImportDeclaration node) {
HashMap<String, Object> data = new HashMap<String, Object>();

data.put("coord", this.blockPosition(node));
this._cache.push(data);
return true;
}

public void endVisit(ImportDeclaration node) {
HashMap<String, Object> data = this._cache.pop();
String name = (String)data.remove("name");
String[] parts = name.split("\\.");

for(int i = parts.length; i > 1; i--) {
String pkg = Joiner.on(".").join(Arrays.copyOfRange(parts, 0, i));
this.symbols.insertImportStatement("\"" + pkg + "\"", data);
}
}
}
}

+ 71
- 0
parsers/java/src/main/java/com/bitshift/parsing/parsers/Parser.java View File

@@ -0,0 +1,71 @@
package com.bitshift.parsing.parsers;

import java.util.Formatter;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.IOException;

import java.nio.ByteBuffer;

import java.net.Socket;

import com.bitshift.parsing.symbols.Symbols;

public abstract class Parser implements Runnable {

protected Socket clientSocket;
private String eos;

public Parser(Socket clientSocket) {
this.clientSocket = clientSocket;
}

protected String readFromClient() {
String fromClient = "";

try {
BufferedReader clientReader = new BufferedReader(
new InputStreamReader(this.clientSocket.getInputStream()));

int bytes = Integer.parseInt(clientReader.readLine());
this.eos = clientReader.readLine();

StringBuilder builder = new StringBuilder();
int i = 0;

while(i < bytes) {
char aux = (char)clientReader.read();
builder.append(aux);
i++;
}

fromClient = builder.toString();

} catch (IOException ex) {
}

return fromClient;
}

protected void writeToClient(String toClient) {
try {
BufferedWriter clientWriter = new BufferedWriter(
new OutputStreamWriter(this.clientSocket.getOutputStream()));

clientWriter.write(toClient);
clientWriter.write(eos);
clientWriter.flush();
this.clientSocket.close();
} catch (IOException ex) {
}
}

protected abstract Symbols genSymbols();

public abstract void run();

}
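
The reader and writer above imply a small length-prefixed protocol: a byte-count line, an end-of-stream token line, then the payload, with the response terminated by the same token. A purely hypothetical client sketch follows; the host, port, and token are illustrative only, since this commit does not show a server wired to these methods.

import socket

HOST, PORT = "localhost", 5001   # hypothetical; no listener is defined in this commit
EOS = "_EOS_"                    # end-of-stream token echoed back by writeToClient

source = "class Hello { }"
payload = "%d\n%s\n%s" % (len(source), EOS, source)

sock = socket.create_connection((HOST, PORT))
sock.sendall(payload)

response = ""
while not response.endswith(EOS):
    chunk = sock.recv(4096)
    if not chunk:
        break
    response += chunk
sock.close()

print(response[:-len(EOS)] if response.endswith(EOS) else response)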


+ 177
- 0
parsers/java/src/main/java/com/bitshift/parsing/symbols/JavaSymbols.java View File

@@ -0,0 +1,177 @@
package com.bitshift.parsing.symbols;

import java.util.HashMap;
import java.util.ArrayList;
import com.bitshift.parsing.symbols.Symbols;

/*TODO: Overwrite toString.*/
public class JavaSymbols extends Symbols {

private String _packageName;
private HashMap<String, HashMap<String, Object>> _classes;
private HashMap<String, HashMap<String, Object>> _interfaces;
private HashMap<String, HashMap<String, Object>> _methods;
private HashMap<String, HashMap<String, Object>> _vars;
private HashMap<String, HashMap<String, Object>> _imports;

private final String assignKey = "\"assignments\"";
private final String useKey = "\"uses\"";

public JavaSymbols() {
_packageName = null;
_classes = new HashMap<String, HashMap<String, Object>>();
_interfaces = new HashMap<String, HashMap<String, Object>>();
_methods = new HashMap<String, HashMap<String, Object>>();
_vars = new HashMap<String, HashMap<String, Object>>();
_imports = new HashMap<String, HashMap<String, Object>>();
}

public boolean setPackage(String name) {
_packageName = name;
return true;
}

public boolean insertClassDeclaration(String name, HashMap<String, Object> data) {
ArrayList<Object> assignments = new ArrayList<Object>(10);
ArrayList<Object> uses = new ArrayList<Object>(10);
HashMap<String, Object> klass = new HashMap<String, Object>();

assignments.add(data.get("coord"));
klass.put(assignKey, assignments);
klass.put(useKey, uses);

this._classes.put(name, klass);
return true;
}

public boolean insertInterfaceDeclaration(String name, HashMap<String, Object> data) {
ArrayList<Object> assignments = new ArrayList<Object>(10);
ArrayList<Object> uses = new ArrayList<Object>(10);
HashMap<String, Object> klass = new HashMap<String, Object>();

assignments.add(data.get("coord"));
klass.put(assignKey, assignments);
klass.put(useKey, uses);

this._interfaces.put(name, klass);
return true;
}

public boolean insertMethodDeclaration(String name, HashMap<String, Object> data) {
HashMap<String, Object> method = this._methods.get(name);
if (method == null) {
method = new HashMap<String, Object>();
ArrayList<Object> assignments = new ArrayList<Object>(10);
ArrayList<Object> uses = new ArrayList<Object>(10);

assignments.add(data.get("coord"));
method.put(assignKey, assignments);
method.put(useKey, uses);
} else {
ArrayList<Object> assignments = (ArrayList<Object>)method.get(assignKey);

assignments.add(data.get("coord"));
method.put(assignKey, assignments);
}

this._methods.put(name, method);
return true;
}
public boolean insertMethodInvocation(String name, HashMap<String, Object> data) {
HashMap<String, Object> method = this._methods.get(name);
if (method == null) {
method = new HashMap<String, Object>();
ArrayList<Object> assignments = new ArrayList<Object>(10);
ArrayList<Object> uses = new ArrayList<Object>(10);

uses.add(data.get("coord"));
method.put(assignKey, assignments);
method.put(useKey, uses);
} else {
ArrayList<Object> uses = (ArrayList<Object>)method.get(useKey);

uses.add(data.get("coord"));
method.put(useKey, uses);
}

this._methods.put(name, method);
return true;
}

public boolean insertVariableDeclaration(String name, HashMap<String, Object> data) {
HashMap<String, Object> var = this._vars.get(name);
if (var == null) {
var = new HashMap<String, Object>();
ArrayList<Object> assignments = new ArrayList<Object>(10);
ArrayList<Object> uses = new ArrayList<Object>(10);

assignments.add(data.get("coord"));
var.put(assignKey, assignments);
var.put(useKey, uses);
} else {
ArrayList<Object> assignments = (ArrayList<Object>)var.get(assignKey);

assignments.add(data.get("coord"));
var.put(assignKey, assignments);
}

this._vars.put(name, var);
return true;
}

public boolean insertVariableAccess(String name, HashMap<String, Object> data) {
HashMap<String, Object> var = this._vars.get(name);
if (var == null) {
var = new HashMap<String, Object>();
ArrayList<Object> assignments = new ArrayList<Object>(10);
ArrayList<Object> uses = new ArrayList<Object>(10);

uses.add(data.get("coord"));
var.put(assignKey, assignments);
var.put(useKey, uses);
} else {
ArrayList<Object> uses = (ArrayList<Object>)var.get(useKey);

uses.add(data.get("coord"));
var.put(useKey, uses);
}

this._vars.put(name, var);
return true;
}

public boolean insertImportStatement(String name, HashMap<String, Object> data) {
HashMap<String, Object> lib = this._imports.get(name);
if (lib == null) {
lib = new HashMap<String, Object>();
ArrayList<Object> assignments = new ArrayList<Object>(10);
ArrayList<Object> uses = new ArrayList<Object>(10);

uses.add(data.get("coord"));
lib.put(assignKey, assignments);
lib.put(useKey, uses);
} else {
ArrayList<Object> uses = (ArrayList<Object>)lib.get(useKey);

uses.add(data.get("coord"));
lib.put(useKey, uses);
}

this._imports.put(name, lib);
return true;
}

public String toString() {
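// HashMap.toString() renders entries as key=value, so '=' is swapped for
// ':' below to produce a JSON-like string; the trailing comma is trimmed.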
StringBuilder builder = new StringBuilder();
builder.append("\"classes\":" + this._classes + ",");
builder.append("\"interfaces\":" + this._interfaces + ",");
builder.append("\"functions\":" + this._methods + ",");
builder.append("\"vars\":" + this._vars + ",");
builder.append("\"imports\":" + this._imports + ",");

String s = builder.toString().replaceAll("=", ":");
s = s.substring(0, s.length() - 1);
return "{" + s + "}";
}
}


+ 17
- 0
parsers/java/src/main/java/com/bitshift/parsing/symbols/Symbols.java View File

@@ -0,0 +1,17 @@
package com.bitshift.parsing.symbols;

import java.util.ArrayList;

public abstract class Symbols {

public Symbols() {

}

public static ArrayList<Integer> createCoord(Integer startLine, Integer startCol, Integer endLine, Integer endCol) {
ArrayList<Integer> coord = new ArrayList<Integer>(4);
coord.add(startLine); coord.add(startCol); coord.add(endLine); coord.add(endCol);
return coord;
}

}

+ 4
- 0
parsers/ruby/Gemfile View File

@@ -0,0 +1,4 @@
source 'https://rubygems.org'

gem 'ruby_parser'
gem 'sexp_processor'

+ 6
- 0
parsers/ruby/Rakefile View File

@@ -0,0 +1,6 @@
require 'pp'
require File.expand_path('../lib/parser.rb', __FILE__)

task :parse do |t|
parse
end

+ 137
- 0
parsers/ruby/lib/parser.rb View File

@@ -0,0 +1,137 @@
require 'ripper'

def parse
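# Read Ruby source from standard input, walk its sexp tree for symbols, and
# print the result as a JSON-like string.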
source = STDIN.read
walker = TreeWalker.new(source)
walker.parse
puts walker.to_s
end

class TreeWalker < Ripper::SexpBuilder
attr_accessor :symbols

def initialize(source)
ns_hash = Hash.new {
|hash, key|
hash[key] = {
:assignments => [], :uses => []
}
}
class_hash = ns_hash.clone
function_hash = ns_hash.clone
var_hash = ns_hash.clone

@symbols = {
:namespaces => ns_hash,
:classes => class_hash,
:functions => function_hash,
:vars => var_hash
}

super(source)
end

def block_position(node)
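# Walk `node`'s sexp to find the position arrays bounding this block and
# return [start_line, start_col, end_line, end_col]; when only a single
# position is found, the end is reported as [start_line, -1].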
last_node = node[0]
while last_node.is_a? Array
sp = last_node
while not (last_el = last_node[last_node.count - 1]) or
(last_el.is_a? Array and last_el[last_el.count - 1].nil?)
last_node = last_node[0..last_node.count - 2]
end
last_node = last_el
end

last_node = node[0]
while last_node.is_a? Array
ep = last_node
while not (last_el = last_node[last_node.count - 1]) or
(last_el.is_a? Array and last_el[last_el.count - 1].nil?)
last_node = last_node[0..last_node.count - 2]
end
last_node = last_el
end

if sp == ep
return sp + [sp[0], -1]
end
return sp + ep
end

def on_module(*node)
pos = block_position(node)
name = node[0][1][1]
symbols[:namespaces][name][:assignments] << pos
return node
end

def on_class(*node)
pos = block_position(node)
name = node[0][1][1]
symbols[:classes][name][:assignments] << pos
return node
end

def on_def(*node)
pos = block_position(node)
name = node[0][1]
symbols[:functions][name][:assignments] << pos
return node
end

def on_call(*node)
pos = block_position(node)
name = node[node.count - 1][1]
symbols[:functions][name][:uses] << pos
return node
end

def on_vcall(*node)
pos = block_position(node)
name = node[0][1]
symbols[:functions][name][:uses] << pos
return node
end

def on_assign(*node)
pos = block_position(node)
return node if not node[0][0].is_a? Array
name = node[0][0][1]
symbols[:vars][name][:assignments] << pos
return node
end

def on_var_field(*node)
pos = block_position(node)
name = node[0][1]
symbols[:vars][name][:uses] << pos
return node
end

def on_var_ref(*node)
pos = block_position(node)
name = node[0][1]
symbols[:vars][name][:uses] << pos
return node
end

def on_command(*node)
# catch require statements
end

def to_s
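# Convert symbol keys to strings and "=>" to ":" so the output reads as a
# JSON-like string.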
new_symbols = Hash.new {|hash, key| hash[key] = Hash.new}

symbols.each do |type, sym_list|
sym_list.each do |name, sym|
new_symbols[type.to_s][name.to_s] = {
"assignments" => sym[:assignments],
"uses" => sym[:uses]}
end
end

str = new_symbols.to_s
str = str.gsub(/=>/, ":")
return str
end
end

+ 14
- 0
setup.py View File

@@ -0,0 +1,14 @@
from setuptools import setup, find_packages

setup(
name = "bitshift",
version = "0.1.dev",
packages = find_packages(),
install_requires = [
"Flask>=0.10.1", "gunicorn>=18.0", "pygments>=1.6", "requests>=2.2.0",
"GitPython>=0.3.2.RC1", "beautifulsoup4>=3.2.1", "oursql>=0.9.3.1",
"mmh3>=2.3", "PyYAML>=3.11", "python-dateutil>=2.2", "cchardet>=0.3.5"],
author = "Benjamin Attal, Ben Kurtovic, Severyn Kozak",
license = "MIT",
url = "https://github.com/earwig/bitshift"
)

+ 65
- 0
static/css/lib/github.css View File

@@ -0,0 +1,65 @@
td.linenos { background: rgba(65,131,196,0.05); padding-right: 10px; border-right: 1px solid #bbb; }
span.lineno { background: rgba(65,131,196,0.05); padding: 0 5px 0 5px; }
pre { line-height: 125% }
.highlighttable { background-color: #fff; padding-left: 10px; width: inherit; height: inherit; }
.hll { display: block }
.c { color: #999988; font-style: italic } /* Comment */
.err { color: #a61717; background-color: #e3d2d2 } /* Error */
.k { color: #000000; font-weight: bold } /* Keyword */
.o { color: #000000; font-weight: bold } /* Operator */
.cm { color: #999988; font-style: italic } /* Comment.Multiline */
.cp { color: #999999; font-weight: bold; font-style: italic } /* Comment.Preproc */
.c1 { color: #999988; font-style: italic } /* Comment.Single */
.cs { color: #999999; font-weight: bold; font-style: italic } /* Comment.Special */
.gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.ge { color: #000000; font-style: italic } /* Generic.Emph */
.gr { color: #aa0000 } /* Generic.Error */
.gh { color: #999999 } /* Generic.Heading */
.gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.go { color: #888888 } /* Generic.Output */
.gp { color: #555555 } /* Generic.Prompt */
.gs { font-weight: bold } /* Generic.Strong */
.gu { color: #aaaaaa } /* Generic.Subheading */
.gt { color: #aa0000 } /* Generic.Traceback */
.kc { color: #000000; font-weight: bold } /* Keyword.Constant */
.kd { color: #000000; font-weight: bold } /* Keyword.Declaration */
.kn { color: #000000; font-weight: bold } /* Keyword.Namespace */
.kp { color: #000000; font-weight: bold } /* Keyword.Pseudo */
.kr { color: #000000; font-weight: bold } /* Keyword.Reserved */
.kt { color: #445588; font-weight: bold } /* Keyword.Type */
.m { color: #009999 } /* Literal.Number */
.s { color: #d01040 } /* Literal.String */
.na { color: #008080 } /* Name.Attribute */
.nb { color: #0086B3 } /* Name.Builtin */
.nc { color: #445588; font-weight: bold } /* Name.Class */
.no { color: #008080 } /* Name.Constant */
.nd { color: #3c5d5d; font-weight: bold } /* Name.Decorator */
.ni { color: #800080 } /* Name.Entity */
.ne { color: #990000; font-weight: bold } /* Name.Exception */
.nf { color: #990000; font-weight: bold } /* Name.Function */
.nl { color: #990000; font-weight: bold } /* Name.Label */
.nn { color: #555555 } /* Name.Namespace */
.nt { color: #000080 } /* Name.Tag */
.nv { color: #008080 } /* Name.Variable */
.ow { color: #000000; font-weight: bold } /* Operator.Word */
.w { color: #bbbbbb } /* Text.Whitespace */
.mf { color: #009999 } /* Literal.Number.Float */
.mh { color: #009999 } /* Literal.Number.Hex */
.mi { color: #009999 } /* Literal.Number.Integer */
.mo { color: #009999 } /* Literal.Number.Oct */
.sb { color: #d01040 } /* Literal.String.Backtick */
.sc { color: #d01040 } /* Literal.String.Char */
.sd { color: #d01040 } /* Literal.String.Doc */
.s2 { color: #d01040 } /* Literal.String.Double */
.se { color: #d01040 } /* Literal.String.Escape */
.sh { color: #d01040 } /* Literal.String.Heredoc */
.si { color: #d01040 } /* Literal.String.Interpol */
.sx { color: #d01040 } /* Literal.String.Other */
.sr { color: #009926 } /* Literal.String.Regex */
.s1 { color: #d01040 } /* Literal.String.Single */
.ss { color: #990073 } /* Literal.String.Symbol */
.bp { color: #999999 } /* Name.Builtin.Pseudo */
.vc { color: #008080 } /* Name.Variable.Class */
.vg { color: #008080 } /* Name.Variable.Global */
.vi { color: #008080 } /* Name.Variable.Instance */
.il { color: #009999 } /* Literal.Number.Integer.Long */

+ 64
- 0
static/css/lib/highlight.css View File

@@ -0,0 +1,64 @@
td.linenos { background-color: #f0f0f0; padding-right: 10px; }
span.lineno { background-color: #f0f0f0; padding: 0 5px 0 5px; }
pre { line-height: 125% }
.highlighttable { background-color: #49483e; width: inherit; height: inherit; }
.hll { display: block }
.highlight { background: #272822; color: #f8f8f2 }
.c { color: #75715e } /* Comment */
.err { color: #960050; background-color: #1e0010 } /* Error */
.k { color: #66d9ef } /* Keyword */
.l { color: #ae81ff } /* Literal */
.n { color: #f8f8f2 } /* Name */
.o { color: #f92672 } /* Operator */
.p { color: #f8f8f2 } /* Punctuation */
.cm { color: #75715e } /* Comment.Multiline */
.cp { color: #75715e } /* Comment.Preproc */
.c1 { color: #75715e } /* Comment.Single */
.cs { color: #75715e } /* Comment.Special */
.ge { font-style: italic } /* Generic.Emph */
.gs { font-weight: bold } /* Generic.Strong */
.kc { color: #66d9ef } /* Keyword.Constant */
.kd { color: #66d9ef } /* Keyword.Declaration */
.kn { color: #f92672 } /* Keyword.Namespace */
.kp { color: #66d9ef } /* Keyword.Pseudo */
.kr { color: #66d9ef } /* Keyword.Reserved */
.kt { color: #66d9ef } /* Keyword.Type */
.ld { color: #e6db74 } /* Literal.Date */
.m { color: #ae81ff } /* Literal.Number */
.s { color: #e6db74 } /* Literal.String */
.na { color: #a6e22e } /* Name.Attribute */
.nb { color: #f8f8f2 } /* Name.Builtin */
.nc { color: #a6e22e } /* Name.Class */
.no { color: #66d9ef } /* Name.Constant */
.nd { color: #a6e22e } /* Name.Decorator */
.ni { color: #f8f8f2 } /* Name.Entity */
.ne { color: #a6e22e } /* Name.Exception */
.nf { color: #a6e22e } /* Name.Function */
.nl { color: #f8f8f2 } /* Name.Label */
.nn { color: #f8f8f2 } /* Name.Namespace */
.nx { color: #a6e22e } /* Name.Other */
.py { color: #f8f8f2 } /* Name.Property */
.nt { color: #f92672 } /* Name.Tag */
.nv { color: #f8f8f2 } /* Name.Variable */
.ow { color: #f92672 } /* Operator.Word */
.w { color: #f8f8f2 } /* Text.Whitespace */
.mf { color: #ae81ff } /* Literal.Number.Float */
.mh { color: #ae81ff } /* Literal.Number.Hex */
.mi { color: #ae81ff } /* Literal.Number.Integer */
.mo { color: #ae81ff } /* Literal.Number.Oct */
.sb { color: #e6db74 } /* Literal.String.Backtick */
.sc { color: #e6db74 } /* Literal.String.Char */
.sd { color: #e6db74 } /* Literal.String.Doc */
.s2 { color: #e6db74 } /* Literal.String.Double */
.se { color: #ae81ff } /* Literal.String.Escape */
.sh { color: #e6db74 } /* Literal.String.Heredoc */
.si { color: #e6db74 } /* Literal.String.Interpol */
.sx { color: #e6db74 } /* Literal.String.Other */
.sr { color: #e6db74 } /* Literal.String.Regex */
.s1 { color: #e6db74 } /* Literal.String.Single */
.ss { color: #e6db74 } /* Literal.String.Symbol */
.bp { color: #f8f8f2 } /* Name.Builtin.Pseudo */
.vc { color: #f8f8f2 } /* Name.Variable.Class */
.vg { color: #f8f8f2 } /* Name.Variable.Global */
.vi { color: #f8f8f2 } /* Name.Variable.Instance */
.il { color: #ae81ff } /* Literal.Number.Integer.Long */

BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


+ 7
- 0
static/css/lib/jqueryui.custom.min.css
File diff suppressed because it is too large
View File


+ 0
- 4
static/css/main.css View File

@@ -1,4 +0,0 @@
/* Global project stylesheet.
*/
p {
font-size: 1.5em; }

BIN
View File


BIN
View File


+ 1
- 0
static/google10335120a3066831.html View File

@@ -0,0 +1 @@
google-site-verification: google10335120a3066831.html

BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


BIN
View File


+ 19
- 0
static/js/about.js View File

@@ -0,0 +1,19 @@
/*
* @file Implements a parallax effect on the about page.
*/

var lastVertPos = $(window).scrollTop();

/*
* Scroll `div#img-[1-4]` at a greater speed than the text, producing a
* parallax effect.
*/
$(window).scroll(function(e){
var currVertPos = $(window).scrollTop();
var delta = currVertPos - lastVertPos;
$(".bg").each(function(){
$(this).css("top", parseFloat($(this).css("top")) -
delta * $(this).attr("speed") + "px");
});
lastVertPos = currVertPos;
});

+ 175
- 0
static/js/index.advanced-search-form.js View File

@@ -0,0 +1,175 @@
/*
* @file Manages all advanced search form logic.
*/

var searchGroups = $("div#search-groups");

/*
* Load all advanced search form libraries.
*/
function loadInputFieldWidgets(){
$(".search-group input#date-last-modified").datepicker();
$(".search-group input#date-created").datepicker();
$(".search-group input#autocomplete").autocomplete({
source: function(request, response){
var matcher = new RegExp(
$.ui.autocomplete.escapeRegex(request.term), "i");
response($.grep(AUTOCOMPLETE_LANGUAGES, function(item){
return matcher.test(item);
}));
}
});
};
loadInputFieldWidgets();

/*
* Set all advanced search form button callbacks.
*/
(function setSearchFormCallbacks(){
// Create a new search group, and update the `#sidebar` checklist.
$("button#add-group").click(function(){
$("div#sidebar input[type=checkbox]").prop("checked", false);

searchGroups.children("#selected").removeAttr("id");
var searchGroup = $("<div/>", {
class : "search-group",
id : "selected"
});
searchGroups.append(
searchGroup.append(createSearchGroupInput("language", "languages")));
loadInputFieldWidgets();
$("div#sidebar input[type=checkbox]#language").prop("checked", true);

searchGroups[0].scrollTop = searchGroups[0].scrollHeight;
});

// Remove the currently selected group if it's not the only one, and mark
// one of its siblings as selected.
$("button#remove-group").click(function(){
var currentGroup = $("div.search-group#selected");

if($("div.search-group").length == 1)
return;
else {
var nextGroup = currentGroup.prev();
if(nextGroup.size() == 0)
nextGroup = currentGroup.next();
}
currentGroup.remove();
nextGroup.click();
});

// Select a search group, and update the `#sidebar` checklist accordingly.
$(document).on("click", "div.search-group", function(){
searchGroups.children("#selected").removeAttr("id");
$(this).attr("id", "selected");
$("div#sidebar input[type=checkbox]").prop("checked", false);
$(this).find("input[type=text]").each(function(){
var checkBoxSelector = "div#sidebar input[type=checkbox]";
$(checkBoxSelector + "#" + $(this).attr("class").split(" ")[0]).
prop("checked", true);
})
});

// Toggle the presence of an input field.
$("div#sidebar input[type=checkbox]").click(function(){
var fieldId = $(this).prop("id");
if($(this).is(":checked")){
$("div.search-group#selected").append(
$.parseHTML(createSearchGroupInput(
fieldId, $(this).next("label").children("div").
text())));
loadInputFieldWidgets();
if(fieldId.slice(0, 4) == "date")
$(".search-group#selected ." + fieldId).datepicker();
}
else {
if($(".search-group#selected").children("div").length > 1)
$(".search-group#selected #" + fieldId).remove()
else
$(this).prop("checked", true);
}
searchGroups[0].scrollTop = searchGroups[0].scrollHeight;
});

$("div#advanced-search button#submit").click(function(){
$("div#advanced-search").hide();
advancedSearchButton.removeClass("clicked");
assembleQuery();
populateResults();
})

var previousAdvancedQuery = "";
var searchBar = $("form#search-bar input[name=query]");

window.setInterval(function(){
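// Mirror the advanced-search form into the main search bar by polling for
// changes roughly 15 times per second.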
var currentQuery = assembleQuery();
if(currentQuery != previousAdvancedQuery){
previousAdvancedQuery = currentQuery;
searchBar.val(assembleQuery());
}
}, 1e3 / 15);
}());

/*
* Return an HTML string representing a new input field div in a search group.
*
* @param fieldId The id of the input field div, and its child elements.
* @param name The name to display next to the input field.
*/
function createSearchGroupInput(fieldId, name){
var fieldHTML = [
"<div id='" + fieldId + "'>",
"<div class='name'>" + name + "</div>",
"<input class='" + fieldId + "' name='" + fieldId + "' type='text'>",
"<input type='checkbox' name='regex'>",
"<span class='regex'>Regex</span>",
"</div>"
];

if(fieldId == "language")
fieldHTML[2] = [
"<input id='autocomplete' class='language'",
"name='language' type='text'>"
].join(" ");

return fieldHTML.join("");
}

/*
* Create a query from advanced-search groups.
*/
function assembleQuery(){
var groups = searchGroups.children(".search-group");
var groupQueries = [];

for(var group = 0; group < groups.length; group++){
var inputFields = groups[group].querySelectorAll("input[type=text]");
var regexCheckbox = groups[group].querySelectorAll("input[name=regex]");
var groupQuery = [];

for(var field = 0; field < inputFields.length; field++)
if(inputFields[field].value.length > 0)
groupQuery.push(genFieldQueryString(
inputFields[field], regexCheckbox[field].checked));

if(groupQuery.length > 0)
groupQueries.push(groupQuery.join(" AND "));
}

return groupQueries.join(" OR ");
}

/*
* Generate a processed query string for an input field's value.
*
* @param field (DOM element) An `input[type=text]` element.
* @param hasRegex (boolean) Whether or not the field's value has regex.
*
* @return The processed query.
*/
function genFieldQueryString(field, hasRegex){
var terms = field.value.replace(/\\/g, "\\\\").replace(/\"/g, "\\\"");
var query = field.getAttribute("name") + ":" + (hasRegex?"re:":"") + terms;
return '"' + query + '"';
}

+ 447
- 0
static/js/index.js View File

@@ -0,0 +1,447 @@
/*
* @file Manages all library initialization, jQuery callbacks, query entry
* callbacks, server querying, and results display for `index.html`.
*/

var advancedSearchDiv = $("div#advanced-search");
var advancedSearchButton = $("button#advanced-search");
var FINISH_TYPING_INTERVAL = 650;
var searchBar = $("form#search-bar input[type='text']")[0];
var resultsDiv = $("div#results")[0];

var typingTimer, scrollTimer, lastValue;
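// `typingTimer` initially refers to the handler function declared below;
// once that handler runs, it is reused to hold the pending timeout id.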
var searchResultsPage = 1;

/*
* Set all page callbacks.
*/
(function setHomePageCallbacks(){
var results = $('#results').get(0);

// Enable infinite scrolling down the results page.
$(window).scroll(function(){
if($(window).scrollTop() + $(window).height() == $(document).height() &&
resultsDiv.querySelectorAll(".result").length > 0)
loadMoreResults();

clearTimeout(scrollTimer);
if (!results.classList.contains('disable-hover'))
results.classList.add('disable-hover')

scrollTimer = setTimeout(function(){
if (results.classList.contains('disable-hover'))
results.classList.remove('disable-hover');
}, 200);
});

// Toggle the advanced-search form's visibility.
advancedSearchButton.click(function(){
var searchField = $("div#search-field");
if(!advancedSearchDiv.hasClass("visible")){
searchField.addClass("partly-visible");
advancedSearchDiv.fadeIn(500).addClass("visible");
advancedSearchButton.addClass("clicked");
}
else {
advancedSearchDiv.hide().removeClass("visible");
advancedSearchButton.removeClass("clicked");
if($("div#results .result").length == 0)
searchField.removeClass("partly-visible");
clearResults();
}
});

// Enable capturing the `enter` key.
$("form#search-bar").submit(function(event){
event.preventDefault();
return false;
});
searchBar.onkeyup = typingTimer;
}());

/*
* Set keyboard shortcut mappings.
*/
(function resultsHotkeys(){
/*
* If the currently viewed result is not the first, scroll to the previous
* result.
*/
var previousResult = function(){
var currResult = $(".display-all");
if(currResult.length) {
currResult.removeClass("display-all");
currResult = currResult.closest(".result").prev(".result");
} else {
currResult = $(document.querySelectorAll(".result")[0]);
}

currResult.addClass("display-all");
currResult.each(function(){
$('html,body').stop().animate({
scrollTop: $(this).offset().top - (
$(window).height() - $(this).outerHeight(true)) / 2
}, 140);
});
};

/*
* If the currently viewed result is not the last, scroll to the next
* result.
*/
var nextResult = function(){
var currResult = $(".display-all");
if(currResult.length) {
currResult.removeClass("display-all");
currResult = currResult.closest(".result").next(".result");
} else {
currResult = $(document.querySelectorAll(".result")[0]);
}

currResult.addClass('display-all');
currResult.each(function(){
$('html,body').stop().animate({
scrollTop: $(this).offset().top - (
$(window).height() - $(this).outerHeight(true)) / 2
}, 140);
});
};

var displayHotkeyHelp = function(){
var help = $("div#hotkey-help");
if(help.hasClass("hidden"))
help.fadeIn(420);
else
help.fadeOut(420);

$("div#body").toggleClass("faded");
help.toggleClass("hidden");
}

var hotkeyActions = {
"k" : previousResult,
"j" : nextResult,
"h" : previousSymbolMatch,
"l" : nextSymbolMatch,
"?" : displayHotkeyHelp
};

$(window).keypress(function(key){
for(var hotkey in hotkeyActions){
var keyChar = String.fromCharCode(key.keyCode);
if(keyChar == hotkey &&
!($(key.target).is("textarea") || $(key.target).is("input")))
hotkeyActions[keyChar]();
}
});
}());

// Enable infinite scrolling down the results page.
$(window).scroll(function() {
var searchField = $("div#search-field");
if($(window).scrollTop() + $(window).height() == $(document).height() &&
searchField.hasClass('partly-visible')){
loadMoreResults();
}
});

/*
* Clear the existing timer and set a new one whenever the user types text into the
* search bar.
*/
function typingTimer(event){
clearTimeout(typingTimer);

var enterKeyCode = 13;
if(event.keyCode != enterKeyCode){
if(lastValue != searchBar.value)
typingTimer = setTimeout(finishedTyping, FINISH_TYPING_INTERVAL);
}
else {
event.preventDefault();
finishedTyping();
return false;
}
};

/*
* Callback which queries the server whenever the user stops typing.
*
* Whenever the user doesn't type for a `FINISH_TYPING_INTERVAL` after having
* entered new text in the search bar, send the current query request to the
* server.
*/
function finishedTyping(){
lastValue = searchBar.value;
var searchField = $("div#search-field");

clearResults();
if(searchBar.value){
searchField.addClass("partly-visible");
populateResults();
}
else {
searchField.removeClass("partly-visible");
$("div#advanced-search").fadeOut(50);
advancedSearchButton.removeClass("clicked");
clearResults();
}
}

/*
* Removes any child elements of `div#results`.
*/
function clearResults(){
while(resultsDiv.firstChild)
resultsDiv.removeChild(resultsDiv.firstChild);
}

/*
* Create a result element based upon a codelet instance.
*
* @return {Element} The result element.
*/
function createResult(codelet) {
var maxAttributeLength = 20;

//Level 1
var newDiv = document.createElement("div"),
table = document.createElement("table"),
row = document.createElement("tr");
//Level 2
var displayInfo = document.createElement("div"),
codeElt = document.createElement("td"),
hiddenInfoContainer = document.createElement("td"),
hiddenInfo = document.createElement("div"),
cycle = document.createElement("div");
//Level 3
var title = document.createElement("span"),
site = document.createElement("span"),
nextMatch = document.createElement("a"),
prevMatch = document.createElement("a"),
dateModified = document.createElement("div"),
language = document.createElement("div"),
dateCreated = document.createElement("div"),
authors = document.createElement("div");

//Classes and ID's
newDiv.classList.add('result');

displayInfo.id = 'display-info';
codeElt.id = 'code';
hiddenInfo.id = 'hidden-info';
cycle.id = 'cycle-matches'

title.id = 'title';
site.id = 'site';
nextMatch.id = 'next-match';
nextMatch.href = '#';
prevMatch.id = 'prev-match';
prevMatch.href = '#';
dateModified.id = 'date-modified';
language.id = 'language';
dateCreated.id = 'date-created';
authors.id = 'authors';

//Add the bulk of the html
title.innerHTML = ' &raquo; <a href="' + codelet.url + '">'
+ codelet.name + '</a>';
site.innerHTML = '<a href="' + codelet.origin[1] + '">' +
codelet.origin[0] +'</a>';
nextMatch.innerHTML = 'next match';
prevMatch.innerHTML = 'prev match';
language.innerHTML = 'Language: <span>' + codelet.lang + '</span>';
dateModified.innerHTML = 'Last modified: <span>' + codelet.modified +
'</span>';
// Needs to be changed from int to string on the server
dateCreated.innerHTML = 'Created: <span>' +
codelet.created.substring(0, maxAttributeLength) + '</span>';

var currLength = 0;
var authorsList = [];
for(var auth = 0; auth < codelet.authors.length; auth++){
currLength += codelet.authors[auth].length;
if(6 < currLength){
authorsList.push("...");
break;
}
else
authorsList.push('<a href=#>' + codelet.authors[auth] + '</a>');
}
authors.innerHTML = "Authors: <span>" + authorsList.join(" ") + "</span>";

// Needs to be processed on the server
codeElt.innerHTML = '<div id=tablecontainer>' + codelet.code + '</div>';

//Event binding
$(newDiv).on('mousemove', function(e) {
var holdCondition = $('.disable-hover');

if(holdCondition.length == 0) {
$(this).siblings().removeClass('display-all');
$(this).addClass('display-all');
}
});

$(newDiv).on('mouseleave', function(e) {
var holdCondition = $('.disable-hover');

if(holdCondition.length == 0)
$(this).removeClass('display-all');
});

$(nextMatch).click(function(e) {
e.stopPropagation();
e.preventDefault();
nextSymbolMatch();
});

$(prevMatch).click(function(e) {
e.stopPropagation();
e.preventDefault();
previousSymbolMatch();
});

//Finish and append elements to parent elements
hiddenInfo.appendChild(dateCreated);
hiddenInfo.appendChild(dateModified);
hiddenInfo.appendChild(language);
hiddenInfo.appendChild(authors);

hiddenInfoContainer.appendChild(hiddenInfo);

row.appendChild(codeElt);
row.appendChild(hiddenInfoContainer);
table.appendChild(row);

displayInfo.appendChild(site);
displayInfo.appendChild(title);

cycle.appendChild(prevMatch);
cycle.appendChild(nextMatch);

newDiv.appendChild(displayInfo);
newDiv.appendChild(table);

return newDiv;
}

function previousSymbolMatch() {
var currResult = $(".display-all"),
currMatch = currResult.find(".hll.current"),
matches = currResult.find(".hll"),
scrollDiv = currResult.find("#tablecontainer");

if (currMatch.length == 0)
currMatch = $(matches[0]);
else
currMatch.removeClass('current');

var index = matches.index(currMatch.get(0)) - 1;
index = index <= 0 ? matches.length - 1 : index;
var newMatch = $(matches[index]);

scrollDiv.scrollTop(scrollDiv.scrollTop()
- scrollDiv.height() / 2
+ newMatch.position().top + newMatch.height() / 2);

newMatch.effect("highlight", {color: '#FFF'}, 750)
newMatch.addClass('current');
};

function nextSymbolMatch() {
var currResult = $(".display-all"),
currMatch = currResult.find(".hll.current"),
matches = currResult.find(".hll"),
scrollDiv = currResult.find("#tablecontainer");

if (currMatch.length == 0)
currMatch = $(matches[0]);
else
currMatch.removeClass("current");

var index = matches.index(currMatch.get(0)) + 1;
index = index >= matches.length ? 0 : index;
var newMatch = $(matches[index]);

scrollDiv.scrollTop(scrollDiv.scrollTop()
- scrollDiv.height() / 2
+ newMatch.position().top + newMatch.height() / 2);

newMatch.effect("highlight", {color: "#FFF"}, 750)
newMatch.addClass("current");
};

/*
* AJAX the current query string to the server.
*
* @return {Deferred} A jQuery Deferred which resolves to an array of
* `div.result` DOM elements, to fill `div#results`.
*/
function queryServer(){
var queryUrl = document.URL + "search.json?" + $.param({
"q" : searchBar.value,
"p" : searchResultsPage++,
"hl": 1
});

var results = $.Deferred();
$.getJSON(queryUrl, function(result){
var resultDivs = [];
if("error" in result)
insertErrorMessage(result["error"]);
else if(result["results"].length == 0 && searchResultsPage == 2)
insertErrorMessage("No search results.");
else
for(var codelet = 0; codelet < result["results"].length; codelet++)
resultDivs.push(createResult(result["results"][codelet]));
results.resolve(resultDivs);
});

return results;
}

/*
* Query the server with the current search string, and populate `div#results`
* with its response.
*/
function populateResults(){
searchResultsPage = 1;
loadMoreResults();
}

/*
* Query the server for the next results page, and add its codelets to
* `div#results`.
*/
function loadMoreResults(){
queryServer().done(function(results){
for(var result = 0; result < results.length; result++){
var newDiv = results[result];
resultsDiv.appendChild(newDiv);
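// Stagger the entrance animation: each result receives the "cascade"
// class 20ms after the previous one.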
setTimeout(
(function(divReference){
return function(){
divReference.classList.add("cascade");
};
}(newDiv)),
result * 20);
}
});
}

/*
* Display an error message in the results area.
*
* @param msg (str) The message string.
*/
function insertErrorMessage(msg){
var error = $(
[
"<div id='error'><span id='s1'>Error</span> ",
"<span id='s2'>&raquo;</span> </div>"
].join(""));
error.append(msg);
resultsDiv.appendChild(error[0]);
}

+ 7
- 0
static/js/lib/jquery-ui.min.js
File diff suppressed because it is too large
View File


+ 4
- 0
static/js/lib/jquery.min.js
File diff suppressed because it is too large
View File


+ 6
- 0
static/js/main.js View File

@@ -0,0 +1,6 @@
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-51910807-1', 'bitshift.it');
ga('send', 'pageview');

+ 3
- 0
static/robots.txt View File

@@ -0,0 +1,3 @@
User-agent: *
Disallow: /search.json
Sitemap: http://www.bitshift.it/sitemap.xml

+ 18
- 0
static/sass/_logo.sass View File

@@ -0,0 +1,18 @@
a#logo
letter-spacing: 0.3em
text-decoration: none

div#logo
font-size: 400%
padding-bottom: 0.2em
text-align: center

#logo-bit
color: $baseColor1

#logo-angle
color: $baseColor3

#logo-shift
color: $baseColor2
font-style: italic

+ 20
- 2
static/sass/_mixins.sass View File

@@ -1,11 +1,29 @@
/*
Partial to contain all globally-applicable mixins
Partial to contain all globally-applicable mixins.
*/

// add vendor prefixes for the property $property with value $value
// Add vendor prefixes for the property $property with value $value.
@mixin vendor($property, $value)
-webkit-#{$property}: $value
-moz-#{$property}: $value
-ms-#{$property}: $value
-o-#{$property}: $value
#{$property}: $value

// Add portable opacity style.
@mixin opaque($opacity)
@include vendor(opacity, $opacity)
filter: alpha(opacity=$opacity)

@mixin delay($time)
transition-delay: $time
-webkit-transition-delay: $time

.t1
@include vendor(transition, all 0.1s ease-out)

.t2
@include vendor(transition, all 0.2s ease-out)

.t3
@include vendor(transition, all 0.3s ease-out)

+ 12
- 0
static/sass/_variables.sass View File

@@ -0,0 +1,12 @@
/*
Partial to contain all globally-applicable variables.
*/

$baseColor1: #A31F34
$baseColor2: #8A8B8C
$baseColor3: #C2C0BF

$lightGray: #F1F1F1

$lightBlue: #67A0FD
$blue: #3177EB

+ 139
- 0
static/sass/about.sass View File

@@ -0,0 +1,139 @@
/*
Stylesheet for `templates/about.html`
*/

@import mixins
@import variables

$centered-section-min-width: 500px

div.bg
$img-height: 650px

position: fixed
width: 100%
left: 0
z-index: -1

&#img-1
background: url(../img/about/bg1.png) no-repeat
background-size: cover
height: 600px
top: -300px

&#img-2
background: url(../img/about/bg2.png) no-repeat
background-size: cover
height: $img-height + 300
top: 1150px

&#img-3
background: url(../img/about/bg3.png) no-repeat
background-size: cover
height: $img-height + 300
top: 2050px

&#img-4
background: url(../img/about/bg4.png) no-repeat
background-size: cover
height: $img-height + 400
top: 3200px

div.section
background-color: white
border: 1px solid $baseColor2
margin-bottom: 200px
margin-top: 300px
padding-bottom: 80px
padding-top: 20px
overflow: hidden

&#top
margin-top: 0px

div#wrap
width: 100%
position: relative
padding-top: 56.782% // aspect ratio, 9 / 16

iframe#vimeo
border: 1px solid $baseColor3
margin-top: 40px

position: absolute
top: 0
left: 0
height: 100%
width: 100%

div.centered
font-size: 110%
line-height: 150%
margin-left: auto
margin-right: auto
min-width: 500px
width: 65%

&#how
b
font-family: monospace
font-size: 110%

ul
list-style: none

span
color: $baseColor1
font-weight: bold

h1
color: $baseColor1

span
color: $baseColor2

a
color: #727070
font-style: italic
text-decoration: none

&:hover
@extend .t3

color: #575757

span#title
color: $baseColor1
font-weight: bold

div.person
font-size: 80%
overflow: hidden

&#top
margin-top: 40px

>div
$image-min-width: 100px

display: inline-block
height: 100%
margin-bottom: 40px
vertical-align: top

&.photo
margin-right: 40px
width: $image-min-width

img
display: block
height: $image-min-width
width: $image-min-width

&.bio
min-width: $centered-section-min-width - $image-min-width - 50px
width: 70%

h1
font-size: 130%
margin: 0

+ 63
- 0
static/sass/docs.sass View File

@@ -0,0 +1,63 @@
@import mixins
@import variables
@import logo

h1
color: $baseColor1

span
color: $baseColor2

h2, h3
color: $baseColor2 * 0.8

p
line-height: 1.8em

ul
list-style: none
margin-bottom: 2%

li
margin-bottom: 2%

a
color: #727070
font-style: italic
text-decoration: none

&:hover
@extend .t3

color: #575757

span
&.code
background-color: $baseColor3 * 1.2
font-family: monospace
padding: 5px

&.string
color: $baseColor1
font-family: monospace
font-size: 1.1em

&.title
color: $baseColor1
font-weight: bold

table.code-example
border-collapse: collapse
width: 100% !important

td.linenos
border: 1px solid $baseColor2
padding-left: 10px
width: 20px

td.code
padding-left: 10px

li#sec3 span#gasp
color: $baseColor2 * 0.8
font-style: italic

+ 19
- 0
static/sass/error404.sass View File

@@ -0,0 +1,19 @@
@import variables

div#message
color: $baseColor1
font-family: monospace
font-size: 700%
font-weight: normal
margin-top: 8%
text-align: center

span
&.light
color: $baseColor3

&.dark
color: $baseColor2

&.red
color: $baseColor1

+ 443
- 0
static/sass/index.sass View File

@@ -0,0 +1,443 @@
/*
Stylesheet for `templates/index.html`.
*/

@import mixins
@import variables
@import logo

$minSearchFieldsWidth: 490px
$codeWidth: 700px
$hiddenInfoWidth: 300px

.ui-datepicker
font-size: 70%

.ui-autocomplete
max-height: 30%
overflow-x: hidden
overflow-y: scroll
padding: 0px

>li.ui-menu-item a.ui-state-focus
@include vendor(transition, background-color 0.3s ease-out)

div#body
@extend .t3

&.faded
@include opaque(0.8)

div#hotkey-help
$width: 40%

background-color: white
border: 1px solid $baseColor3
left: 50% - $width / 2
min-width: 400px
padding: 35px
position: fixed
top: 30%
width: $width
z-index: 200

&.hidden
display: none

div
border-bottom: 1px solid $baseColor2
color: $baseColor1
font-size: 130%
padding-bottom: 8px
text-align: center

ul
list-style: none
margin-left: auto
margin-right: auto
position: relative
width: 300px

li
margin-bottom: 4px

span.hotkey
color: $baseColor1
font-family: monospace
font-size: 130%
font-weight: bold

span.seperator
color: $baseColor2

div#search-field
@extend .t2

bottom: 0
height: 50%
left: 0
margin: auto
margin-top: 15%
max-height: 100px
right: 0
position: absolute
z-index: 2
top: 0
width: 40%

form#search-bar
min-width: $minSearchFieldsWidth

input[type="text"], button
@extend .t3
@include vendor(box-sizing, border-box)

border: 1px solid $baseColor2
font-size: 110%
margin-bottom: 0px
padding: 6px

input[type="text"]#query
width: 100%

&:hover
border-color: $baseColor1

button#advanced-search
background-color: white
border: none
color: $baseColor2
font-size: 1.1em
font-style: italic

&:hover
color: $baseColor1
cursor: pointer

&.clicked
color: $baseColor1

&:focus
outline: 0

&.partly-visible
margin-top: 0%
position: absolute
width: 100%

#logo
position: absolute
top: -1%
left: 1%

span
font-size: 50%

form#search-bar
padding-top: 3%
margin-left: auto
margin-right: auto
min-width: 800px
width: 60%

input
@extend .t3

&#query
width: 80%

&:hover
border: 1px solid $baseColor1

button#advanced-search
margin-left: 30px

div#advanced-search
background-color: white
border: 1px solid $baseColor3
display: none
font-size: 96%
height: 400px
min-width: $minSearchFieldsWidth
padding-top: 0px
overflow-x: auto
overflow-y: hidden

#heading
color: $baseColor2
display: block
font-size: 120%
padding-left: 1%
padding-top: 1%
width: 100%

div
display: inline-block
font-size: 110%

&#col1
width: 25%

&#col2
width: 75%

button
border: none
color: white
float: right
font-size: 80%
font-weight: bold
margin-right: 1%
padding-left: 4%
padding-right: 4%

&:hover
cursor: pointer

&#add-group
background-color: #7FAFFC

&:hover
background-color: #609AF8

&#remove-group
background-color: #E74C4C

&:hover
background-color: #D63636

&#submit
background-color: #4ee76c

&:hover
background-color: #0FDD38

>div
@include vendor(box-sizing, border-box)

display: inline-block
float: left

#sidebar
padding-left: 1%
width: 25%

>ul
list-style: none
padding-left: 0
margin-bottom: 8%
margin-top: 2%

li
margin-bottom: 2%

label
user-select: none

div
@extend .t3

background-color: $lightGray
border: none
padding: 3%
width: 85%

&:hover, &.selectedInputField
@extend .t3

background-color: $baseColor2
color: white
cursor: pointer
width: 90%

input[type="checkbox"]
display: none

&:checked + label > div
@extend .selectedInputField

background-color: $baseColor1
color: white
width: 90%

#search-groups
margin-top: 1%
max-height: 87%
overflow-y: auto
width: 75%

.search-group
@include vendor(transition, all 0.6s ease-out)

background-color: $lightGray
padding: 1%
margin-bottom: 2%
width: 97%

>div
margin-bottom: 0.7%

>div.name
display: inline-block
font-size: 90%
width: 20%

>input[type=text]
display: inline-block
padding: 2px
width: 60%

>input[type=checkbox]
margin-left: 2%

&:checked + span
@extend .t2

color: green
font-weight: bold

&:hover
cursor: pointer

span.regex
font-size: 80%

&:hover
cursor: pointer
background-color: #d6d6d6

&#selected
background-color: #CACACA

div#results
margin: 3% auto 0 auto
margin-left: auto
margin-right: auto
width: 80%

a
@extend .t3

text-decoration: none

&:hover
color: $baseColor1

div#error
font-size: 170%
margin-top: 22%
text-align: center

span
margin-right: 10px
font-size: 150%

&#s1
color: $baseColor1

&#s2
color: $baseColor2

&.disable-hover
pointer-events: none

div.result
@extend .t3

height: 200px
margin-bottom: 100%
pointer-events: auto

table
border-collapse: collapse
height: inherit

tr
@extend .t3
@include opaque(0.8)

height: inherit

&.cascade
@extend .t1
margin-bottom: 15%

&.display-all
table tr
@include opaque(1.0)

#tablecontainer
max-width: 70%
overflow: auto !important

div#display-info
font-size: 1.3em
padding: 5px 0px 5px 5px
width: 100%

#title
margin-right: 10px

#site
text-transform: capitalize

td#code
@include vendor(transition, width 0.2s ease-in-out)

width: $codeWidth
max-width: $codeWidth
height: inherit
padding: 0px

#tablecontainer
width: 100%
height: inherit
overflow: hidden
background-color: #49483e
position: relative
z-index: 1

table
border-collapse: collapse
font-family: monospace

.linenos
padding-left: 8px

pre
margin-top: 5px

.code pre
margin-top: 5px

.hll
background: #5B5A51

div#hidden-info
width: $hiddenInfoWidth
margin-left: -$hiddenInfoWidth
height: 100%
padding-top: 40px
font-size: 1.2em
line-height: 1.5em
position: relative
z-index: 0

@include vendor(transition, margin-left 0.2s ease-in-out)

.display-all &
margin-left: -$hiddenInfoWidth / 1.5
padding-left: 20px

span
color: $baseColor1
font-family: monospace
font-size: 1.1em
// float: right

div
display: block

#authors
a
font-family: monospace

+ 50
- 3
static/sass/main.sass View File

@@ -2,6 +2,53 @@
Global project stylesheet.
*/

// placeholder
p
font-size: 1.5em
@import mixins
@import variables

html, body
height: 100%
margin: 0
padding: 0
font-family: sans-serif

div#container
min-height: 100%
position: relative

div#header
padding: 10px

div#body
height: 100%
padding-bottom: 110px
padding-top: 4%

div#center
margin-left: auto
margin-right: auto
width: 75%

div#footer
background-color: $baseColor1
bottom: 0
height: 30px
padding-bottom: 5px
padding-top: 15px
position: fixed
text-align: center
width: 100%
z-index: 100

*
color: white

a
@extend .t3

font-size: 1.2em
margin-left: 5%
margin-right: 5%
text-decoration: none

&:hover
text-decoration: underline

+ 16
- 0
static/sitemap.xml View File

@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>

<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>http://bitshift.it/</loc>
<changefreq>monthly</changefreq>
</url>
<url>
<loc>http://bitshift.it/about</loc>
<changefreq>monthly</changefreq>
</url>
<url>
<loc>http://bitshift.it/docs</loc>
<changefreq>monthly</changefreq>
</url>
</urlset>

+ 92
- 0
templates/about.html View File

@@ -0,0 +1,92 @@
= extends "layout.html"

= block title
about
= endblock

= block head
{{ assets.tag("lib/jquery.min.js") }}
{{ assets.tag("main.css") }}
{{ assets.tag("about.css") }}
= endblock

= block after_body
<div id="img-1" class="bg" speed="-1.25"></div>
<div id="img-2" class="bg" speed="1.4"></div>
<div id="img-3" class="bg" speed="1.4"></div>
<div id="img-4" class="bg" speed="1.4"></div>

<div id="top" class="section">
<div class="centered">
<h1><span>&raquo;</span> What</h1>
<span id="title">bitshift</span> is an <a href="https://github.com/earwig/bitshift">open-source</a>
online source-code search engine, developed by programmers, for programmers. The engine currently aggregates
publicly-available code from two online platforms &#8211; <a href="https://github.com/">GitHub</a> and <a
href="https://bitbucket.org/">Bitbucket</a> &#8211; but has the necessary infrastructure to quickly incorporate
others, like <a href="http://stackoverflow.com/">StackOverflow</a> and
<a href="https://gitorious.org/">Gitorious</a>. <span id="title">bitshift</span> supports a robust query
language, which allows users to search for specific languages, files, dates of creation and last modification,
and symbols (function, class, and variable names), amongst other attributes.

Watch our introductory video:
<div id="wrap">
<iframe id="vimeo" src="//player.vimeo.com/video/98697078" width="100%" height="100%" frameborder="0"
webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>
</div>
</div>
</div>

<div class="section">
<div id="how" class="centered">
<h1><span>&raquo;</span> How</h1>
<span id="title">bitshift</span> has an extensive back-end, roughly divided into three sections:
<ul>
<li><span>indexer</span> : finds and downloads code from online frameworks</li>
<li><span>parser</span> : parses newly crawled code, identifying its symbols</li>
<li><span>database</span> : interprets and compiles user searches into database queries</li>
</ul>
The engine was developed over the span of four months, and is primarily implemented in <b>Python</b>, but has
parsers in <b>Ruby</b>, <b>Java</b>, and a number of other languages.
</div>
</div>

<div class="section">
<div class="centered">
<h1><span>&raquo;</span> Who</h1>
<span id="title">bitshift</span> was developed by three seniors from New York City's Stuyvesant High School.
<div id="top" class="person">
<div class="photo">
<a href="https://github.com/breuckelen"><img src="img/about/bio1.jpg" alt="Benjamin Attal's photo."></a>
</div>
<div class="bio">
<h1><a href="https://github.com/breuckelen">Benjamin Attal</a></h1>
Benjamin Attal hacked together <span id="title">bitshift</span>'s parsers and is working on
data-visualization for bitshift's statistics page. He is a software developer and entrepreneur who enjoys
listening to and playing country music, as well as working with smart people.
</div>
</div>
<div class="person">
<div class="photo">
<a href="https://github.com/earwig"><img src="img/about/bio2.jpg" alt="Ben Kurtovic's photo."></a>
</div>
<div class="bio">
<h1><a href="https://github.com/earwig">Ben Kurtovic</a></h1> Ben Kurtovic designed <span
id="title">bitshift</span>’s database and acts as its server admin. In his free time, he edits Wikipedia
and invents new ways of confusing the hell out of people through source code obfuscation.
</div>
</div>
<div class="person">
<div class="photo">
<a href="https://github.com/sevko"><img src="img/about/bio3.jpg" alt="Severyn Kozak's photo."></a>
</div>
<div class="bio">
<h1><a href="https://github.com/sevko">Severyn Kozak</a></h1>
Severyn developed <span id="title">bitshift</span>'s crawlers and its front-end. He loves skiing, mathematics
that he doesn't understand, and the art of good software development.
</div>
</div>
</div>
</div>

{{ assets.tag("about.js") }}
= endblock

+ 282
- 0
templates/docs.html View File

@@ -0,0 +1,282 @@
= extends "layout.html"

= block title
docs
= endblock

= block head
{{ assets.tag("lib/highlight.css") }}

{{ assets.tag("docs.css") }}
= endblock

= block body
<a id="logo" href="/">
<div id="logo">
<span id="logo-bit">bit</span
><span id="logo-angle">&laquo;</span
><span id="logo-shift">shift</span>
</div>
</a>

<ul>
<li>
<h1><span>&raquo;</span> Usage</h1>
<p>
<span class="title">bitshift</span> is a search engine optimized for
source code: beyond supporting searches with the full range of ASCII
symbols, the engine <em>understands</em> code, allowing users to query
for metadata, like time of creation/last modification, programming
language, and even symbols like function names and variables. Basic use
boils down to general and advanced searches.

<ul>
<li>
<h2>general search</h2>
<p>
To perform a "general search," simply place your cursor in the
search bar on our home page and begin entering text; when you
stop typing for a short period of time, we'll automatically
execute the query for you. As you scroll down the page, new
codelets, or results, will be seamlessly downloaded from our
server and appended to the end.
</p>
</li>

<li>
<h2>advanced search</h2>
<p>
General searches, though, are limited. To allow users to make the
best of our engine, we created an advanced search form that
allows the creation of complex queries with the following
specifiers:

<ul>
<li>
<h3>search fields</h3>
<ul id="search-fields">
<li>
<span class="code">languages</span> : The programming
languages to search for.
</li>
<li>
<span class="code">authors</span> : Search for code
written/modified by a specific person.
</li>
<li>
<span class="code">date last modified</span> : Search for
code last modified on a specific date
(<span class="code">mm/dd/yy</span> format).
</li>
<li>
<span class="code">date created</span> : Search for code
created on a specific date
(<span class="code">mm/dd/yy</span> format).
</li>
<li>
<span class="code">symbols</span> : Search for specific
symbols.
</li>
<li>
<span class="code">functions</span> : Search for
functions with specific names.
</li>
<li>
<span class="code">classes</span> : Search for classes
with specific names.
</li>
<li>
<span class="code">variables</span> : Search for
variables with specific names.
</li>
</ul>

<p>
Each of the search fields allows for numerous values; just
separate them with spaces. If you'd like to search for a
multi-word, space-delimited string, on the other hand,
enclose it in double quotes.

A query for <span class="code">foo bar</span> will search
for occurrences of both <span class="string">"foo"</span> and
<span class="string">"bar"</span>, while
<span class="code">"foo bar"</span> will search for
occurrences of <span class="string">"foo bar"</span>.
</p>
</li>

<li>
<h3>search groups</h3>
<p>
Search groups facilitate even more robust queries: they're
like a bunch of individual searches grouped into one. A
user searching for occurrences of symbol
<span class="string">"curses"</span> in the language
<span class="string">"Python"</span>, and
<span class="string">"ncurses"</span> in
<span class="string">"C"</span>, won't get away with:
<span class="code">"symbols:curses ncurses"</span> and
<span class="code">"languages:Python C"</span>. The engine
might return results <span class="string">"curses"</span> in
<span class="string">"C"</span> and
<span class="string">"ncurses"</span> in
<span class="string">"Python"</span>!

To work around that, you can use two search groups: one for
<span class="string">"curses"</span> in
<span class="string">"Python"</span>, and another for
<span class="string">"curses"</span> in
<span class="string">"C"</span>.
<span class="title">bitshift</span> will return the union
of both sets of search results.
</p>
</li>
</ul>
</p>
</li>
</ul>
</p>
</li>

<li>
<h1><span>&raquo;</span> API</h1>
<p>
<span class="title">bitshift</span> provides an API through GET
requests to
<a href="http://bitshift.it/search.json"><span class="code">/search.json</span></a>.
</p>
<h2>parameters</h2>
<ul>
<li>
<span class="code">q</span> : The search query, as entered into the
search bar.
</li>
<li>
<span class="code">p</span> : The result page to return. Defaults to
<span class="code">1</span>. Each page contains ten results, so this
effectively offsets the search by
<span class="code">10 * (p - 1)</span> codelets.
</li>
<li>
<span class="code">hl</span> : Whether to return code as
<a href="http://pygments.org/">pygments</a>-highlighted HTML or as
plain source. Defaults to <span class="code">false</span>.
</li>
</ul>
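<p>
For example, the second page of highlighted results for the query
<span class="code">lang:python</span> could be fetched with a request like
<span class="code">/search.json?q=lang%3Apython&amp;p=2&amp;hl=1</span>.
</p>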
<h2>output</h2>
<p>
<span class="code">/search.json</span> returns a JSON-encoded
dictionary. If there was an error, it will contain a single key,
<span class="code">"error"</span>, whose value will contain a
human-readable description of the error. Otherwise, there will be two
keys: <span class="code">"count"</span>, storing the number of results,
and <span class="code">"results"</span>, storing a list of codelets.
Each codelet is a dictionary with the following key–value pairs:
</p>
<ul>
<li>
<span class="code">name</span> : The name of the codelet.
</li>
<li>
<span class="code">code</span> : The actual source code if
<span class="code">hl</span> was not given or was
<span class="code">false</span>; HTML code otherwise.
</li>
<li>
<span class="code">lang</span> : The language of the code.
</li>
<li>
<span class="code">authors</span> : A list of authors. Each author is
a list of two items: their name, and URL (or
<span class="code">null</span> if none is known).
</li>
<li>
<span class="code">url</span> : The URL of the page where the code
was crawled from.
</li>
<li>
<span class="code">created</span> : The date the code was created, as
an
<a href="https://en.wikipedia.org/wiki/ISO_8601">ISO 8601</a>-formatted
string (e.g. <span class="code">"2014-06-01T12:41:28"</span>).
</li>
<li>
<span class="code">modified</span> : The date the code was last
modified, as an
<a href="https://en.wikipedia.org/wiki/ISO_8601">ISO 8601</a>-formatted
string (e.g. <span class="code">"2014-06-01T12:41:28"</span>).
</li>
<li>
<span class="code">origin</span> : A list of two items: the
originating site's name (e.g. <span class="code">"GitHub"</span>) and
URL (e.g. <span class="code">"https://github.com"</span>).
</li>
</ul>
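<p>
A successful response is therefore shaped roughly like the following
(values shortened for illustration):
</p>
<pre>
{"count": 1, "results": [{"name": "...", "lang": "Python", "code": "...",
"authors": [["John Doe", null]], "url": "https://github.com/...",
"created": "2014-06-01T12:41:28", "modified": "2014-06-01T12:41:28",
"origin": ["GitHub", "https://github.com"]}]}
</pre>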
<h2>example</h2>
<p>
The following example Python 2 code searches for a given Python
function definition and prints the URL of the first result:
</p>
<table class="highlighttable code-example">
<tr>
<td class="linenos">
<div class="linenodiv">
<pre> 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19</pre>
</div>
</td>
<td class="code">
<div class="highlight">
<pre><span class="c">#!/usr/bin/env python</span>

<span class="kn">from</span> <span class="nn">json</span> <span class="kn">import</span> <span class="n">loads</span>
<span class="kn">from</span> <span class="nn">sys</span> <span class="kn">import</span> <span class="n">argv</span>
<span class="kn">from</span> <span class="nn">urllib</span> <span class="kn">import</span> <span class="n">urlencode</span>
<span class="kn">from</span> <span class="nn">urllib2</span> <span class="kn">import</span> <span class="n">urlopen</span>

<span class="k">def</span> <span class="nf">get_function</span><span class="p">(</span><span class="n">name</span><span class="p">):</span>
<span class="n">params</span> <span class="o">=</span> <span class="p">{</span><span class="s">&quot;q&quot;</span><span class="p">:</span> <span class="s">&quot;lang:python and func:def:</span><span class="si">%s</span><span class="s">&quot;</span> <span class="o">%</span> <span class="n">name</span><span class="p">}</span>
<span class="n">request</span> <span class="o">=</span> <span class="n">urlopen</span><span class="p">(</span><span class="s">&quot;http://bitshift.it/search.json?&quot;</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span><span class="n">params</span><span class="p">))</span>
<span class="n">res</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">request</span><span class="o">.</span><span class="n">read</span><span class="p">())[</span><span class="s">&quot;results&quot;</span><span class="p">]</span>
<span class="k">if</span> <span class="n">res</span><span class="p">:</span>
<span class="k">print</span> <span class="s">&quot;</span><span class="si">%s</span><span class="s">: </span><span class="si">%s</span><span class="s">&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">res</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="s">&quot;url&quot;</span><span class="p">])</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">print</span> <span class="s">&quot;</span><span class="si">%s</span><span class="s"> not found.&quot;</span> <span class="o">%</span> <span class="n">name</span>

<span class="k">if</span> <span class="n">__name__</span> <span class="o">==</span> <span class="s">&quot;__main__&quot;</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">argv</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
<span class="n">get_function</span><span class="p">(</span><span class="n">argv</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span></pre>
</div>
</td>
</tr>
</table>
</li>

<li id="sec3">
<h1><span>&raquo;</span> Get Involved</h1>
<p>
<span class="title">bitshift</span> is <span id="gasp">(gasp)</span>
open-source! The project is hosted on
<a href="https://github.com/earwig/bitshift">GitHub</a>; feel free to
file an issue or submit a pull request.
</p>
</li>
</ul>
= endblock

+ 26
- 0
templates/error404.html View File

@@ -0,0 +1,26 @@
= extends "layout.html"

= block title
404
= endblock

= block head
{{ assets.tag("error404.css") }}
= endblock

= block body
<div id="message">
{{ assets.syntax_highlight([
'puts("404");',
'printf("%d\n", 404);',
'puts 404',
'System.out.println("404")',
'print 404',
'console.log("404")',
'echo 404',
'std::cout << "404\\n"',
'(println "404")',
'say "404!";'
] | random) | safe }}
</div>
= endblock
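The error page above picks one of several "404" snippets at random and runs it through assets.syntax_highlight. A standalone sketch of that idea using Pygments is shown below; this is not the project's assets.syntax_highlight implementation, just an illustration of the technique.

# Sketch only: choose a random "404" snippet and return highlighted HTML.
# SNIPPETS and random_404_html are invented names for this illustration.
import random

from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import guess_lexer

SNIPPETS = ['puts("404");', 'puts 404', 'print 404', 'console.log("404")']

def random_404_html():
    snippet = random.choice(SNIPPETS)
    return highlight(snippet, guess_lexer(snippet), HtmlFormatter())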

+ 114
- 2
templates/index.html View File

@@ -1,9 +1,121 @@
= extends "layout.html"

= block title
Home
home
= endblock

= block head
{{ assets.tag("lib/jqueryui.custom.min.css") }}
{{ assets.tag("lib/jquery.min.js") }}
{{ assets.tag("lib/jquery-ui.min.js") }}
{{ assets.tag("lib/highlight.css") }}

{{ assets.tag("index.css") }}

<script>
AUTOCOMPLETE_LANGUAGES = {{ autocomplete_languages | safe }};
</script>
= endblock

= block body
<p>Hello, world.</p>
<div id="search-field">
<a id="logo" href="/">
<div id="logo">
<span id="logo-bit">bit</span
><span id="logo-angle">&laquo;</span
><span id="logo-shift">shift</span>
</div>
</a>

<form id="search-bar">
<input id="query" type="text" name="query"
><button id="advanced-search" title="advanced search" type="button">
Adv. Search
</button>

<div id="advanced-search">
<div id="heading">
<div id="col1">Fields</div
><div id="col2">
Search groups
<button id="submit">
<div>Search</div>
</button>
<button id="add-group">
<div><span>+</span> Add</div>
</button>
<button id="remove-group">
<div><span>-</span> Remove</div>
</button>
</div>
</div>

<div id="sidebar">
<ul>
<li>
<input type="checkbox" id="language" checked="true">
<label for="language"><div>languages</div></label>
</li>
<li>
<input type="checkbox" id="author">
<label for="author"><div>authors</div></label>
</li>
<li>
<input type="checkbox" id="date-last-modified">
<label for="date-last-modified"><div>date last modified</div></label>
</li>
<li>
<input type="checkbox" id="date-created">
<label for="date-created"><div>date created</div></label>
</li>
<li>
<input type="checkbox" id="symbol">
<label for="symbol"><div>symbols</div></label>
</li>
<li>
<input type="checkbox" id="function">
<label for="function"><div>functions</div></label>
</li>
<li>
<input type="checkbox" id="class">
<label for="class"><div>classes</div></label>
</li>
<li>
<input type="checkbox" id="variable">
<label for="variable"><div>variables</div></label>
</li>
</ul>
</div>

<div id="search-groups">
<div class="search-group" id="selected">
<div id="language">
<div class="name">languages</div
><input id="autocomplete" class="language" name="language" type="text"
><input type="checkbox" name="regex"
><span class="regex">Regex</span>
</div>
</div>
</div>
</div>
</form>
</div>

<div id="results"></div>

{{ assets.tag("index.js") }}
{{ assets.tag("index.advanced-search-form.js") }}
= endblock

= block after_body
<div id="hotkey-help" class="hidden">
<div>Hotkeys</div>
<ul>
<li><span class="hotkey">k</span> <span class="seperator">:</span> move window up to the previous result</li>
<li><span class="hotkey">j</span> <span class="seperator">:</span> move window down to the next result</li>
<li><span class="hotkey">h</span> <span class="seperator">:</span> move to the previous symbol match</li>
<li><span class="hotkey">l</span> <span class="seperator">:</span> move to the next symbol match</li>
<li><span class="hotkey">?</span> <span class="seperator">:</span> toggle help</li>
</ul>
</div>
= endblock

+ 28
- 8
templates/layout.html View File

@@ -4,24 +4,44 @@
<html>
<head>
<title>
= block title
= endblock
bitshift &laquo;
= filter lower
= block title
= endblock
= endfilter
</title>

<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"/>
<meta name="description" content="bitshift is an online code snippet
exchange."/>
<meta name="keywords" content="code snippet exchange golf programming
software community"/>
<meta name="description" content="bitshift is a source code search engine."/>
<meta name="keywords" content="source code language search engine"/>
<meta name="author" content="Benjamin Attal Ben Kurtovic Severyn Kozak"/>

{{ assets.tag("main.css") }}
{{ assets.tag("main.js") }}

= block head
= endblock
</head>
<body>
= block body
= endblock
<div id="container">
<div id="header">
</div>

<div id="body">
<div id="center">
= block body
= endblock
</div>
</div>

= block after_body
= endblock

<div id="footer">
<a href="/">home</a>
<a href="/about">about</a>
<a href="/docs">docs</a>
</div>
</div>
</body>
</html>

+ 0
- 0
View File


+ 19
- 0
test/find_function_def.py View File

@@ -0,0 +1,19 @@
#!/usr/bin/env python

from json import loads
from sys import argv
from urllib import urlencode
from urllib2 import urlopen

def get_function(name):
    params = {"q": "lang:python and func:def:%s" % name}
    request = urlopen("http://bitshift.it/search.json?" + urlencode(params))
    res = loads(request.read())["results"]
    if res:
        print "%s: %s" % (name, res[0]["url"])
    else:
        print "%s not found." % name

if __name__ == "__main__":
    if len(argv) == 2:
        get_function(argv[1])

+ 56
- 0
test/parser_test.py View File

@@ -0,0 +1,56 @@
import socket, sys, struct

file_name = 'resources/<name>.c'
server_socket_number = 5001
recv_size = 8192

if __name__ == '__main__':
    if len(sys.argv) == 1:
        print "Please input a parser to test."

    elif len(sys.argv) > 2:
        print "Too many arguments."

    else:
        if sys.argv[1] == 'c':
            pass

        elif sys.argv[1] == 'java':
            file_name = "resources/Matrix.java"
            server_socket_number = 5002

        elif sys.argv[1] == 'ruby':
            file_name = "resources/parser.rb"
            server_socket_number = 5065

        server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        server_socket.connect(("localhost", server_socket_number))

        with open(file_name, "r") as source_file:
            source = source_file.read()
            server_socket.send("%d\n%s" % (len(source), source));

        total_data = []; size_data = cur_data = ''
        total_size = 0; size = sys.maxint

        while total_size < size:
            cur_data = server_socket.recv(recv_size)

            if not total_data:
                if len(size_data) > 4:
                    size_data += cur_data
                    size = struct.unpack('>i', size_data[:4])[0]
                    recv_size = size
                    if recv_size > sys.maxint: recv_size = sys.maxint
                    total_data.append(size_data[4:])
                else:
                    size_data += cur_data

            else:
                total_data.append(cur_data)

            total_size = sum([len(s) for s in total_data])


        server_socket.close()
        print ''.join(total_data);
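The test above exercises what appears to be the parser servers' framing: the request is the source length as a decimal line followed by the source, and the reply is a 4-byte big-endian length prefix followed by the payload. A small helper wrapping that exchange might look like the sketch below; the ports and framing are inferred from the test, and parse_source / _recv_exactly are names invented here.

# Python 2 sketch of the request/response framing used in parser_test.py.
import socket
import struct

PARSER_PORTS = {"c": 5001, "java": 5002, "ruby": 5065}  # from parser_test.py

def parse_source(language, source, host="localhost"):
    """Send `source` to the parser server for `language`; return its reply."""
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.connect((host, PARSER_PORTS[language]))
    try:
        # Request: "<length>\n<source>"
        sock.sendall("%d\n%s" % (len(source), source))
        # Response: 4-byte big-endian length, then that many bytes of payload.
        header = _recv_exactly(sock, 4)
        size = struct.unpack(">i", header)[0]
        return _recv_exactly(sock, size)
    finally:
        sock.close()

def _recv_exactly(sock, n):
    chunks = []
    remaining = n
    while remaining > 0:
        chunk = sock.recv(remaining)
        if not chunk:
            break
        chunks.append(chunk)
        remaining -= len(chunk)
    return "".join(chunks)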

+ 218
- 0
test/resources/Matrix.java View File

@@ -0,0 +1,218 @@
package battlechap;

import java.io.PrintStream;

public class Matrix {
private Object[][] _datmatrix;

public Matrix(int paramInt){
this._datmatrix = new Object[paramInt][paramInt];
}

public int size() {
return this._datmatrix.length;
}

public Object get(int paramInt1, int paramInt2) {
return this._datmatrix[paramInt1][paramInt2];
}

public boolean isEmpty(int paramInt1, int paramInt2) {
return this._datmatrix[paramInt1][paramInt2] == null;
}

public boolean equals(Object paramObject) {
boolean bool = true;
if ((paramObject instanceof Matrix)) {
Matrix localMatrix = (Matrix)paramObject;
if (localMatrix.size() == size()) {
for (int i = 0; i < size(); i++) {
for (int j = 0; j < size(); j++) {
if (!localMatrix.get(i, j).equals(get(i, j))) {
bool = false;
break;
}
}
if (!bool)
break;
}
}
else
bool = false;
}
else
{
bool = false;
}
return bool;
}

public Object set(int paramInt1, int paramInt2, Object paramObject) {
Object localObject = this._datmatrix[paramInt1][paramInt2];
this._datmatrix[paramInt1][paramInt2] = paramObject;
return localObject;
}

public void transpose() {
int i = 0;
for (int j = 0; j < size(); j++) {
for (int k = i; k < size(); k++) {
set(j, k, set(k, j, get(j, k)));
}
i++;
}
}

public static void swapRows(int paramInt1, int paramInt2, Object[][] paramArrayOfObject) {
for (int i = 0; i < paramArrayOfObject[paramInt1].length; i++) {
Object localObject = paramArrayOfObject[paramInt1][i];
paramArrayOfObject[paramInt1][i] = paramArrayOfObject[paramInt2][i];
paramArrayOfObject[paramInt2][i] = localObject;
}
}

public static void swapCols(int paramInt1, int paramInt2, Object[][] paramArrayOfObject) {
for (int i = 0; i < paramArrayOfObject.length; i++) {
Object localObject = paramArrayOfObject[i][paramInt1];
paramArrayOfObject[i][paramInt1] = paramArrayOfObject[i][paramInt2];
paramArrayOfObject[i][paramInt2] = localObject;
}
}

public Object[] getRow(int paramInt) {
Object[] arrayOfObject = new Object[this._datmatrix[paramInt].length];
for (int i = 0; i < arrayOfObject.length; i++) {
arrayOfObject[i] = this._datmatrix[paramInt][i];
}
return arrayOfObject;
}

public Object[] getCol(int paramInt) {
Object[] arrayOfObject = new Object[this._datmatrix[paramInt].length];
for (int i = 0; i < arrayOfObject.length; i++) {
arrayOfObject[i] = this._datmatrix[i][paramInt];
}
return arrayOfObject;
}

public Object[] setRow(int paramInt, Object[] paramArrayOfObject) {
Object[] arrayOfObject = getRow(paramInt);

for (int i = 0; i < size(); i++) {
set(paramInt, i, paramArrayOfObject[i]);
}

return arrayOfObject;
}

public Object[] setCol(int paramInt, Object[] paramArrayOfObject) {
Object[] arrayOfObject = getCol(paramInt);

for (int i = 0; i < size(); i++) {
set(i, paramInt, paramArrayOfObject[i]);
}

return arrayOfObject;
}

public String toString()
{
String str1 = "";
for (int i = 0; i < this._datmatrix.length; i++) {
if (i < 9)
str1 = str1 + (i + 1) + ": ";
else
str1 = str1 + (i + 1) + ":";
for (int j = 0; j < this._datmatrix[i].length; j++) {
int k = (this._datmatrix[i][j] + "").length();
String str2 = " ".substring(k);
str1 = str1 + this._datmatrix[i][j] + str2;
}
str1 = str1 + "\n";
}
return str1;
}

public static void print(Object[][] paramArrayOfObject) {
for (int i = 0; i < paramArrayOfObject.length; i++) {
for (int j = 0; j < paramArrayOfObject[i].length; j++) {
int k = (paramArrayOfObject[i][j] + "").length();
String str = " ".substring(k);
System.out.print(paramArrayOfObject[i][j] + str);
}
System.out.print("\n");
}
}

public static void printArray(Object[] paramArrayOfObject) {
for (int i = 0; i < paramArrayOfObject.length; i++) {
int j = (paramArrayOfObject[i] + "").length();
String str = " ".substring(j);
System.out.print(paramArrayOfObject[i] + str);
}
System.out.print("\n");
}

public static void main(String[] paramArrayOfString) {
Matrix localMatrix1 = new Matrix(5);
Matrix localMatrix2 = new Matrix(5);
for (int i = 0; i < localMatrix1.size(); i++) {
for (int j = 0; j < localMatrix1.size(); j++) {
Integer localInteger1 = new Integer((int)(Math.random() * 20.0D));
localMatrix1.set(i, j, localInteger1);
localMatrix2.set(i, j, localInteger1);
}
}

System.out.println("\nDemonstrating equals method (should be true)\t" + localMatrix2.equals(localMatrix1) + "\n");

System.out.println("Demonstrating get method\n" + localMatrix1.get(0, 0) + "\n");
System.out.println("Demonstrating is empty method\n" + localMatrix1.isEmpty(1, 0) + "\n");
System.out.println("Demonstrating size method \n" + localMatrix1.size() + "\n");
System.out.println("Demonstrating toString method\n" + localMatrix1 + "\n");
localMatrix1.transpose();
System.out.println("Blop has been transposed\n" + localMatrix1 + "\n");

Object[][] arrayOfObject = new Object[4][4];
for (int j = 0; j < arrayOfObject.length; j++) {
for (int k = 0; k < arrayOfObject[j].length; k++) {
Integer localInteger2 = new Integer((int)(Math.random() * 20.0D));
arrayOfObject[j][k] = localInteger2;
}
}
System.out.println("\n\n**Swapping Rows Demo**");
print(arrayOfObject);
System.out.println("\nRows 1 and 2 have been Swapped \n");
swapRows(1, 2, arrayOfObject);
print(arrayOfObject);

System.out.println("\n**Swapping Columns Demo**");
print(arrayOfObject);
System.out.println("\n\nColumns 1 and 2 have been Swapped \n");
swapCols(1, 2, arrayOfObject);
print(arrayOfObject);

System.out.println("\n**Getting rows demo (from blop)**");
System.out.println(localMatrix1);
System.out.println("\nGetting row 1\n");
printArray(localMatrix1.getRow(1));

System.out.println("\n**Getting cols demo (from blop)**");
System.out.println(localMatrix1);
System.out.println("\nGetting col 1\n");
printArray(localMatrix1.getCol(1));

System.out.println("\n**Demonstrating set row method**");
System.out.println(localMatrix1);
System.out.println("\nSwitching row 1 of blop to 1st column of blop\n");
localMatrix1.setRow(1, localMatrix1.getCol(1));
System.out.println(localMatrix1 + "\n");

System.out.println("\n**Demonstrating set col method**");
System.out.println(localMatrix1);
System.out.println("\nSwitching col 1 of blop to 2nd row of blop\n");
localMatrix1.setCol(1, localMatrix1.getRow(2));
System.out.println(localMatrix1 + "\n");
}
}


+ 40
- 0
test/resources/app.py View File

@@ -0,0 +1,40 @@
"""
Module to contain all the project's Flask server plumbing.
"""

from flask import Flask
from flask import render_template, session

from bitshift import assets
# from bitshift.database import Database
# from bitshift.query import parse_query

app = Flask(__name__)
app.config.from_object("bitshift.config")

app_env = app.jinja_env
app_env.line_statement_prefix = "="
app_env.globals.update(assets=assets)

# database = Database()

@app.route("/")
def index():
    return render_template("index.html")

@app.route("/search/<query>")
def search(query):
    # tree = parse_query(query)
    # database.search(tree)
    pass

@app.route("/about")
def about():
    return render_template("about.html")

@app.route("/developers")
def developers():
    return render_template("developers.html")

if __name__ == "__main__":
    app.run(debug=True)
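The app_env.line_statement_prefix = "=" setting above is what turns the "= extends", "= block", and "= filter" lines in the templates into Jinja2 line statements rather than a custom syntax. A standalone illustration of the same Jinja2 feature (not project code; the template string here is made up):

# Demonstrates Jinja2 line statements with "=" as the prefix.
from jinja2 import Environment

env = Environment(line_statement_prefix="=")
template = env.from_string(
    "= if results\n"
    "Found {{ results | length }} result(s).\n"
    "= else\n"
    "No results.\n"
    "= endif\n"
)
print template.render(results=["a", "b"])  # prints roughly "Found 2 result(s)."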

+ 126
- 0
test/resources/parser.rb View File

@@ -0,0 +1,126 @@
require 'socket'
require 'ruby_parser'
require 'sexp_processor'

module Bitshift
class Parser
def initialize(source)
@source = source
end

def parse
parser = RubyParser.new
tree = parser.parse(@source)
puts tree.inspect
offset = tree.line - 1
processor = NodeVisitor.new offset
processor.process tree
return processor.symbols
end
end

class NodeVisitor < SexpProcessor
attr_accessor :symbols
attr_accessor :offset

def initialize(offset)
super()
@require_empty = false
@offset = offset

module_hash = Hash.new {|hash, key| hash[key] = Hash.new}
class_hash = module_hash.clone
function_hash = Hash.new {|hash, key| hash[key] = { calls: [] } }
var_hash = Hash.new {|hash, key| hash[key] = [] }

@symbols = {
modules: module_hash,
classes: class_hash,
functions: function_hash,
vars: var_hash
}
end

def block_position(exp)
pos = Hash.new
end_ln = (start_ln = exp.line - offset)
cur_exp = exp

while cur_exp.is_a? Sexp
end_ln = cur_exp.line - offset
cur_exp = cur_exp.last
break if cur_exp == nil
end

pos[:coord] = {
start_ln: start_ln,
end_ln: end_ln }
return pos
end

def statement_position(exp)
pos = Hash.new
end_ln = start_ln = exp.line - offset

pos[:coord] = {
start_ln: start_ln,
end_ln: end_ln }
return pos
end

def process_module(exp)
pos = block_position exp
exp.shift
name = exp.shift
symbols[:modules][name] = pos
exp.each_sexp {|s| process(s)}
return exp.clear
end

def process_class(exp)
pos = block_position exp
exp.shift
name = exp.shift
symbols[:classes][name] = pos
exp.each_sexp {|s| process(s)}
return exp.clear
end

def process_defn(exp)
pos = block_position exp
exp.shift
name = exp.shift
symbols[:functions][name][:declaration] = pos
exp.each_sexp {|s| process(s)}
return exp.clear
end

def process_call(exp)
pos = statement_position exp
exp.shift
exp.shift
name = exp.shift
symbols[:functions][name][:calls] << pos
exp.each_sexp {|s| process(s)}
return exp.clear
end

def process_iasgn(exp)
pos = statement_position exp
exp.shift
name = exp.shift
symbols[:vars][name] << pos
exp.each_sexp {|s| process(s)}
return exp.clear
end

def process_lasgn(exp)
pos = statement_position exp
exp.shift
name = exp.shift
symbols[:vars][name] << pos
exp.each_sexp {|s| process(s)}
return exp.clear
end
end
end

+ 76
- 0
test/test_query_parser.py View File

@@ -0,0 +1,76 @@
# -*- coding: utf-8 -*-

from __future__ import unicode_literals
import unittest

from bitshift.query import parse_query

TESTS = [
# Text
("test", "Tree(Text(String(u'test')))"),
("re:test", "Tree(Text(Regex(u'test')))"),

# Language
("language:python", "Tree(Language(Python))"),
("language:py", "Tree(Language(Python))"),
("l:r:r..y", "Tree(Language(Ruby))"),
(r'"lang:re:python|^c$"',
"Tree(BinaryOp(Language(C), OR, Language(Python)))"),

# Author
('"author:Ben Kurtovic"', "Tree(Author(String(u'Ben Kurtovic')))"),
(r"'a:re:b.*?\sk.*?'", r"Tree(Author(Regex(u'b.*?\\sk.*?')))"),

# Date
("'create:before:Jan 1, 2014'",
"Tree(Date(CREATE, BEFORE, 2014-01-01 00:00:00))"),
("'modify:after:2010-05-09 10:11:12'",
"Tree(Date(MODIFY, AFTER, 2010-05-09 10:11:12))"),

# Symbol
("sym:foobar", "Tree(Symbol(ALL, ALL, String(u'foobar')))"),
("func:foo_bar", "Tree(Symbol(ALL, FUNCTION, String(u'foo_bar')))"),
("func:foo_bar()", "Tree(Symbol(ALL, FUNCTION, String(u'foo_bar')))"),
("class:FooBar", "Tree(Symbol(ALL, CLASS, String(u'FooBar')))"),
("var:foobar", "Tree(Symbol(ALL, VARIABLE, String(u'foobar')))"),
("var:r:foobar", "Tree(Symbol(ALL, VARIABLE, Regex(u'foobar')))"),

# Composition
("(a and b) or (c and d)", ", ".join([
"Tree(BinaryOp(BinaryOp(Text(String(u'a'))", "AND",
"Text(String(u'b')))", "OR", "BinaryOp(Text(String(u'c'))", "AND",
"Text(String(u'd')))))"])),
("a and b or c and d", ", ".join([
"Tree(BinaryOp(BinaryOp(Text(String(u'a'))", "AND",
"Text(String(u'b')))", "OR", "BinaryOp(Text(String(u'c'))", "AND",
"Text(String(u'd')))))"])),
("a and b or c or d", ", ".join([
"Tree(BinaryOp(BinaryOp(Text(String(u'a'))", "AND",
"Text(String(u'b')))", "OR", "BinaryOp(Text(String(u'c'))", "OR",
"Text(String(u'd')))))"])),
("a and (b or c or d)", ", ".join([
"Tree(BinaryOp(Text(String(u'a'))", "AND",
"BinaryOp(Text(String(u'b'))", "OR", "BinaryOp(Text(String(u'c'))", "OR",
"Text(String(u'd'))))))"])),
("a not b", ", ".join([
"Tree(BinaryOp(Text(String(u'a'))", "AND", "UnaryOp(NOT",
"Text(String(u'b')))))"])),

# Unicode, Escaping
(r'lang:py "author:fo\\o \"bar\" baz\\"', ", ".join([
"Tree(BinaryOp(Language(Python)", "AND",
"Author(String(u'fo\\\\o \"bar\" baz\\\\'))))"])),
('"author:Ben Kurtović"', "Tree(Author(String(u'Ben Kurtovi\\u0107')))")
]

class TestQueryParser(unittest.TestCase):
"""Unit tests for the query parser in :py:mod:`bitshift.query`."""

def test_parse(self):
"""test full query parsing"""
for test, expected in TESTS:
self.assertEqual(expected, parse_query(test).serialize())


if __name__ == "__main__":
unittest.main(verbosity=2)
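The TESTS table above doubles as a quick reference for the query syntax (lang:, author:, func:, class:, var:, re:/r: for regexes, and/or/not for composition). Checking a query by hand, assuming bitshift is importable, is a one-liner; the exact serialized output is only a guess based on the cases above.

# Prints the serialized parse tree for a compound query; with the grammar
# exercised above this should come out close to:
# Tree(BinaryOp(Language(Python), AND, Symbol(ALL, FUNCTION, String(u'parse_query'))))
from bitshift.query import parse_query

print parse_query("lang:python and func:parse_query").serialize()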
