@@ -52,3 +52,4 @@ target | |||
# Ctags | |||
*/tags | |||
logs | |||
Gemfile.lock |
@@ -8,6 +8,7 @@ import logging | |||
import logging.handlers | |||
import os | |||
import Queue | |||
import sys | |||
import time | |||
from threading import Event | |||
@@ -37,9 +38,9 @@ def crawl(): | |||
crawler.BitbucketCrawler(repo_clone_queue, run_event), | |||
indexer.GitIndexer(repo_clone_queue, run_event)] | |||
parse_servers = start_parse_servers() | |||
for thread in threads: | |||
thread.start() | |||
parse_servers = start_parse_servers() | |||
try: | |||
while 1: | |||
@@ -77,5 +78,4 @@ def _configure_logging(): | |||
root_logger.setLevel(logging.NOTSET) | |||
if __name__ == "__main__": | |||
_configure_logging() | |||
crawl() |
@@ -86,8 +86,8 @@ class GitHubCrawler(threading.Thread): | |||
time.sleep(1) | |||
self.clone_queue.put(indexer.GitRepository( | |||
repo["html_url"], repo["full_name"].replace("/", ""), | |||
"GitHub", repo_ranks[repo["full_name"]])) | |||
repo["html_url"], repo["full_name"], "GitHub", | |||
repo_ranks[repo["full_name"]])) | |||
if int(resp.headers["x-ratelimit-remaining"]) == 0: | |||
time.sleep(int(resp.headers["x-ratelimit-reset"]) - | |||
@@ -1,283 +0,0 @@ | |||
{ | |||
"_comment" : "A list of programming languages supported by `bitshift`.", | |||
"languages" : [ | |||
"Debian Sourcelist", | |||
"Delphi", | |||
"JavaScript+Mako", | |||
"Brainfuck", | |||
"Ceylon", | |||
"JavaScript+Django/Jinja", | |||
"HTML+Evoque", | |||
"NumPy", | |||
"Modula-2", | |||
"LiveScript", | |||
"Nimrod", | |||
"Bash", | |||
"HTML+Django/Jinja", | |||
"CSS+PHP", | |||
"XML+Lasso", | |||
"VimL", | |||
"CSS+Genshi Text", | |||
"Fancy", | |||
"Coldfusion HTML", | |||
"cfstatement", | |||
"Scalate Server Page", | |||
"Smarty", | |||
"XML+Evoque", | |||
"haXe", | |||
"PowerShell", | |||
"Tea", | |||
"HTML+Cheetah", | |||
"Mason", | |||
"Django/Jinja", | |||
"JAGS", | |||
"ApacheConf", | |||
"DTD", | |||
"Lighttpd configuration file", | |||
"Java", | |||
"JavaScript+Genshi Text", | |||
"Scheme", | |||
"Nemerle", | |||
"RHTML", | |||
"Ragel in Java Host", | |||
"Darcs Patch", | |||
"Puppet", | |||
"Octave", | |||
"CoffeeScript", | |||
"Ragel in D Host", | |||
"Scilab", | |||
"Monkey", | |||
"HTML+Myghty", | |||
"CSS", | |||
"JavaScript+Smarty", | |||
"Io", | |||
"COBOLFree", | |||
"Asymptote", | |||
"vhdl", | |||
"CSS+Ruby", | |||
"Fortran", | |||
"d-objdump", | |||
"MySQL", | |||
"REBOL", | |||
"C++", | |||
"ERB", | |||
"CBM BASIC V2", | |||
"Befunge", | |||
"Julia", | |||
"MoonScript", | |||
"Ruby", | |||
"XML+Smarty", | |||
"Dylan", | |||
"Groovy", | |||
"MoinMoin/Trac Wiki markup", | |||
"autohotkey", | |||
"C", | |||
"HTML", | |||
"Felix", | |||
"CMake", | |||
"NSIS", | |||
"SourcePawn", | |||
"Mako", | |||
"VGL", | |||
"Velocity", | |||
"Koka", | |||
"CUDA", | |||
"Gnuplot", | |||
"IRC logs", | |||
"Prolog", | |||
"Python", | |||
"CSS+Django/Jinja", | |||
"verilog", | |||
"Smalltalk", | |||
"JavaScript+Myghty", | |||
"YAML", | |||
"Julia console", | |||
"ANTLR With ActionScript Target", | |||
"XML+Mako", | |||
"XSLT", | |||
"UrbiScript", | |||
"Scaml", | |||
"S", | |||
"DylanLID", | |||
"MAQL", | |||
"sqlite3con", | |||
"Boo", | |||
"OCaml", | |||
"eC", | |||
"ActionScript", | |||
"VB.net", | |||
"SquidConf", | |||
"XQuery", | |||
"D", | |||
"Fantom", | |||
"Gettext Catalog", | |||
"Logos", | |||
"Lasso", | |||
"SCSS", | |||
"BBCode", | |||
"Haml", | |||
"FoxPro", | |||
"MuPAD", | |||
"XML+Ruby", | |||
"Dart", | |||
"IDL", | |||
"dg", | |||
"Evoque", | |||
"Jade", | |||
"c-objdump", | |||
"Kconfig", | |||
"Java Server Page", | |||
"reg", | |||
"ABAP", | |||
"XML+Velocity", | |||
"JavaScript+Cheetah", | |||
"HTML+Mako", | |||
"Ragel in Ruby Host", | |||
"RobotFramework", | |||
"Protocol Buffer", | |||
"CFEngine3", | |||
"Ragel", | |||
"GLSL", | |||
"COBOL", | |||
"TypeScript", | |||
"Ada", | |||
"PostgreSQL SQL dialect", | |||
"Xtend", | |||
"Logtalk", | |||
"objdump", | |||
"CSS+Mako", | |||
"ca65", | |||
"Objective-C++", | |||
"Gherkin", | |||
"HTML+PHP", | |||
"Makefile", | |||
"PostScript", | |||
"Hxml", | |||
"Kotlin", | |||
"PL/pgSQL", | |||
"Vala", | |||
"Haskell", | |||
"Bro", | |||
"Lua", | |||
"POVRay", | |||
"Sass", | |||
"ANTLR With Java Target", | |||
"Tcl", | |||
"ANTLR With ObjectiveC Target", | |||
"JavaScript+Ruby", | |||
"Racket", | |||
"AspectJ", | |||
"Base Makefile", | |||
"ANTLR With Python Target", | |||
"cpp-objdump", | |||
"Genshi Text", | |||
"Ioke", | |||
"PyPy Log", | |||
"Croc", | |||
"Objective-J", | |||
"GAS", | |||
"Batchfile", | |||
"Snobol", | |||
"XML", | |||
"ANTLR", | |||
"Opa", | |||
"XML+Cheetah", | |||
"Go", | |||
"Diff", | |||
"MiniD", | |||
"Cython", | |||
"Ragel in C Host", | |||
"Erlang", | |||
"Debian Control file", | |||
"aspx-vb", | |||
"BUGS", | |||
"Ragel in CPP Host", | |||
"aspx-cs", | |||
"Properties", | |||
"Groff", | |||
"Clojure", | |||
"Modelica", | |||
"QML", | |||
"JavaScript+Lasso", | |||
"ANTLR With Perl Target", | |||
"Genshi", | |||
"BlitzMax", | |||
"Treetop", | |||
"Matlab", | |||
"Myghty", | |||
"HTML+Genshi", | |||
"Duel", | |||
"Perl", | |||
"FSharp", | |||
"reStructuredText", | |||
"NewLisp", | |||
"Scala", | |||
"CSS+Lasso", | |||
"XML+PHP", | |||
"Stan", | |||
"INI", | |||
"MOOCode", | |||
"Shell Session", | |||
"RPMSpec", | |||
"Newspeak", | |||
"Bash Session", | |||
"Coq", | |||
"Raw token data", | |||
"Tcsh", | |||
"HTML+Lasso", | |||
"C#", | |||
"Gosu Template", | |||
"RConsole", | |||
"MXML", | |||
"TeX", | |||
"CSS+Smarty", | |||
"Text only", | |||
"ANTLR With C# Target", | |||
"OpenEdge ABL", | |||
"Cheetah", | |||
"Smali", | |||
"CSS+Myghty", | |||
"Rd", | |||
"LLVM", | |||
"Standard ML", | |||
"Elixir", | |||
"Nginx configuration file", | |||
"GoodData-CL", | |||
"AppleScript", | |||
"HTML+Smarty", | |||
"Objective-C", | |||
"JavaScript", | |||
"Rust", | |||
"Common Lisp", | |||
"Embedded Ragel", | |||
"ActionScript 3", | |||
"systemverilog", | |||
"Literate Haskell", | |||
"PHP", | |||
"ANTLR With CPP Target", | |||
"Gosu", | |||
"Hybris", | |||
"JavaScript+PHP", | |||
"Factor", | |||
"HTML+Velocity", | |||
"Mscgen", | |||
"Ooc", | |||
"SQL", | |||
"HTTP", | |||
"ECL", | |||
"Redcode", | |||
"Ragel in Objective C Host", | |||
"XML+Django/Jinja", | |||
"Awk", | |||
"JSON", | |||
"NASM", | |||
"ANTLR With Ruby Target", | |||
"XML+Myghty", | |||
"AutoIt", | |||
"Mako", | |||
"CSS+Mako", | |||
"HTML+Mako", | |||
"XML+Mako", | |||
"JavaScript+Mako" | |||
] | |||
} |
@@ -1,5 +1,21 @@ | |||
import json | |||
from os import path | |||
with open(path.join(path.dirname(__file__), "languages.json")) as lang_json: | |||
LANGS = [lang for lang in json.load(lang_json)["languages"]] | |||
import yaml | |||
__all__ = ["LANGS", "LANGS_ALL"] | |||
def _load_langs():
    """Load the language table from the ``languages.yml`` next to this module.

    The file's ``languages`` key holds a list whose entries are either a
    plain name or a single-key mapping of a group name to a list of names
    belonging to that group.

    Returns a ``(langs, all_langs)`` tuple:

    - ``langs``: list with one name per entry, in file order (the mapping
      key for grouped entries, the name itself otherwise).
    - ``all_langs``: dict mapping every grouped name (or the plain name) to
      the index of its entry in ``langs``.
    """
    filename = path.join(path.dirname(__file__), "languages.yml")
    with open(filename) as fp:
        # safe_load only builds plain Python types, which is all this file
        # contains; a bare yaml.load() without a Loader is deprecated in
        # PyYAML 5.1+ and rejected outright by PyYAML 6.
        data = yaml.safe_load(fp)["languages"]

    langs = []
    all_langs = {}
    for i, entry in enumerate(data):
        if isinstance(entry, dict):
            # next(iter(...)) instead of .keys()[0] so this also works where
            # dict views are not indexable (Python 3).
            name = next(iter(entry))
            members = entry[name]
        else:
            name = entry
            members = [entry]
        langs.append(name)
        for member in members:
            all_langs[member] = i
    return langs, all_langs
LANGS, LANGS_ALL = _load_langs() |
@@ -0,0 +1,368 @@ | |||
# A list of programming languages supported by bitshift: | |||
languages: | |||
# With parsers: | |||
- Python: | |||
- Python | |||
- Python 3 | |||
- Python 3.0 Traceback | |||
- Python console session | |||
- Python Traceback | |||
- NumPy | |||
- PyPy Log | |||
- C | |||
- Java | |||
- Ruby: | |||
- Ruby | |||
- Ruby irb session | |||
# Without parsers: | |||
- ABAP | |||
- APL | |||
- ActionScript: | |||
- ActionScript | |||
- ActionScript 3 | |||
- ANTLR: | |||
- ANTLR | |||
- ANTLR With ActionScript Target | |||
- ANTLR With CPP Target | |||
- "ANTLR With C# Target" | |||
- ANTLR With Java Target | |||
- ANTLR With ObjectiveC Target | |||
- ANTLR With Perl Target | |||
- ANTLR With Python Target | |||
- ANTLR With Ruby Target | |||
- Ada | |||
- Agda: | |||
- Agda | |||
- Literate Agda | |||
- Alloy | |||
- AmbientTalk | |||
- ApacheConf | |||
- AppleScript | |||
- AspectJ | |||
- aspx-cs | |||
- aspx-vb | |||
- Asymptote | |||
- autohotkey | |||
- AutoIt | |||
- Awk | |||
- BBCode | |||
- BUGS | |||
- Bash: | |||
- Bash | |||
- Bash Session | |||
- Batchfile | |||
- Befunge | |||
- BlitzBasic: | |||
- BlitzBasic | |||
- BlitzMax | |||
- Boo | |||
- Brainfuck | |||
- Bro | |||
- "C#" | |||
- C++ | |||
- ca65 | |||
- CBM BASIC V2 | |||
- Ceylon | |||
- CFEngine3 | |||
- cfstatement | |||
- ChaiScript | |||
- Chapel | |||
- Cheetah | |||
- Cirru | |||
- Clay | |||
- Clojure: | |||
- Clojure | |||
- ClojureScript | |||
- CMake | |||
- COBOL: | |||
- COBOL | |||
- COBOLFree | |||
- CoffeeScript | |||
- Coldfusion CFC | |||
- Coldfusion HTML | |||
- Common Lisp | |||
- Coq | |||
- Croc | |||
- Cryptol: | |||
- Cryptol | |||
- Literate Cryptol | |||
- CSS: | |||
- CSS | |||
- CSS+Django/Jinja | |||
- CSS+Genshi Text | |||
- CSS+Lasso | |||
- CSS+Mako | |||
- CSS+Mako | |||
- CSS+Myghty | |||
- CSS+PHP | |||
- CSS+Ruby | |||
- CSS+Smarty | |||
- CUDA | |||
- Cypher | |||
- Cython | |||
- D | |||
- Darcs Patch | |||
- Dart | |||
- Debian Control file | |||
- Debian Sourcelist | |||
- Delphi | |||
- dg | |||
- Diff | |||
- Django/Jinja | |||
- Docker | |||
- DTD | |||
- Duel | |||
- Dylan: | |||
- Dylan | |||
- Dylan session | |||
- DylanLID | |||
- EBNF | |||
- eC | |||
- ECL | |||
- Eiffel | |||
- Elixir: | |||
- Elixir | |||
- Elixir iex session | |||
- Embedded Ragel | |||
- ERB: | |||
- ERB | |||
- RHTML | |||
- Erlang: | |||
- Erlang | |||
- Erlang erl session | |||
- Evoque | |||
- Factor | |||
- Fancy | |||
- Fantom | |||
- Felix | |||
- Fortran | |||
- FoxPro | |||
- FSharp | |||
- GAP | |||
- GAS | |||
- Genshi | |||
- Genshi Text | |||
- Gettext Catalog | |||
- Gherkin | |||
- GLSL | |||
- Gnuplot | |||
- Go | |||
- Golo | |||
- GoodData-CL | |||
- Gosu | |||
- Gosu Template | |||
- Groff | |||
- Groovy | |||
- Haml | |||
- Handlebars | |||
- Haskell: | |||
- Haskell | |||
- Literate Haskell | |||
- Haxe | |||
- HTML: | |||
- HTML | |||
- HTML+Cheetah | |||
- HTML+Django/Jinja | |||
- HTML+Evoque | |||
- HTML+Genshi | |||
- HTML+Lasso | |||
- HTML+Mako | |||
- HTML+Mako | |||
- HTML+Myghty | |||
- HTML+PHP | |||
- HTML+Smarty | |||
- HTML+Velocity | |||
- Hxml | |||
- Hy | |||
- Hybris | |||
- IDL | |||
- Idris: | |||
- Idris | |||
- Literate Idris | |||
- Igor | |||
- Inform 6: | |||
- Inform 6 | |||
- Inform 6 template | |||
- Inform 7 | |||
- INI | |||
- Io | |||
- Ioke | |||
- Jade | |||
- JAGS | |||
- Jasmin | |||
- Java Server Page | |||
- JavaScript: | |||
- JavaScript | |||
- JavaScript+Cheetah | |||
- JavaScript+Django/Jinja | |||
- JavaScript+Genshi Text | |||
- JavaScript+Lasso | |||
- JavaScript+Mako
- JavaScript+Mako | |||
- JavaScript+Myghty | |||
- JavaScript+PHP | |||
- JavaScript+Ruby | |||
- JavaScript+Smarty | |||
- JSON | |||
- Julia: | |||
- Julia | |||
- Julia console | |||
- Kal | |||
- Kconfig | |||
- Koka | |||
- Kotlin | |||
- Lasso | |||
- Lighttpd configuration file | |||
- Limbo | |||
- LiveScript | |||
- LLVM | |||
- Logos | |||
- Logtalk | |||
- LSL | |||
- Lua | |||
- Makefile:
- Makefile | |||
- Base Makefile | |||
- Mako | |||
- MAQL | |||
- Mask | |||
- Mason | |||
- Mathematica | |||
- Matlab: | |||
- Matlab | |||
- Matlab session | |||
- MiniD | |||
- Modelica | |||
- Modula-2 | |||
- Monkey | |||
- MOOCode | |||
- MoonScript | |||
- MQL | |||
- Mscgen | |||
- MuPAD | |||
- MXML | |||
- Myghty | |||
- NASM | |||
- Nemerle | |||
- nesC | |||
- NewLisp | |||
- Newspeak | |||
- Nginx configuration file | |||
- Nimrod | |||
- Nix | |||
- NSIS | |||
- Objective-C | |||
- Objective-C++ | |||
- Objective-J | |||
- OCaml | |||
- Octave | |||
- Ooc | |||
- Opa | |||
- OpenEdge ABL | |||
- Pan | |||
- Pawn | |||
- Perl: | |||
- Perl | |||
- Perl6 | |||
- PHP | |||
- Pig | |||
- Pike | |||
- PostScript | |||
- POVRay | |||
- PowerShell | |||
- Prolog | |||
- Properties | |||
- Protocol Buffer | |||
- Puppet | |||
- QBasic | |||
- QML | |||
- Racket | |||
- Ragel: | |||
- Ragel | |||
- Ragel in C Host | |||
- Ragel in CPP Host | |||
- Ragel in D Host | |||
- Ragel in Java Host | |||
- Ragel in Objective C Host | |||
- Ragel in Ruby Host | |||
- RConsole | |||
- Rd | |||
- REBOL | |||
- Red | |||
- Redcode | |||
- reg | |||
- reStructuredText | |||
- Rexx | |||
- RobotFramework | |||
- RPMSpec | |||
- RQL | |||
- RSL | |||
- Rust | |||
- S | |||
- Sass: | |||
- Sass | |||
- SCSS | |||
- Scala | |||
- Scalate Server Page | |||
- Scaml | |||
- Scheme | |||
- Scilab | |||
- Shell Session | |||
- Slim | |||
- Smali | |||
- Smalltalk | |||
- Smarty | |||
- Snobol | |||
- SourcePawn | |||
- SPARQL | |||
- SQL: | |||
- SQL | |||
- MySQL | |||
- PL/pgSQL | |||
- PostgreSQL console (psql) | |||
- PostgreSQL SQL dialect | |||
- sqlite3con | |||
- SquidConf | |||
- Stan | |||
- Standard ML | |||
- SWIG | |||
- systemverilog | |||
- Tcl | |||
- Tcsh | |||
- Tea | |||
- TeX | |||
- Todotxt | |||
- Treetop | |||
- TypeScript | |||
- UrbiScript | |||
- Vala | |||
- VB.net | |||
- VCTreeStatus | |||
- Velocity | |||
- verilog | |||
- VGL | |||
- vhdl | |||
- VimL | |||
- XML: | |||
- XML | |||
- XML+Cheetah | |||
- XML+Django/Jinja | |||
- XML+Evoque | |||
- XML+Lasso | |||
- XML+Mako | |||
- XML+Mako | |||
- XML+Myghty | |||
- XML+PHP | |||
- XML+Ruby | |||
- XML+Smarty | |||
- XML+Velocity | |||
- XQuery | |||
- XSLT | |||
- Xtend | |||
- YAML: | |||
- YAML | |||
- YAML+Jinja | |||
- Zephir |
@@ -7,7 +7,7 @@ import subprocess | |||
from os import path | |||
from pygments import lexers as pgl, util | |||
from ..languages import LANGS | |||
from ..languages import LANGS, LANGS_ALL | |||
from .python import parse_py | |||
__all__ = ["parse", "UnsupportedFileError", "start_parse_servers"] | |||
@@ -41,11 +41,10 @@ def _lang(codelet): | |||
lex = pgl.guess_lexer_for_filename(codelet.filename, codelet.code) | |||
else: | |||
lex = pgl.guess_lexer(codelet.code) | |||
except util.ClassNotFound: | |||
return LANGS_ALL[lex.name] | |||
except (util.ClassNotFound, KeyError): | |||
raise UnsupportedFileError(codelet.filename) | |||
return LANGS.index(lex.name) | |||
def _recv_data(server_socket): | |||
""" | |||
Private function to read string response from a server. It reads a certain | |||
@@ -7,7 +7,7 @@ setup( | |||
install_requires = [ | |||
"Flask>=0.10.1", "gunicorn>=18.0", "pygments>=1.6", "requests>=2.2.0", | |||
"beautifulsoup4>=3.2.1", "oursql>=0.9.3.1", "mmh3>=2.3", | |||
"python-dateutil>=2.2"], | |||
"PyYAML>=3.11", "python-dateutil>=2.2"], | |||
author = "Benjamin Attal, Ben Kurtovic, Severyn Kozak", | |||
license = "MIT", | |||
url = "https://github.com/earwig/bitshift" | |||