From e77de2305cc9c96bc4ce046211b4991a89a73f68 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 6 Jun 2014 01:29:10 -0400 Subject: [PATCH] Start working on new language system. --- .gitignore | 1 + bitshift/crawler/crawl.py | 4 +- bitshift/crawler/crawler.py | 4 +- bitshift/languages.json | 283 -------------------------------------------- bitshift/languages.py | 7 +- bitshift/languages.yml | 129 ++++++++++++++++++++ setup.py | 2 +- 7 files changed, 139 insertions(+), 291 deletions(-) delete mode 100644 bitshift/languages.json create mode 100644 bitshift/languages.yml diff --git a/.gitignore b/.gitignore index b8b2697..bfad355 100644 --- a/.gitignore +++ b/.gitignore @@ -52,3 +52,4 @@ target # Ctags */tags logs +Gemfile.lock diff --git a/bitshift/crawler/crawl.py b/bitshift/crawler/crawl.py index d34deb9..40238af 100644 --- a/bitshift/crawler/crawl.py +++ b/bitshift/crawler/crawl.py @@ -8,6 +8,7 @@ import logging import logging.handlers import os import Queue +import sys import time from threading import Event @@ -37,9 +38,9 @@ def crawl(): crawler.BitbucketCrawler(repo_clone_queue, run_event), indexer.GitIndexer(repo_clone_queue, run_event)] + parse_servers = start_parse_servers() for thread in threads: thread.start() - parse_servers = start_parse_servers() try: while 1: @@ -77,5 +78,4 @@ def _configure_logging(): root_logger.setLevel(logging.NOTSET) if __name__ == "__main__": - _configure_logging() crawl() diff --git a/bitshift/crawler/crawler.py b/bitshift/crawler/crawler.py index 655725f..ba04412 100644 --- a/bitshift/crawler/crawler.py +++ b/bitshift/crawler/crawler.py @@ -86,8 +86,8 @@ class GitHubCrawler(threading.Thread): time.sleep(1) self.clone_queue.put(indexer.GitRepository( - repo["html_url"], repo["full_name"].replace("/", ""), - "GitHub", repo_ranks[repo["full_name"]])) + repo["html_url"], repo["full_name"], "GitHub", + repo_ranks[repo["full_name"]])) if int(resp.headers["x-ratelimit-remaining"]) == 0: time.sleep(int(resp.headers["x-ratelimit-reset"]) - diff --git a/bitshift/languages.json b/bitshift/languages.json deleted file mode 100644 index d855164..0000000 --- a/bitshift/languages.json +++ /dev/null @@ -1,283 +0,0 @@ -{ - "_comment" : "A list of programming languages supported by `bitshift`.", - "languages" : [ - "Debian Sourcelist", - "Delphi", - "JavaScript+Mako", - "Brainfuck", - "Ceylon", - "JavaScript+Django/Jinja", - "HTML+Evoque", - "NumPy", - "Modula-2", - "LiveScript", - "Nimrod", - "Bash", - "HTML+Django/Jinja", - "CSS+PHP", - "XML+Lasso", - "VimL", - "CSS+Genshi Text", - "Fancy", - "Coldfusion HTML", - "cfstatement", - "Scalate Server Page", - "Smarty", - "XML+Evoque", - "haXe", - "PowerShell", - "Tea", - "HTML+Cheetah", - "Mason", - "Django/Jinja", - "JAGS", - "ApacheConf", - "DTD", - "Lighttpd configuration file", - "Java", - "JavaScript+Genshi Text", - "Scheme", - "Nemerle", - "RHTML", - "Ragel in Java Host", - "Darcs Patch", - "Puppet", - "Octave", - "CoffeeScript", - "Ragel in D Host", - "Scilab", - "Monkey", - "HTML+Myghty", - "CSS", - "JavaScript+Smarty", - "Io", - "COBOLFree", - "Asymptote", - "vhdl", - "CSS+Ruby", - "Fortran", - "d-objdump", - "MySQL", - "REBOL", - "C++", - "ERB", - "CBM BASIC V2", - "Befunge", - "Julia", - "MoonScript", - "Ruby", - "XML+Smarty", - "Dylan", - "Groovy", - "MoinMoin/Trac Wiki markup", - "autohotkey", - "C", - "HTML", - "Felix", - "CMake", - "NSIS", - "SourcePawn", - "Mako", - "VGL", - "Velocity", - "Koka", - "CUDA", - "Gnuplot", - "IRC logs", - "Prolog", - "Python", - "CSS+Django/Jinja", - "verilog", - "Smalltalk", - "JavaScript+Myghty", - "YAML", - "Julia console", - "ANTLR With ActionScript Target", - "XML+Mako", - "XSLT", - "UrbiScript", - "Scaml", - "S", - "DylanLID", - "MAQL", - "sqlite3con", - "Boo", - "OCaml", - "eC", - "ActionScript", - "VB.net", - "SquidConf", - "XQuery", - "D", - "Fantom", - "Gettext Catalog", - "Logos", - "Lasso", - "SCSS", - "BBCode", - "Haml", - "FoxPro", - "MuPAD", - "XML+Ruby", - "Dart", - "IDL", - "dg", - "Evoque", - "Jade", - "c-objdump", - "Kconfig", - "Java Server Page", - "reg", - "ABAP", - "XML+Velocity", - "JavaScript+Cheetah", - "HTML+Mako", - "Ragel in Ruby Host", - "RobotFramework", - "Protocol Buffer", - "CFEngine3", - "Ragel", - "GLSL", - "COBOL", - "TypeScript", - "Ada", - "PostgreSQL SQL dialect", - "Xtend", - "Logtalk", - "objdump", - "CSS+Mako", - "ca65", - "Objective-C++", - "Gherkin", - "HTML+PHP", - "Makefile", - "PostScript", - "Hxml", - "Kotlin", - "PL/pgSQL", - "Vala", - "Haskell", - "Bro", - "Lua", - "POVRay", - "Sass", - "ANTLR With Java Target", - "Tcl", - "ANTLR With ObjectiveC Target", - "JavaScript+Ruby", - "Racket", - "AspectJ", - "Base Makefile", - "ANTLR With Python Target", - "cpp-objdump", - "Genshi Text", - "Ioke", - "PyPy Log", - "Croc", - "Objective-J", - "GAS", - "Batchfile", - "Snobol", - "XML", - "ANTLR", - "Opa", - "XML+Cheetah", - "Go", - "Diff", - "MiniD", - "Cython", - "Ragel in C Host", - "Erlang", - "Debian Control file", - "aspx-vb", - "BUGS", - "Ragel in CPP Host", - "aspx-cs", - "Properties", - "Groff", - "Clojure", - "Modelica", - "QML", - "JavaScript+Lasso", - "ANTLR With Perl Target", - "Genshi", - "BlitzMax", - "Treetop", - "Matlab", - "Myghty", - "HTML+Genshi", - "Duel", - "Perl", - "FSharp", - "reStructuredText", - "NewLisp", - "Scala", - "CSS+Lasso", - "XML+PHP", - "Stan", - "INI", - "MOOCode", - "Shell Session", - "RPMSpec", - "Newspeak", - "Bash Session", - "Coq", - "Raw token data", - "Tcsh", - "HTML+Lasso", - "C#", - "Gosu Template", - "RConsole", - "MXML", - "TeX", - "CSS+Smarty", - "Text only", - "ANTLR With C# Target", - "OpenEdge ABL", - "Cheetah", - "Smali", - "CSS+Myghty", - "Rd", - "LLVM", - "Standard ML", - "Elixir", - "Nginx configuration file", - "GoodData-CL", - "AppleScript", - "HTML+Smarty", - "Objective-C", - "JavaScript", - "Rust", - "Common Lisp", - "Embedded Ragel", - "ActionScript 3", - "systemverilog", - "Literate Haskell", - "PHP", - "ANTLR With CPP Target", - "Gosu", - "Hybris", - "JavaScript+PHP", - "Factor", - "HTML+Velocity", - "Mscgen", - "Ooc", - "SQL", - "HTTP", - "ECL", - "Redcode", - "Ragel in Objective C Host", - "XML+Django/Jinja", - "Awk", - "JSON", - "NASM", - "ANTLR With Ruby Target", - "XML+Myghty", - "AutoIt", - "Mako", - "CSS+Mako", - "HTML+Mako", - "XML+Mako", - "JavaScript+Mako" - ] -} diff --git a/bitshift/languages.py b/bitshift/languages.py index 36d7f63..c5f9038 100644 --- a/bitshift/languages.py +++ b/bitshift/languages.py @@ -1,5 +1,6 @@ -import json from os import path -with open(path.join(path.dirname(__file__), "languages.json")) as lang_json: - LANGS = [lang for lang in json.load(lang_json)["languages"]] +import yaml + +with open(path.join(path.dirname(__file__), "languages.yml")) as lang_yaml: + LANGS = [lang for lang in yaml.load(lang_yaml)["languages"]] diff --git a/bitshift/languages.yml b/bitshift/languages.yml new file mode 100644 index 0000000..ac272cb --- /dev/null +++ b/bitshift/languages.yml @@ -0,0 +1,129 @@ +# A list of programming languages supported by bitshift: + +languages: + # With parsers: + - Python: + - Python + - Python 3 + - Python 3.0 Traceback + - Python console session + - Python Traceback + - C + - Java + - Ruby + + # Without parsers: + - ABAP + - APL + - ActionScript: + - ActionScript + - ActionScript 3 + - ANTLR: + - ANTLR + - ANTLR With ActionScript Target + - ANTLR With CPP Target + - "ANTLR With C# Target" + - ANTLR With Java Target + - ANTLR With ObjectiveC Target + - ANTLR With Perl Target + - ANTLR With Python Target + - ANTLR With Ruby Target + - Ada + - Agda + - Alloy + - AmbientTalk + - ApacheConf + - AppleScript + - AspectJ + - Asymptote + - AutoIt + - Awk + - BBCode + - BUGS + - Bash: + - Bash + - Bash Session + - Batchfile + - Befunge + - BlitzBasic: + - BlitzBasic + - BlitzMax + - Boo + - Brainfuck + - Bro + - "C#" + - C++ + + - CSS: + - CSS + - CSS+Django/Jinja + - CSS+Genshi Text + - CSS+Lasso + - CSS+Mako + - CSS+Mako + - CSS+Myghty + - CSS+PHP + - CSS+Ruby + - CSS+Smarty + - Haskell: + - Haskell + - Literate Haskell + - HTML: + - HTML + - HTML+Cheetah + - HTML+Django/Jinja + - HTML+Evoque + - HTML+Genshi + - HTML+Lasso + - HTML+Mako + - HTML+Mako + - HTML+Myghty + - HTML+PHP + - HTML+Smarty + - HTML+Velocity + - JavaScript: + - JavaScript + - JavaScript+Cheetah + - JavaScript+Django/Jinja + - JavaScript+Genshi Text + - JavaScript+Lasso + - JavaScript+Mak + - JavaScript+Mako + - JavaScript+Myghty + - JavaScript+PHP + - JavaScript+Ruby + - JavaScript+Smarty + - Julia: + - Julia + - Julia console + - Makefile + - Makefile + - Base Makefile + - Objective-C + - Objective-C++ + - Objective-J + - Ragel: + - Ragel + - Ragel in C Host + - Ragel in CPP Host + - Ragel in D Host + - Ragel in Java Host + - Ragel in Objective C Host + - Ragel in Ruby Host + - Sass: + - Sass + - SCSS + - XML: + - XML + - XML+Cheetah + - XML+Django/Jinja + - XML+Evoque + - XML+Lasso + - XML+Mako + - XML+Mako + - XML+Myghty + - XML+PHP + - XML+Ruby + - XML+Smarty + - XML+Velocity + - YAML diff --git a/setup.py b/setup.py index f268991..869c896 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( install_requires = [ "Flask>=0.10.1", "gunicorn>=18.0", "pygments>=1.6", "requests>=2.2.0", "beautifulsoup4>=3.2.1", "oursql>=0.9.3.1", "mmh3>=2.3", - "python-dateutil>=2.2"], + "PyYAML>=3.11", "python-dateutil>=2.2"], author = "Benjamin Attal, Ben Kurtovic, Severyn Kozak", license = "MIT", url = "https://github.com/earwig/bitshift"