Browse Source

Merge branch 'feature/lang_fixes' into develop

tags/v1.0^2
Ben Kurtovic 10 years ago
parent
commit
c78b271bd7
8 changed files with 396 additions and 295 deletions
  1. +1
    -0
      .gitignore
  2. +2
    -2
      bitshift/crawler/crawl.py
  3. +2
    -2
      bitshift/crawler/crawler.py
  4. +0
    -283
      bitshift/languages.json
  5. +19
    -3
      bitshift/languages.py
  6. +368
    -0
      bitshift/languages.yml
  7. +3
    -4
      bitshift/parser/__init__.py
  8. +1
    -1
      setup.py

+ 1
- 0
.gitignore View File

@@ -52,3 +52,4 @@ target
# Ctags # Ctags
*/tags */tags
logs logs
Gemfile.lock

+ 2
- 2
bitshift/crawler/crawl.py View File

@@ -8,6 +8,7 @@ import logging
import logging.handlers import logging.handlers
import os import os
import Queue import Queue
import sys
import time import time
from threading import Event from threading import Event


@@ -37,9 +38,9 @@ def crawl():
crawler.BitbucketCrawler(repo_clone_queue, run_event), crawler.BitbucketCrawler(repo_clone_queue, run_event),
indexer.GitIndexer(repo_clone_queue, run_event)] indexer.GitIndexer(repo_clone_queue, run_event)]


parse_servers = start_parse_servers()
for thread in threads: for thread in threads:
thread.start() thread.start()
parse_servers = start_parse_servers()


try: try:
while 1: while 1:
@@ -77,5 +78,4 @@ def _configure_logging():
root_logger.setLevel(logging.NOTSET) root_logger.setLevel(logging.NOTSET)


if __name__ == "__main__": if __name__ == "__main__":
_configure_logging()
crawl() crawl()

+ 2
- 2
bitshift/crawler/crawler.py View File

@@ -86,8 +86,8 @@ class GitHubCrawler(threading.Thread):
time.sleep(1) time.sleep(1)


self.clone_queue.put(indexer.GitRepository( self.clone_queue.put(indexer.GitRepository(
repo["html_url"], repo["full_name"].replace("/", ""),
"GitHub", repo_ranks[repo["full_name"]]))
repo["html_url"], repo["full_name"], "GitHub",
repo_ranks[repo["full_name"]]))


if int(resp.headers["x-ratelimit-remaining"]) == 0: if int(resp.headers["x-ratelimit-remaining"]) == 0:
time.sleep(int(resp.headers["x-ratelimit-reset"]) - time.sleep(int(resp.headers["x-ratelimit-reset"]) -


+ 0
- 283
bitshift/languages.json View File

@@ -1,283 +0,0 @@
{
"_comment" : "A list of programming languages supported by `bitshift`.",
"languages" : [
"Debian Sourcelist",
"Delphi",
"JavaScript+Mako",
"Brainfuck",
"Ceylon",
"JavaScript+Django/Jinja",
"HTML+Evoque",
"NumPy",
"Modula-2",
"LiveScript",
"Nimrod",
"Bash",
"HTML+Django/Jinja",
"CSS+PHP",
"XML+Lasso",
"VimL",
"CSS+Genshi Text",
"Fancy",
"Coldfusion HTML",
"cfstatement",
"Scalate Server Page",
"Smarty",
"XML+Evoque",
"haXe",
"PowerShell",
"Tea",
"HTML+Cheetah",
"Mason",
"Django/Jinja",
"JAGS",
"ApacheConf",
"DTD",
"Lighttpd configuration file",
"Java",
"JavaScript+Genshi Text",
"Scheme",
"Nemerle",
"RHTML",
"Ragel in Java Host",
"Darcs Patch",
"Puppet",
"Octave",
"CoffeeScript",
"Ragel in D Host",
"Scilab",
"Monkey",
"HTML+Myghty",
"CSS",
"JavaScript+Smarty",
"Io",
"COBOLFree",
"Asymptote",
"vhdl",
"CSS+Ruby",
"Fortran",
"d-objdump",
"MySQL",
"REBOL",
"C++",
"ERB",
"CBM BASIC V2",
"Befunge",
"Julia",
"MoonScript",
"Ruby",
"XML+Smarty",
"Dylan",
"Groovy",
"MoinMoin/Trac Wiki markup",
"autohotkey",
"C",
"HTML",
"Felix",
"CMake",
"NSIS",
"SourcePawn",
"Mako",
"VGL",
"Velocity",
"Koka",
"CUDA",
"Gnuplot",
"IRC logs",
"Prolog",
"Python",
"CSS+Django/Jinja",
"verilog",
"Smalltalk",
"JavaScript+Myghty",
"YAML",
"Julia console",
"ANTLR With ActionScript Target",
"XML+Mako",
"XSLT",
"UrbiScript",
"Scaml",
"S",
"DylanLID",
"MAQL",
"sqlite3con",
"Boo",
"OCaml",
"eC",
"ActionScript",
"VB.net",
"SquidConf",
"XQuery",
"D",
"Fantom",
"Gettext Catalog",
"Logos",
"Lasso",
"SCSS",
"BBCode",
"Haml",
"FoxPro",
"MuPAD",
"XML+Ruby",
"Dart",
"IDL",
"dg",
"Evoque",
"Jade",
"c-objdump",
"Kconfig",
"Java Server Page",
"reg",
"ABAP",
"XML+Velocity",
"JavaScript+Cheetah",
"HTML+Mako",
"Ragel in Ruby Host",
"RobotFramework",
"Protocol Buffer",
"CFEngine3",
"Ragel",
"GLSL",
"COBOL",
"TypeScript",
"Ada",
"PostgreSQL SQL dialect",
"Xtend",
"Logtalk",
"objdump",
"CSS+Mako",
"ca65",
"Objective-C++",
"Gherkin",
"HTML+PHP",
"Makefile",
"PostScript",
"Hxml",
"Kotlin",
"PL/pgSQL",
"Vala",
"Haskell",
"Bro",
"Lua",
"POVRay",
"Sass",
"ANTLR With Java Target",
"Tcl",
"ANTLR With ObjectiveC Target",
"JavaScript+Ruby",
"Racket",
"AspectJ",
"Base Makefile",
"ANTLR With Python Target",
"cpp-objdump",
"Genshi Text",
"Ioke",
"PyPy Log",
"Croc",
"Objective-J",
"GAS",
"Batchfile",
"Snobol",
"XML",
"ANTLR",
"Opa",
"XML+Cheetah",
"Go",
"Diff",
"MiniD",
"Cython",
"Ragel in C Host",
"Erlang",
"Debian Control file",
"aspx-vb",
"BUGS",
"Ragel in CPP Host",
"aspx-cs",
"Properties",
"Groff",
"Clojure",
"Modelica",
"QML",
"JavaScript+Lasso",
"ANTLR With Perl Target",
"Genshi",
"BlitzMax",
"Treetop",
"Matlab",
"Myghty",
"HTML+Genshi",
"Duel",
"Perl",
"FSharp",
"reStructuredText",
"NewLisp",
"Scala",
"CSS+Lasso",
"XML+PHP",
"Stan",
"INI",
"MOOCode",
"Shell Session",
"RPMSpec",
"Newspeak",
"Bash Session",
"Coq",
"Raw token data",
"Tcsh",
"HTML+Lasso",
"C#",
"Gosu Template",
"RConsole",
"MXML",
"TeX",
"CSS+Smarty",
"Text only",
"ANTLR With C# Target",
"OpenEdge ABL",
"Cheetah",
"Smali",
"CSS+Myghty",
"Rd",
"LLVM",
"Standard ML",
"Elixir",
"Nginx configuration file",
"GoodData-CL",
"AppleScript",
"HTML+Smarty",
"Objective-C",
"JavaScript",
"Rust",
"Common Lisp",
"Embedded Ragel",
"ActionScript 3",
"systemverilog",
"Literate Haskell",
"PHP",
"ANTLR With CPP Target",
"Gosu",
"Hybris",
"JavaScript+PHP",
"Factor",
"HTML+Velocity",
"Mscgen",
"Ooc",
"SQL",
"HTTP",
"ECL",
"Redcode",
"Ragel in Objective C Host",
"XML+Django/Jinja",
"Awk",
"JSON",
"NASM",
"ANTLR With Ruby Target",
"XML+Myghty",
"AutoIt",
"Mako",
"CSS+Mako",
"HTML+Mako",
"XML+Mako",
"JavaScript+Mako"
]
}

+ 19
- 3
bitshift/languages.py View File

@@ -1,5 +1,21 @@
import json
from os import path from os import path


with open(path.join(path.dirname(__file__), "languages.json")) as lang_json:
LANGS = [lang for lang in json.load(lang_json)["languages"]]
import yaml

__all__ = ["LANGS", "LANGS_ALL"]

def _load_langs():
    """Load the supported-language table from ``languages.yml``.

    Each item under the file's top-level ``languages`` key is either a plain
    string (a language known by a single name) or a one-key mapping whose key
    is the language name and whose value is the list of names that should all
    resolve to that language.

    Returns a ``(langs, all_langs)`` tuple: ``langs`` is the list of language
    names in file order, and ``all_langs`` maps every listed name to the index
    of its language within ``langs``.
    """
    filename = path.join(path.dirname(__file__), "languages.yml")
    with open(filename) as fp:
        # The file is plain data, so use the safe loader rather than the
        # arbitrary-object-constructing default of yaml.load().
        data = yaml.safe_load(fp)["languages"]

    langs = []
    all_langs = {}
    for index, entry in enumerate(data):
        if isinstance(entry, dict):
            # One-key mapping: next(iter(...)) fetches that key on both
            # Python 2 and 3, unlike indexing into keys()/values().
            name = next(iter(entry))
            names = entry[name]
        else:
            name = entry
            names = [entry]
        langs.append(name)
        for val in names:
            all_langs[val] = index
    return langs, all_langs

LANGS, LANGS_ALL = _load_langs()

+ 368
- 0
bitshift/languages.yml View File

@@ -0,0 +1,368 @@
# A list of programming languages supported by bitshift:

languages:
# With parsers:
- Python:
- Python
- Python 3
- Python 3.0 Traceback
- Python console session
- Python Traceback
- NumPy
- PyPy Log
- C
- Java
- Ruby:
- Ruby
- Ruby irb session

# Without parsers:
- ABAP
- APL
- ActionScript:
- ActionScript
- ActionScript 3
- ANTLR:
- ANTLR
- ANTLR With ActionScript Target
- ANTLR With CPP Target
- "ANTLR With C# Target"
- ANTLR With Java Target
- ANTLR With ObjectiveC Target
- ANTLR With Perl Target
- ANTLR With Python Target
- ANTLR With Ruby Target
- Ada
- Agda:
- Agda
- Literate Agda
- Alloy
- AmbientTalk
- ApacheConf
- AppleScript
- AspectJ
- aspx-cs
- aspx-vb
- Asymptote
- autohotkey
- AutoIt
- Awk
- BBCode
- BUGS
- Bash:
- Bash
- Bash Session
- Batchfile
- Befunge
- BlitzBasic:
- BlitzBasic
- BlitzMax
- Boo
- Brainfuck
- Bro
- "C#"
- C++
- ca65
- CBM BASIC V2
- Ceylon
- CFEngine3
- cfstatement
- ChaiScript
- Chapel
- Cheetah
- Cirru
- Clay
- Clojure:
- Clojure
- ClojureScript
- CMake
- COBOL:
- COBOL
- COBOLFree
- CoffeeScript
- Coldfusion CFC
- Coldfusion HTML
- Common Lisp
- Coq
- Croc
- Cryptol:
- Cryptol
- Literate Cryptol
- CSS:
- CSS
- CSS+Django/Jinja
- CSS+Genshi Text
- CSS+Lasso
- CSS+Mako
- CSS+Mako
- CSS+Myghty
- CSS+PHP
- CSS+Ruby
- CSS+Smarty
- CUDA
- Cypher
- Cython
- D
- Darcs Patch
- Dart
- Debian Control file
- Debian Sourcelist
- Delphi
- dg
- Diff
- Django/Jinja
- Docker
- DTD
- Duel
- Dylan:
- Dylan
- Dylan session
- DylanLID
- EBNF
- eC
- ECL
- Eiffel
- Elixir:
- Elixir
- Elixir iex session
- Embedded Ragel
- ERB:
- ERB
- RHTML
- Erlang:
- Erlang
- Erlang erl session
- Evoque
- Factor
- Fancy
- Fantom
- Felix
- Fortran
- FoxPro
- FSharp
- GAP
- GAS
- Genshi
- Genshi Text
- Gettext Catalog
- Gherkin
- GLSL
- Gnuplot
- Go
- Golo
- GoodData-CL
- Gosu
- Gosu Template
- Groff
- Groovy
- Haml
- Handlebars
- Haskell:
- Haskell
- Literate Haskell
- Haxe
- HTML:
- HTML
- HTML+Cheetah
- HTML+Django/Jinja
- HTML+Evoque
- HTML+Genshi
- HTML+Lasso
- HTML+Mako
- HTML+Mako
- HTML+Myghty
- HTML+PHP
- HTML+Smarty
- HTML+Velocity
- Hxml
- Hy
- Hybris
- IDL
- Idris:
- Idris
- Literate Idris
- Igor
- Inform 6:
- Inform 6
- Inform 6 template
- Inform 7
- INI
- Io
- Ioke
- Jade
- JAGS
- Jasmin
- Java Server Page
- JavaScript:
- JavaScript
- JavaScript+Cheetah
- JavaScript+Django/Jinja
- JavaScript+Genshi Text
- JavaScript+Lasso
- JavaScript+Mako
- JavaScript+Mako
- JavaScript+Myghty
- JavaScript+PHP
- JavaScript+Ruby
- JavaScript+Smarty
- JSON
- Julia:
- Julia
- Julia console
- Kal
- Kconfig
- Koka
- Kotlin
- Lasso
- Lighttpd configuration file
- Limbo
- LiveScript
- LLVM
- Logos
- Logtalk
- LSL
- Lua
- Makefile:
- Makefile
- Base Makefile
- Mako
- MAQL
- Mask
- Mason
- Mathematica
- Matlab:
- Matlab
- Matlab session
- MiniD
- Modelica
- Modula-2
- Monkey
- MOOCode
- MoonScript
- MQL
- Mscgen
- MuPAD
- MXML
- Myghty
- NASM
- Nemerle
- nesC
- NewLisp
- Newspeak
- Nginx configuration file
- Nimrod
- Nix
- NSIS
- Objective-C
- Objective-C++
- Objective-J
- OCaml
- Octave
- Ooc
- Opa
- OpenEdge ABL
- Pan
- Pawn
- Perl:
- Perl
- Perl6
- PHP
- Pig
- Pike
- PostScript
- POVRay
- PowerShell
- Prolog
- Properties
- Protocol Buffer
- Puppet
- QBasic
- QML
- Racket
- Ragel:
- Ragel
- Ragel in C Host
- Ragel in CPP Host
- Ragel in D Host
- Ragel in Java Host
- Ragel in Objective C Host
- Ragel in Ruby Host
- RConsole
- Rd
- REBOL
- Red
- Redcode
- reg
- reStructuredText
- Rexx
- RobotFramework
- RPMSpec
- RQL
- RSL
- Rust
- S
- Sass:
- Sass
- SCSS
- Scala
- Scalate Server Page
- Scaml
- Scheme
- Scilab
- Shell Session
- Slim
- Smali
- Smalltalk
- Smarty
- Snobol
- SourcePawn
- SPARQL
- SQL:
- SQL
- MySQL
- PL/pgSQL
- PostgreSQL console (psql)
- PostgreSQL SQL dialect
- sqlite3con

- SquidConf
- Stan
- Standard ML
- SWIG
- systemverilog
- Tcl
- Tcsh
- Tea
- TeX
- Todotxt
- Treetop
- TypeScript
- UrbiScript
- Vala
- VB.net
- VCTreeStatus
- Velocity
- verilog
- VGL
- vhdl
- VimL
- XML:
- XML
- XML+Cheetah
- XML+Django/Jinja
- XML+Evoque
- XML+Lasso
- XML+Mako
- XML+Mako
- XML+Myghty
- XML+PHP
- XML+Ruby
- XML+Smarty
- XML+Velocity
- XQuery
- XSLT
- Xtend
- YAML:
- YAML
- YAML+Jinja
- Zephir

+ 3
- 4
bitshift/parser/__init__.py View File

@@ -7,7 +7,7 @@ import subprocess
from os import path from os import path
from pygments import lexers as pgl, util from pygments import lexers as pgl, util


from ..languages import LANGS
from ..languages import LANGS, LANGS_ALL
from .python import parse_py from .python import parse_py


__all__ = ["parse", "UnsupportedFileError", "start_parse_servers"] __all__ = ["parse", "UnsupportedFileError", "start_parse_servers"]
@@ -41,11 +41,10 @@ def _lang(codelet):
lex = pgl.guess_lexer_for_filename(codelet.filename, codelet.code) lex = pgl.guess_lexer_for_filename(codelet.filename, codelet.code)
else: else:
lex = pgl.guess_lexer(codelet.code) lex = pgl.guess_lexer(codelet.code)
except util.ClassNotFound:
return LANGS_ALL[lex.name]
except (util.ClassNotFound, KeyError):
raise UnsupportedFileError(codelet.filename) raise UnsupportedFileError(codelet.filename)


return LANGS.index(lex.name)

def _recv_data(server_socket): def _recv_data(server_socket):
""" """
Private function to read string response from a server. It reads a certain Private function to read string response from a server. It reads a certain


+ 1
- 1
setup.py View File

@@ -7,7 +7,7 @@ setup(
install_requires = [ install_requires = [
"Flask>=0.10.1", "gunicorn>=18.0", "pygments>=1.6", "requests>=2.2.0", "Flask>=0.10.1", "gunicorn>=18.0", "pygments>=1.6", "requests>=2.2.0",
"beautifulsoup4>=3.2.1", "oursql>=0.9.3.1", "mmh3>=2.3", "beautifulsoup4>=3.2.1", "oursql>=0.9.3.1", "mmh3>=2.3",
"python-dateutil>=2.2"],
"PyYAML>=3.11", "python-dateutil>=2.2"],
author = "Benjamin Attal, Ben Kurtovic, Severyn Kozak", author = "Benjamin Attal, Ben Kurtovic, Severyn Kozak",
license = "MIT", license = "MIT",
url = "https://github.com/earwig/bitshift" url = "https://github.com/earwig/bitshift"


Loading…
Cancel
Save