Browse Source

Merge branch 'feature/lang_fixes' into develop

tags/v1.0^2
Ben Kurtovic 10 years ago
parent
commit
c78b271bd7
8 changed files with 396 additions and 295 deletions
  1. +1
    -0
      .gitignore
  2. +2
    -2
      bitshift/crawler/crawl.py
  3. +2
    -2
      bitshift/crawler/crawler.py
  4. +0
    -283
      bitshift/languages.json
  5. +19
    -3
      bitshift/languages.py
  6. +368
    -0
      bitshift/languages.yml
  7. +3
    -4
      bitshift/parser/__init__.py
  8. +1
    -1
      setup.py

+ 1
- 0
.gitignore View File

@@ -52,3 +52,4 @@ target
# Ctags
*/tags
logs
Gemfile.lock

+ 2
- 2
bitshift/crawler/crawl.py View File

@@ -8,6 +8,7 @@ import logging
import logging.handlers
import os
import Queue
import sys
import time
from threading import Event

@@ -37,9 +38,9 @@ def crawl():
crawler.BitbucketCrawler(repo_clone_queue, run_event),
indexer.GitIndexer(repo_clone_queue, run_event)]

parse_servers = start_parse_servers()
for thread in threads:
thread.start()
parse_servers = start_parse_servers()

try:
while 1:
@@ -77,5 +78,4 @@ def _configure_logging():
root_logger.setLevel(logging.NOTSET)

if __name__ == "__main__":
_configure_logging()
crawl()

+ 2
- 2
bitshift/crawler/crawler.py View File

@@ -86,8 +86,8 @@ class GitHubCrawler(threading.Thread):
time.sleep(1)

self.clone_queue.put(indexer.GitRepository(
repo["html_url"], repo["full_name"].replace("/", ""),
"GitHub", repo_ranks[repo["full_name"]]))
repo["html_url"], repo["full_name"], "GitHub",
repo_ranks[repo["full_name"]]))

if int(resp.headers["x-ratelimit-remaining"]) == 0:
time.sleep(int(resp.headers["x-ratelimit-reset"]) -


+ 0
- 283
bitshift/languages.json View File

@@ -1,283 +0,0 @@
{
"_comment" : "A list of programming languages supported by `bitshift`.",
"languages" : [
"Debian Sourcelist",
"Delphi",
"JavaScript+Mako",
"Brainfuck",
"Ceylon",
"JavaScript+Django/Jinja",
"HTML+Evoque",
"NumPy",
"Modula-2",
"LiveScript",
"Nimrod",
"Bash",
"HTML+Django/Jinja",
"CSS+PHP",
"XML+Lasso",
"VimL",
"CSS+Genshi Text",
"Fancy",
"Coldfusion HTML",
"cfstatement",
"Scalate Server Page",
"Smarty",
"XML+Evoque",
"haXe",
"PowerShell",
"Tea",
"HTML+Cheetah",
"Mason",
"Django/Jinja",
"JAGS",
"ApacheConf",
"DTD",
"Lighttpd configuration file",
"Java",
"JavaScript+Genshi Text",
"Scheme",
"Nemerle",
"RHTML",
"Ragel in Java Host",
"Darcs Patch",
"Puppet",
"Octave",
"CoffeeScript",
"Ragel in D Host",
"Scilab",
"Monkey",
"HTML+Myghty",
"CSS",
"JavaScript+Smarty",
"Io",
"COBOLFree",
"Asymptote",
"vhdl",
"CSS+Ruby",
"Fortran",
"d-objdump",
"MySQL",
"REBOL",
"C++",
"ERB",
"CBM BASIC V2",
"Befunge",
"Julia",
"MoonScript",
"Ruby",
"XML+Smarty",
"Dylan",
"Groovy",
"MoinMoin/Trac Wiki markup",
"autohotkey",
"C",
"HTML",
"Felix",
"CMake",
"NSIS",
"SourcePawn",
"Mako",
"VGL",
"Velocity",
"Koka",
"CUDA",
"Gnuplot",
"IRC logs",
"Prolog",
"Python",
"CSS+Django/Jinja",
"verilog",
"Smalltalk",
"JavaScript+Myghty",
"YAML",
"Julia console",
"ANTLR With ActionScript Target",
"XML+Mako",
"XSLT",
"UrbiScript",
"Scaml",
"S",
"DylanLID",
"MAQL",
"sqlite3con",
"Boo",
"OCaml",
"eC",
"ActionScript",
"VB.net",
"SquidConf",
"XQuery",
"D",
"Fantom",
"Gettext Catalog",
"Logos",
"Lasso",
"SCSS",
"BBCode",
"Haml",
"FoxPro",
"MuPAD",
"XML+Ruby",
"Dart",
"IDL",
"dg",
"Evoque",
"Jade",
"c-objdump",
"Kconfig",
"Java Server Page",
"reg",
"ABAP",
"XML+Velocity",
"JavaScript+Cheetah",
"HTML+Mako",
"Ragel in Ruby Host",
"RobotFramework",
"Protocol Buffer",
"CFEngine3",
"Ragel",
"GLSL",
"COBOL",
"TypeScript",
"Ada",
"PostgreSQL SQL dialect",
"Xtend",
"Logtalk",
"objdump",
"CSS+Mako",
"ca65",
"Objective-C++",
"Gherkin",
"HTML+PHP",
"Makefile",
"PostScript",
"Hxml",
"Kotlin",
"PL/pgSQL",
"Vala",
"Haskell",
"Bro",
"Lua",
"POVRay",
"Sass",
"ANTLR With Java Target",
"Tcl",
"ANTLR With ObjectiveC Target",
"JavaScript+Ruby",
"Racket",
"AspectJ",
"Base Makefile",
"ANTLR With Python Target",
"cpp-objdump",
"Genshi Text",
"Ioke",
"PyPy Log",
"Croc",
"Objective-J",
"GAS",
"Batchfile",
"Snobol",
"XML",
"ANTLR",
"Opa",
"XML+Cheetah",
"Go",
"Diff",
"MiniD",
"Cython",
"Ragel in C Host",
"Erlang",
"Debian Control file",
"aspx-vb",
"BUGS",
"Ragel in CPP Host",
"aspx-cs",
"Properties",
"Groff",
"Clojure",
"Modelica",
"QML",
"JavaScript+Lasso",
"ANTLR With Perl Target",
"Genshi",
"BlitzMax",
"Treetop",
"Matlab",
"Myghty",
"HTML+Genshi",
"Duel",
"Perl",
"FSharp",
"reStructuredText",
"NewLisp",
"Scala",
"CSS+Lasso",
"XML+PHP",
"Stan",
"INI",
"MOOCode",
"Shell Session",
"RPMSpec",
"Newspeak",
"Bash Session",
"Coq",
"Raw token data",
"Tcsh",
"HTML+Lasso",
"C#",
"Gosu Template",
"RConsole",
"MXML",
"TeX",
"CSS+Smarty",
"Text only",
"ANTLR With C# Target",
"OpenEdge ABL",
"Cheetah",
"Smali",
"CSS+Myghty",
"Rd",
"LLVM",
"Standard ML",
"Elixir",
"Nginx configuration file",
"GoodData-CL",
"AppleScript",
"HTML+Smarty",
"Objective-C",
"JavaScript",
"Rust",
"Common Lisp",
"Embedded Ragel",
"ActionScript 3",
"systemverilog",
"Literate Haskell",
"PHP",
"ANTLR With CPP Target",
"Gosu",
"Hybris",
"JavaScript+PHP",
"Factor",
"HTML+Velocity",
"Mscgen",
"Ooc",
"SQL",
"HTTP",
"ECL",
"Redcode",
"Ragel in Objective C Host",
"XML+Django/Jinja",
"Awk",
"JSON",
"NASM",
"ANTLR With Ruby Target",
"XML+Myghty",
"AutoIt",
"Mako",
"CSS+Mako",
"HTML+Mako",
"XML+Mako",
"JavaScript+Mako"
]
}

+ 19
- 3
bitshift/languages.py View File

@@ -1,5 +1,21 @@
import json
from os import path

with open(path.join(path.dirname(__file__), "languages.json")) as lang_json:
LANGS = [lang for lang in json.load(lang_json)["languages"]]
import yaml

__all__ = ["LANGS", "LANGS_ALL"]

def _load_langs():
    """Load the supported-language tables from ``languages.yml``.

    Each item under the file's ``languages`` key is either a plain name or a
    single-key mapping from a canonical name to the list of names grouped
    under it.

    Returns a ``(langs, all_langs)`` tuple. ``langs`` is the list of
    canonical names in file order. ``all_langs`` maps every grouped name
    (or, for a plain item, the name itself) to the index of its item in
    ``langs``.
    """
    filename = path.join(path.dirname(__file__), "languages.yml")
    with open(filename) as fp:
        # safe_load only builds plain Python types, which is all this file
        # contains; a bare yaml.load() call is unsafe and is rejected by
        # newer PyYAML releases that require an explicit Loader.
        data = yaml.safe_load(fp)["languages"]

    langs = []
    all_langs = {}
    for i, entry in enumerate(data):
        if isinstance(entry, dict):
            # next(iter(...)) picks the first pair without relying on
            # dict.keys()/values() returning indexable lists.
            name, grouped = next(iter(entry.items()))
            for alias in grouped:
                all_langs[alias] = i
        else:
            name = entry
            all_langs[entry] = i
        langs.append(name)
    return langs, all_langs

LANGS, LANGS_ALL = _load_langs()

+ 368
- 0
bitshift/languages.yml View File

@@ -0,0 +1,368 @@
# A list of programming languages supported by bitshift:

languages:
# With parsers:
- Python:
- Python
- Python 3
- Python 3.0 Traceback
- Python console session
- Python Traceback
- NumPy
- PyPy Log
- C
- Java
- Ruby:
- Ruby
- Ruby irb session

# Without parsers:
- ABAP
- APL
- ActionScript:
- ActionScript
- ActionScript 3
- ANTLR:
- ANTLR
- ANTLR With ActionScript Target
- ANTLR With CPP Target
- "ANTLR With C# Target"
- ANTLR With Java Target
- ANTLR With ObjectiveC Target
- ANTLR With Perl Target
- ANTLR With Python Target
- ANTLR With Ruby Target
- Ada
- Agda:
- Agda
- Literate Agda
- Alloy
- AmbientTalk
- ApacheConf
- AppleScript
- AspectJ
- aspx-cs
- aspx-vb
- Asymptote
- autohotkey
- AutoIt
- Awk
- BBCode
- BUGS
- Bash:
- Bash
- Bash Session
- Batchfile
- Befunge
- BlitzBasic:
- BlitzBasic
- BlitzMax
- Boo
- Brainfuck
- Bro
- "C#"
- C++
- ca65
- CBM BASIC V2
- Ceylon
- CFEngine3
- cfstatement
- ChaiScript
- Chapel
- Cheetah
- Cirru
- Clay
- Clojure:
- Clojure
- ClojureScript
- CMake
- COBOL:
- COBOL
- COBOLFree
- CoffeeScript
- Coldfusion CFC
- Coldfusion HTML
- Common Lisp
- Coq
- Croc
- Cryptol:
- Cryptol
- Literate Cryptol
- CSS:
- CSS
- CSS+Django/Jinja
- CSS+Genshi Text
- CSS+Lasso
- CSS+Mako
- CSS+Mako
- CSS+Myghty
- CSS+PHP
- CSS+Ruby
- CSS+Smarty
- CUDA
- Cypher
- Cython
- D
- Darcs Patch
- Dart
- Debian Control file
- Debian Sourcelist
- Delphi
- dg
- Diff
- Django/Jinja
- Docker
- DTD
- Duel
- Dylan:
- Dylan
- Dylan session
- DylanLID
- EBNF
- eC
- ECL
- Eiffel
- Elixir:
- Elixir
- Elixir iex session
- Embedded Ragel
- ERB:
- ERB
- RHTML
- Erlang:
- Erlang
- Erlang erl session
- Evoque
- Factor
- Fancy
- Fantom
- Felix
- Fortran
- FoxPro
- FSharp
- GAP
- GAS
- Genshi
- Genshi Text
- Gettext Catalog
- Gherkin
- GLSL
- Gnuplot
- Go
- Golo
- GoodData-CL
- Gosu
- Gosu Template
- Groff
- Groovy
- Haml
- Handlebars
- Haskell:
- Haskell
- Literate Haskell
- Haxe
- HTML:
- HTML
- HTML+Cheetah
- HTML+Django/Jinja
- HTML+Evoque
- HTML+Genshi
- HTML+Lasso
- HTML+Mako
- HTML+Mako
- HTML+Myghty
- HTML+PHP
- HTML+Smarty
- HTML+Velocity
- Hxml
- Hy
- Hybris
- IDL
- Idris:
- Idris
- Literate Idris
- Igor
- Inform 6:
- Inform 6
- Inform 6 template
- Inform 7
- INI
- Io
- Ioke
- Jade
- JAGS
- Jasmin
- Java Server Page
- JavaScript:
- JavaScript
- JavaScript+Cheetah
- JavaScript+Django/Jinja
- JavaScript+Genshi Text
- JavaScript+Lasso
- JavaScript+Mako
- JavaScript+Mako
- JavaScript+Myghty
- JavaScript+PHP
- JavaScript+Ruby
- JavaScript+Smarty
- JSON
- Julia:
- Julia
- Julia console
- Kal
- Kconfig
- Koka
- Kotlin
- Lasso
- Lighttpd configuration file
- Limbo
- LiveScript
- LLVM
- Logos
- Logtalk
- LSL
- Lua
- Makefile:
- Makefile
- Base Makefile
- Mako
- MAQL
- Mask
- Mason
- Mathematica
- Matlab:
- Matlab
- Matlab session
- MiniD
- Modelica
- Modula-2
- Monkey
- MOOCode
- MoonScript
- MQL
- Mscgen
- MuPAD
- MXML
- Myghty
- NASM
- Nemerle
- nesC
- NewLisp
- Newspeak
- Nginx configuration file
- Nimrod
- Nix
- NSIS
- Objective-C
- Objective-C++
- Objective-J
- OCaml
- Octave
- Ooc
- Opa
- OpenEdge ABL
- Pan
- Pawn
- Perl:
- Perl
- Perl6
- PHP
- Pig
- Pike
- PostScript
- POVRay
- PowerShell
- Prolog
- Properties
- Protocol Buffer
- Puppet
- QBasic
- QML
- Racket
- Ragel:
- Ragel
- Ragel in C Host
- Ragel in CPP Host
- Ragel in D Host
- Ragel in Java Host
- Ragel in Objective C Host
- Ragel in Ruby Host
- RConsole
- Rd
- REBOL
- Red
- Redcode
- reg
- reStructuredText
- Rexx
- RobotFramework
- RPMSpec
- RQL
- RSL
- Rust
- S
- Sass:
- Sass
- SCSS
- Scala
- Scalate Server Page
- Scaml
- Scheme
- Scilab
- Shell Session
- Slim
- Smali
- Smalltalk
- Smarty
- Snobol
- SourcePawn
- SPARQL
- SQL:
- SQL
- MySQL
- PL/pgSQL
- PostgreSQL console (psql)
- PostgreSQL SQL dialect
- sqlite3con

- SquidConf
- Stan
- Standard ML
- SWIG
- systemverilog
- Tcl
- Tcsh
- Tea
- TeX
- Todotxt
- Treetop
- TypeScript
- UrbiScript
- Vala
- VB.net
- VCTreeStatus
- Velocity
- verilog
- VGL
- vhdl
- VimL
- XML:
- XML
- XML+Cheetah
- XML+Django/Jinja
- XML+Evoque
- XML+Lasso
- XML+Mako
- XML+Mako
- XML+Myghty
- XML+PHP
- XML+Ruby
- XML+Smarty
- XML+Velocity
- XQuery
- XSLT
- Xtend
- YAML:
- YAML
- YAML+Jinja
- Zephir

+ 3
- 4
bitshift/parser/__init__.py View File

@@ -7,7 +7,7 @@ import subprocess
from os import path
from pygments import lexers as pgl, util

from ..languages import LANGS
from ..languages import LANGS, LANGS_ALL
from .python import parse_py

__all__ = ["parse", "UnsupportedFileError", "start_parse_servers"]
@@ -41,11 +41,10 @@ def _lang(codelet):
lex = pgl.guess_lexer_for_filename(codelet.filename, codelet.code)
else:
lex = pgl.guess_lexer(codelet.code)
except util.ClassNotFound:
return LANGS_ALL[lex.name]
except (util.ClassNotFound, KeyError):
raise UnsupportedFileError(codelet.filename)

return LANGS.index(lex.name)

def _recv_data(server_socket):
"""
Private function to read string response from a server. It reads a certain


+ 1
- 1
setup.py View File

@@ -7,7 +7,7 @@ setup(
install_requires = [
"Flask>=0.10.1", "gunicorn>=18.0", "pygments>=1.6", "requests>=2.2.0",
"beautifulsoup4>=3.2.1", "oursql>=0.9.3.1", "mmh3>=2.3",
"python-dateutil>=2.2"],
"PyYAML>=3.11", "python-dateutil>=2.2"],
author = "Benjamin Attal, Ben Kurtovic, Severyn Kozak",
license = "MIT",
url = "https://github.com/earwig/bitshift"


Loading…
Cancel
Save