瀏覽代碼

Some fixes, mainly involving language detection.

tags/v1.0^2
Ben Kurtovic 10 年之前
父節點
當前提交
5d9ef2774d
共有 5 個檔案被更改,包括 305 行新增19 行删除
  1. +1
    -1
      .gitignore
  2. +1
    -1
      bitshift/codelet.py
  3. +280
    -1
      bitshift/languages.json
  4. +22
    -15
      bitshift/parser/__init__.py
  5. +1
    -1
      bitshift/query/nodes.py

+ 1
- 1
.gitignore 查看文件

@@ -51,4 +51,4 @@ target

# Ctags
*/tags
log
logs

+ 1
- 1
bitshift/codelet.py 查看文件

@@ -27,7 +27,7 @@ class Codelet(object):
"""

def __init__(self, name, code, filename, language, authors, code_url,
date_created, date_modified, rank, symbols=None, origin=None):
date_created, date_modified, rank, symbols=None, origin=None):
"""
Create a Codelet instance.



+ 280
- 1
bitshift/languages.json 查看文件

@@ -1,4 +1,283 @@
{
"_comment" : "A list of programming languages supported by `bitshift`.",
"languages" : ["Debian Sourcelist", "Delphi", "JavaScript+Mako", "Brainfuck", "Ceylon", "JavaScript+Django/Jinja", "HTML+Evoque", "NumPy", "Modula-2", "LiveScript", "Nimrod", "Bash", "HTML+Django/Jinja", "CSS+PHP", "XML+Lasso", "VimL", "CSS+Genshi Text", "Fancy", "Coldfusion HTML", "cfstatement", "Scalate Server Page", "Smarty", "XML+Evoque", "haXe", "PowerShell", "Tea", "HTML+Cheetah", "Mason", "Django/Jinja", "JAGS", "ApacheConf", "DTD", "Lighttpd configuration file", "Java", "JavaScript+Genshi Text", "Scheme", "Nemerle", "RHTML", "Ragel in Java Host", "Darcs Patch", "Puppet", "Octave", "CoffeeScript", "Ragel in D Host", "Scilab", "Monkey", "HTML+Myghty", "CSS", "JavaScript+Smarty", "Io", "COBOLFree", "Asymptote", "vhdl", "Python 3", "CSS+Ruby", "Fortran", "d-objdump", "MySQL", "REBOL", "C++", "ERB", "CBM BASIC V2", "Befunge", "Julia", "MoonScript", "Ruby", "XML+Smarty", "Dylan", "Groovy", "MoinMoin/Trac Wiki markup", "autohotkey", "C", "HTML", "Felix", "CMake", "NSIS", "SourcePawn", "Mako", "VGL", "Velocity", "Koka", "CUDA", "Gnuplot", "IRC logs", "Prolog", "Python", "CSS+Django/Jinja", "verilog", "Smalltalk", "JavaScript+Myghty", "YAML", "Julia console", "ANTLR With ActionScript Target", "XML+Mako", "XSLT", "UrbiScript", "Scaml", "S", "DylanLID", "MAQL", "sqlite3con", "Boo", "OCaml", "eC", "ActionScript", "VB.net", "SquidConf", "XQuery", "D", "Fantom", "Gettext Catalog", "Logos", "Lasso", "SCSS", "BBCode", "Haml", "FoxPro", "Python 3.0 Traceback", "MuPAD", "XML+Ruby", "Dart", "IDL", "dg", "Evoque", "Jade", "c-objdump", "Kconfig", "Java Server Page", "reg", "ABAP", "XML+Velocity", "JavaScript+Cheetah", "HTML+Mako", "Ragel in Ruby Host", "RobotFramework", "Protocol Buffer", "CFEngine3", "Ragel", "GLSL", "COBOL", "TypeScript", "Ada", "PostgreSQL SQL dialect", "Xtend", "Logtalk", "objdump", "CSS+Mako", "ca65", "Objective-C++", "Gherkin", "HTML+PHP", "Makefile", "PostScript", "Hxml", "Kotlin", "PL/pgSQL", "Vala", "Haskell", "Bro", "Lua", "POVRay", "Sass", "ANTLR With Java Target", "Tcl", "ANTLR With ObjectiveC Target", "JavaScript+Ruby", "Racket", "AspectJ", "Base Makefile", "ANTLR With Python Target", "cpp-objdump", "Genshi Text", "Ioke", "PyPy Log", "Croc", "Objective-J", "GAS", "Batchfile", "Snobol", "XML", "ANTLR", "Opa", "XML+Cheetah", "Go", "Diff", "MiniD", "Cython", "Ragel in C Host", "Erlang", "Debian Control file", "aspx-vb", "BUGS", "Ragel in CPP Host", "aspx-cs", "Properties", "Groff", "Clojure", "Modelica", "QML", "JavaScript+Lasso", "ANTLR With Perl Target", "Genshi", "BlitzMax", "Treetop", "Matlab", "Myghty", "HTML+Genshi", "Duel", "Perl", "FSharp", "reStructuredText", "NewLisp", "Scala", "CSS+Lasso", "XML+PHP", "Stan", "INI", "MOOCode", "Shell Session", "RPMSpec", "Newspeak", "Bash Session", "Coq", "Raw token data", "Tcsh", "HTML+Lasso", "C#", "Gosu Template", "RConsole", "MXML", "TeX", "CSS+Smarty", "Text only", "ANTLR With C# Target", "OpenEdge ABL", "Cheetah", "Smali", "CSS+Myghty", "Rd", "LLVM", "Standard ML", "Elixir", "Nginx configuration file", "GoodData-CL", "AppleScript", "HTML+Smarty", "Objective-C", "JavaScript", "Rust", "Common Lisp", "Embedded Ragel", "ActionScript 3", "systemverilog", "Literate Haskell", "Python Traceback", "PHP", "ANTLR With CPP Target", "Gosu", "Hybris", "JavaScript+PHP", "Factor", "HTML+Velocity", "Mscgen", "Ooc", "SQL", "HTTP", "ECL", "Redcode", "Ragel in Objective C Host", "XML+Django/Jinja", "Awk", "JSON", "NASM", "ANTLR With Ruby Target", "XML+Myghty", "AutoIt", "Mako", "CSS+Mako", "HTML+Mako", "XML+Mako", "JavaScript+Mako"]
"languages" : [
"Debian Sourcelist",
"Delphi",
"JavaScript+Mako",
"Brainfuck",
"Ceylon",
"JavaScript+Django/Jinja",
"HTML+Evoque",
"NumPy",
"Modula-2",
"LiveScript",
"Nimrod",
"Bash",
"HTML+Django/Jinja",
"CSS+PHP",
"XML+Lasso",
"VimL",
"CSS+Genshi Text",
"Fancy",
"Coldfusion HTML",
"cfstatement",
"Scalate Server Page",
"Smarty",
"XML+Evoque",
"haXe",
"PowerShell",
"Tea",
"HTML+Cheetah",
"Mason",
"Django/Jinja",
"JAGS",
"ApacheConf",
"DTD",
"Lighttpd configuration file",
"Java",
"JavaScript+Genshi Text",
"Scheme",
"Nemerle",
"RHTML",
"Ragel in Java Host",
"Darcs Patch",
"Puppet",
"Octave",
"CoffeeScript",
"Ragel in D Host",
"Scilab",
"Monkey",
"HTML+Myghty",
"CSS",
"JavaScript+Smarty",
"Io",
"COBOLFree",
"Asymptote",
"vhdl",
"CSS+Ruby",
"Fortran",
"d-objdump",
"MySQL",
"REBOL",
"C++",
"ERB",
"CBM BASIC V2",
"Befunge",
"Julia",
"MoonScript",
"Ruby",
"XML+Smarty",
"Dylan",
"Groovy",
"MoinMoin/Trac Wiki markup",
"autohotkey",
"C",
"HTML",
"Felix",
"CMake",
"NSIS",
"SourcePawn",
"Mako",
"VGL",
"Velocity",
"Koka",
"CUDA",
"Gnuplot",
"IRC logs",
"Prolog",
"Python",
"CSS+Django/Jinja",
"verilog",
"Smalltalk",
"JavaScript+Myghty",
"YAML",
"Julia console",
"ANTLR With ActionScript Target",
"XML+Mako",
"XSLT",
"UrbiScript",
"Scaml",
"S",
"DylanLID",
"MAQL",
"sqlite3con",
"Boo",
"OCaml",
"eC",
"ActionScript",
"VB.net",
"SquidConf",
"XQuery",
"D",
"Fantom",
"Gettext Catalog",
"Logos",
"Lasso",
"SCSS",
"BBCode",
"Haml",
"FoxPro",
"MuPAD",
"XML+Ruby",
"Dart",
"IDL",
"dg",
"Evoque",
"Jade",
"c-objdump",
"Kconfig",
"Java Server Page",
"reg",
"ABAP",
"XML+Velocity",
"JavaScript+Cheetah",
"HTML+Mako",
"Ragel in Ruby Host",
"RobotFramework",
"Protocol Buffer",
"CFEngine3",
"Ragel",
"GLSL",
"COBOL",
"TypeScript",
"Ada",
"PostgreSQL SQL dialect",
"Xtend",
"Logtalk",
"objdump",
"CSS+Mako",
"ca65",
"Objective-C++",
"Gherkin",
"HTML+PHP",
"Makefile",
"PostScript",
"Hxml",
"Kotlin",
"PL/pgSQL",
"Vala",
"Haskell",
"Bro",
"Lua",
"POVRay",
"Sass",
"ANTLR With Java Target",
"Tcl",
"ANTLR With ObjectiveC Target",
"JavaScript+Ruby",
"Racket",
"AspectJ",
"Base Makefile",
"ANTLR With Python Target",
"cpp-objdump",
"Genshi Text",
"Ioke",
"PyPy Log",
"Croc",
"Objective-J",
"GAS",
"Batchfile",
"Snobol",
"XML",
"ANTLR",
"Opa",
"XML+Cheetah",
"Go",
"Diff",
"MiniD",
"Cython",
"Ragel in C Host",
"Erlang",
"Debian Control file",
"aspx-vb",
"BUGS",
"Ragel in CPP Host",
"aspx-cs",
"Properties",
"Groff",
"Clojure",
"Modelica",
"QML",
"JavaScript+Lasso",
"ANTLR With Perl Target",
"Genshi",
"BlitzMax",
"Treetop",
"Matlab",
"Myghty",
"HTML+Genshi",
"Duel",
"Perl",
"FSharp",
"reStructuredText",
"NewLisp",
"Scala",
"CSS+Lasso",
"XML+PHP",
"Stan",
"INI",
"MOOCode",
"Shell Session",
"RPMSpec",
"Newspeak",
"Bash Session",
"Coq",
"Raw token data",
"Tcsh",
"HTML+Lasso",
"C#",
"Gosu Template",
"RConsole",
"MXML",
"TeX",
"CSS+Smarty",
"Text only",
"ANTLR With C# Target",
"OpenEdge ABL",
"Cheetah",
"Smali",
"CSS+Myghty",
"Rd",
"LLVM",
"Standard ML",
"Elixir",
"Nginx configuration file",
"GoodData-CL",
"AppleScript",
"HTML+Smarty",
"Objective-C",
"JavaScript",
"Rust",
"Common Lisp",
"Embedded Ragel",
"ActionScript 3",
"systemverilog",
"Literate Haskell",
"PHP",
"ANTLR With CPP Target",
"Gosu",
"Hybris",
"JavaScript+PHP",
"Factor",
"HTML+Velocity",
"Mscgen",
"Ooc",
"SQL",
"HTTP",
"ECL",
"Redcode",
"Ragel in Objective C Host",
"XML+Django/Jinja",
"Awk",
"JSON",
"NASM",
"ANTLR With Ruby Target",
"XML+Myghty",
"AutoIt",
"Mako",
"CSS+Mako",
"HTML+Mako",
"XML+Mako",
"JavaScript+Mako"
]
}

+ 22
- 15
bitshift/parser/__init__.py 查看文件

@@ -1,4 +1,10 @@
import json, pygments.lexers as pgl, sys, socket, struct
import json
import sys
import socket
import struct

from pygments import lexers as pgl, util

from ..languages import LANGS
from .python import parse_py

@@ -19,13 +25,14 @@ def _lang(codelet):
Modify function to incorporate tags from stackoverflow.
"""

if codelet.filename is not None:
try:
return pgl.guess_lexer_for_filename(codelet.filename, codelet.code).name
except:
raise UnsupportedFileError('Could not find a lexer for the codelet\'s filename')

return LANGS.index(pgl.guess_lexer(codelet.code))
try:
if codelet.filename:
lex = pgl.guess_lexer_for_filename(codelet.filename, codelet.code)
else:
lex = pgl.guess_lexer(codelet.code)
except util.ClassNotFound:
raise UnsupportedFileError(codelet.filename)
return LANGS.index(lex.name)

def _recv_data(server_socket):
"""
@@ -39,8 +46,9 @@ def _recv_data(server_socket):
"""

recv_size = 8192
total_data = []; size_data = cur_data = ''
total_size = 0; size = sys.maxint
total_data = []
size_data = cur_data = ''
total_size, size = 0, sys.maxint

while total_size < size:
cur_data = server_socket.recv(recv_size)
@@ -61,8 +69,7 @@ def _recv_data(server_socket):
total_size = sum([len(s) for s in total_data])

server_socket.close()
return ''.join(total_data);

return ''.join(total_data)

def parse(codelet):
"""
@@ -76,7 +83,8 @@ def parse(codelet):
:type code: Codelet
"""

lang = _lang(codelet); source = codelet.code
lang = _lang(codelet)
source = codelet.code
codelet.language = lang
server_socket_number = 5000 + lang

@@ -86,8 +94,7 @@ def parse(codelet):
else:
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server_socket.connect(("localhost", server_socket_number))
server_socket.send("%d\n%s" % (len(source), source));
server_socket.send("%d\n%s" % (len(source), source))

symbols = json.loads(_recv_data(server_socket))
codelet.symbols = symbols


+ 1
- 1
bitshift/query/nodes.py 查看文件

@@ -195,7 +195,7 @@ class Symbol(_Node):
CLASS = 1
VARIABLE = 2
TYPES = {FUNCTION: "FUNCTION", CLASS: "CLASS", VARIABLE: "VARIABLE"}
TYPES_INV = ["functions", "classes", "variables"]
TYPES_INV = ["functions", "classes", "vars"]

def __init__(self, type_, name):
"""


Loading…
取消
儲存