Parcourir la source

Some fixes, mainly involving language detection.

tags/v1.0^2
Ben Kurtovic il y a 10 ans
Parent
révision
5d9ef2774d
5 fichiers modifiés avec 305 ajouts et 19 suppressions
  1. +1
    -1
      .gitignore
  2. +1
    -1
      bitshift/codelet.py
  3. +280
    -1
      bitshift/languages.json
  4. +22
    -15
      bitshift/parser/__init__.py
  5. +1
    -1
      bitshift/query/nodes.py

+ 1
- 1
.gitignore Voir le fichier

@@ -51,4 +51,4 @@ target

# Ctags
*/tags
log
logs

+ 1
- 1
bitshift/codelet.py Voir le fichier

@@ -27,7 +27,7 @@ class Codelet(object):
"""

def __init__(self, name, code, filename, language, authors, code_url,
date_created, date_modified, rank, symbols=None, origin=None):
date_created, date_modified, rank, symbols=None, origin=None):
"""
Create a Codelet instance.



+ 280
- 1
bitshift/languages.json Voir le fichier

@@ -1,4 +1,283 @@
{
"_comment" : "A list of programming languages supported by `bitshift`.",
"languages" : ["Debian Sourcelist", "Delphi", "JavaScript+Mako", "Brainfuck", "Ceylon", "JavaScript+Django/Jinja", "HTML+Evoque", "NumPy", "Modula-2", "LiveScript", "Nimrod", "Bash", "HTML+Django/Jinja", "CSS+PHP", "XML+Lasso", "VimL", "CSS+Genshi Text", "Fancy", "Coldfusion HTML", "cfstatement", "Scalate Server Page", "Smarty", "XML+Evoque", "haXe", "PowerShell", "Tea", "HTML+Cheetah", "Mason", "Django/Jinja", "JAGS", "ApacheConf", "DTD", "Lighttpd configuration file", "Java", "JavaScript+Genshi Text", "Scheme", "Nemerle", "RHTML", "Ragel in Java Host", "Darcs Patch", "Puppet", "Octave", "CoffeeScript", "Ragel in D Host", "Scilab", "Monkey", "HTML+Myghty", "CSS", "JavaScript+Smarty", "Io", "COBOLFree", "Asymptote", "vhdl", "Python 3", "CSS+Ruby", "Fortran", "d-objdump", "MySQL", "REBOL", "C++", "ERB", "CBM BASIC V2", "Befunge", "Julia", "MoonScript", "Ruby", "XML+Smarty", "Dylan", "Groovy", "MoinMoin/Trac Wiki markup", "autohotkey", "C", "HTML", "Felix", "CMake", "NSIS", "SourcePawn", "Mako", "VGL", "Velocity", "Koka", "CUDA", "Gnuplot", "IRC logs", "Prolog", "Python", "CSS+Django/Jinja", "verilog", "Smalltalk", "JavaScript+Myghty", "YAML", "Julia console", "ANTLR With ActionScript Target", "XML+Mako", "XSLT", "UrbiScript", "Scaml", "S", "DylanLID", "MAQL", "sqlite3con", "Boo", "OCaml", "eC", "ActionScript", "VB.net", "SquidConf", "XQuery", "D", "Fantom", "Gettext Catalog", "Logos", "Lasso", "SCSS", "BBCode", "Haml", "FoxPro", "Python 3.0 Traceback", "MuPAD", "XML+Ruby", "Dart", "IDL", "dg", "Evoque", "Jade", "c-objdump", "Kconfig", "Java Server Page", "reg", "ABAP", "XML+Velocity", "JavaScript+Cheetah", "HTML+Mako", "Ragel in Ruby Host", "RobotFramework", "Protocol Buffer", "CFEngine3", "Ragel", "GLSL", "COBOL", "TypeScript", "Ada", "PostgreSQL SQL dialect", "Xtend", "Logtalk", "objdump", "CSS+Mako", "ca65", "Objective-C++", "Gherkin", "HTML+PHP", "Makefile", "PostScript", "Hxml", "Kotlin", "PL/pgSQL", "Vala", "Haskell", "Bro", "Lua", "POVRay", "Sass", "ANTLR With Java Target", "Tcl", "ANTLR With ObjectiveC Target", "JavaScript+Ruby", "Racket", "AspectJ", "Base Makefile", "ANTLR With Python Target", "cpp-objdump", "Genshi Text", "Ioke", "PyPy Log", "Croc", "Objective-J", "GAS", "Batchfile", "Snobol", "XML", "ANTLR", "Opa", "XML+Cheetah", "Go", "Diff", "MiniD", "Cython", "Ragel in C Host", "Erlang", "Debian Control file", "aspx-vb", "BUGS", "Ragel in CPP Host", "aspx-cs", "Properties", "Groff", "Clojure", "Modelica", "QML", "JavaScript+Lasso", "ANTLR With Perl Target", "Genshi", "BlitzMax", "Treetop", "Matlab", "Myghty", "HTML+Genshi", "Duel", "Perl", "FSharp", "reStructuredText", "NewLisp", "Scala", "CSS+Lasso", "XML+PHP", "Stan", "INI", "MOOCode", "Shell Session", "RPMSpec", "Newspeak", "Bash Session", "Coq", "Raw token data", "Tcsh", "HTML+Lasso", "C#", "Gosu Template", "RConsole", "MXML", "TeX", "CSS+Smarty", "Text only", "ANTLR With C# Target", "OpenEdge ABL", "Cheetah", "Smali", "CSS+Myghty", "Rd", "LLVM", "Standard ML", "Elixir", "Nginx configuration file", "GoodData-CL", "AppleScript", "HTML+Smarty", "Objective-C", "JavaScript", "Rust", "Common Lisp", "Embedded Ragel", "ActionScript 3", "systemverilog", "Literate Haskell", "Python Traceback", "PHP", "ANTLR With CPP Target", "Gosu", "Hybris", "JavaScript+PHP", "Factor", "HTML+Velocity", "Mscgen", "Ooc", "SQL", "HTTP", "ECL", "Redcode", "Ragel in Objective C Host", "XML+Django/Jinja", "Awk", "JSON", "NASM", "ANTLR With Ruby Target", "XML+Myghty", "AutoIt", "Mako", "CSS+Mako", "HTML+Mako", "XML+Mako", "JavaScript+Mako"]
"languages" : [
"Debian Sourcelist",
"Delphi",
"JavaScript+Mako",
"Brainfuck",
"Ceylon",
"JavaScript+Django/Jinja",
"HTML+Evoque",
"NumPy",
"Modula-2",
"LiveScript",
"Nimrod",
"Bash",
"HTML+Django/Jinja",
"CSS+PHP",
"XML+Lasso",
"VimL",
"CSS+Genshi Text",
"Fancy",
"Coldfusion HTML",
"cfstatement",
"Scalate Server Page",
"Smarty",
"XML+Evoque",
"haXe",
"PowerShell",
"Tea",
"HTML+Cheetah",
"Mason",
"Django/Jinja",
"JAGS",
"ApacheConf",
"DTD",
"Lighttpd configuration file",
"Java",
"JavaScript+Genshi Text",
"Scheme",
"Nemerle",
"RHTML",
"Ragel in Java Host",
"Darcs Patch",
"Puppet",
"Octave",
"CoffeeScript",
"Ragel in D Host",
"Scilab",
"Monkey",
"HTML+Myghty",
"CSS",
"JavaScript+Smarty",
"Io",
"COBOLFree",
"Asymptote",
"vhdl",
"CSS+Ruby",
"Fortran",
"d-objdump",
"MySQL",
"REBOL",
"C++",
"ERB",
"CBM BASIC V2",
"Befunge",
"Julia",
"MoonScript",
"Ruby",
"XML+Smarty",
"Dylan",
"Groovy",
"MoinMoin/Trac Wiki markup",
"autohotkey",
"C",
"HTML",
"Felix",
"CMake",
"NSIS",
"SourcePawn",
"Mako",
"VGL",
"Velocity",
"Koka",
"CUDA",
"Gnuplot",
"IRC logs",
"Prolog",
"Python",
"CSS+Django/Jinja",
"verilog",
"Smalltalk",
"JavaScript+Myghty",
"YAML",
"Julia console",
"ANTLR With ActionScript Target",
"XML+Mako",
"XSLT",
"UrbiScript",
"Scaml",
"S",
"DylanLID",
"MAQL",
"sqlite3con",
"Boo",
"OCaml",
"eC",
"ActionScript",
"VB.net",
"SquidConf",
"XQuery",
"D",
"Fantom",
"Gettext Catalog",
"Logos",
"Lasso",
"SCSS",
"BBCode",
"Haml",
"FoxPro",
"MuPAD",
"XML+Ruby",
"Dart",
"IDL",
"dg",
"Evoque",
"Jade",
"c-objdump",
"Kconfig",
"Java Server Page",
"reg",
"ABAP",
"XML+Velocity",
"JavaScript+Cheetah",
"HTML+Mako",
"Ragel in Ruby Host",
"RobotFramework",
"Protocol Buffer",
"CFEngine3",
"Ragel",
"GLSL",
"COBOL",
"TypeScript",
"Ada",
"PostgreSQL SQL dialect",
"Xtend",
"Logtalk",
"objdump",
"CSS+Mako",
"ca65",
"Objective-C++",
"Gherkin",
"HTML+PHP",
"Makefile",
"PostScript",
"Hxml",
"Kotlin",
"PL/pgSQL",
"Vala",
"Haskell",
"Bro",
"Lua",
"POVRay",
"Sass",
"ANTLR With Java Target",
"Tcl",
"ANTLR With ObjectiveC Target",
"JavaScript+Ruby",
"Racket",
"AspectJ",
"Base Makefile",
"ANTLR With Python Target",
"cpp-objdump",
"Genshi Text",
"Ioke",
"PyPy Log",
"Croc",
"Objective-J",
"GAS",
"Batchfile",
"Snobol",
"XML",
"ANTLR",
"Opa",
"XML+Cheetah",
"Go",
"Diff",
"MiniD",
"Cython",
"Ragel in C Host",
"Erlang",
"Debian Control file",
"aspx-vb",
"BUGS",
"Ragel in CPP Host",
"aspx-cs",
"Properties",
"Groff",
"Clojure",
"Modelica",
"QML",
"JavaScript+Lasso",
"ANTLR With Perl Target",
"Genshi",
"BlitzMax",
"Treetop",
"Matlab",
"Myghty",
"HTML+Genshi",
"Duel",
"Perl",
"FSharp",
"reStructuredText",
"NewLisp",
"Scala",
"CSS+Lasso",
"XML+PHP",
"Stan",
"INI",
"MOOCode",
"Shell Session",
"RPMSpec",
"Newspeak",
"Bash Session",
"Coq",
"Raw token data",
"Tcsh",
"HTML+Lasso",
"C#",
"Gosu Template",
"RConsole",
"MXML",
"TeX",
"CSS+Smarty",
"Text only",
"ANTLR With C# Target",
"OpenEdge ABL",
"Cheetah",
"Smali",
"CSS+Myghty",
"Rd",
"LLVM",
"Standard ML",
"Elixir",
"Nginx configuration file",
"GoodData-CL",
"AppleScript",
"HTML+Smarty",
"Objective-C",
"JavaScript",
"Rust",
"Common Lisp",
"Embedded Ragel",
"ActionScript 3",
"systemverilog",
"Literate Haskell",
"PHP",
"ANTLR With CPP Target",
"Gosu",
"Hybris",
"JavaScript+PHP",
"Factor",
"HTML+Velocity",
"Mscgen",
"Ooc",
"SQL",
"HTTP",
"ECL",
"Redcode",
"Ragel in Objective C Host",
"XML+Django/Jinja",
"Awk",
"JSON",
"NASM",
"ANTLR With Ruby Target",
"XML+Myghty",
"AutoIt",
"Mako",
"CSS+Mako",
"HTML+Mako",
"XML+Mako",
"JavaScript+Mako"
]
}

+ 22
- 15
bitshift/parser/__init__.py Voir le fichier

@@ -1,4 +1,10 @@
import json, pygments.lexers as pgl, sys, socket, struct
import json
import sys
import socket
import struct

from pygments import lexers as pgl, util

from ..languages import LANGS
from .python import parse_py

@@ -19,13 +25,14 @@ def _lang(codelet):
Modify function to incorporate tags from stackoverflow.
"""

if codelet.filename is not None:
try:
return pgl.guess_lexer_for_filename(codelet.filename, codelet.code).name
except:
raise UnsupportedFileError('Could not find a lexer for the codelet\'s filename')

return LANGS.index(pgl.guess_lexer(codelet.code))
try:
if codelet.filename:
lex = pgl.guess_lexer_for_filename(codelet.filename, codelet.code)
else:
lex = pgl.guess_lexer(codelet.code)
except util.ClassNotFound:
raise UnsupportedFileError(codelet.filename)
return LANGS.index(lex.name)

def _recv_data(server_socket):
"""
@@ -39,8 +46,9 @@ def _recv_data(server_socket):
"""

recv_size = 8192
total_data = []; size_data = cur_data = ''
total_size = 0; size = sys.maxint
total_data = []
size_data = cur_data = ''
total_size, size = 0, sys.maxint

while total_size < size:
cur_data = server_socket.recv(recv_size)
@@ -61,8 +69,7 @@ def _recv_data(server_socket):
total_size = sum([len(s) for s in total_data])

server_socket.close()
return ''.join(total_data);

return ''.join(total_data)

def parse(codelet):
"""
@@ -76,7 +83,8 @@ def parse(codelet):
:type code: Codelet
"""

lang = _lang(codelet); source = codelet.code
lang = _lang(codelet)
source = codelet.code
codelet.language = lang
server_socket_number = 5000 + lang

@@ -86,8 +94,7 @@ def parse(codelet):
else:
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server_socket.connect(("localhost", server_socket_number))
server_socket.send("%d\n%s" % (len(source), source));
server_socket.send("%d\n%s" % (len(source), source))

symbols = json.loads(_recv_data(server_socket))
codelet.symbols = symbols


+ 1
- 1
bitshift/query/nodes.py Voir le fichier

@@ -195,7 +195,7 @@ class Symbol(_Node):
CLASS = 1
VARIABLE = 2
TYPES = {FUNCTION: "FUNCTION", CLASS: "CLASS", VARIABLE: "VARIABLE"}
TYPES_INV = ["functions", "classes", "variables"]
TYPES_INV = ["functions", "classes", "vars"]

def __init__(self, type_, name):
"""


Chargement…
Annuler
Enregistrer