A semantic search engine for source code https://bitshift.benkurtovic.com/
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.
 
 
 
 
 
 

91 lignes
2.5 KiB

  1. import json
  2. import subprocess
  3. from os import path
  4. from pygments import lexers as pgl, util
  5. from ..languages import LANGS, LANGS_ALL
  6. from .python import parse_py
  7. __all__ = ["parse", "UnsupportedFileError", "start_parse_servers"]
  8. # TODO: Change these
  9. PARSER_COMMANDS = {
  10. 'Java': ['java', '-cp',
  11. path.join(path.dirname(__file__), "../../parsers/java/parsing.jar"),
  12. 'com.bitshift.parsing.Parse'],
  13. 'Ruby': ['rake', '-f',
  14. path.join(path.dirname(__file__), "../../parsers/ruby/Rakefile"),
  15. 'parse']
  16. }
  17. class UnsupportedFileError(Exception):
  18. pass
  19. def _lang(codelet):
  20. """
  21. Private function to identify the language of a codelet.
  22. :param codelet: The codelet object to identified.
  23. :type code: Codelet
  24. .. todo::
  25. Modify function to incorporate tags from stackoverflow.
  26. """
  27. try:
  28. if codelet.filename:
  29. lex = pgl.guess_lexer_for_filename(codelet.filename, codelet.code)
  30. else:
  31. lex = pgl.guess_lexer(codelet.code)
  32. return LANGS_ALL[lex.name]
  33. except (util.ClassNotFound, KeyError):
  34. raise UnsupportedFileError(codelet.filename)
  35. def parse_via_proc(codelet):
  36. proc = subprocess.Popen(PARSER_COMMANDS[LANGS[codelet.language]],
  37. stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  38. data = proc.communicate(codelet.code)[0]
  39. symbols = json.loads(data)
  40. return symbols
# Dispatch table used by parse(): maps a language name (a value of LANGS) to
# the callable that parses a codelet of that language. Java and Ruby share
# the subprocess-based parser.
PARSERS = {
    "Python": parse_py,
    "Java": parse_via_proc,
    "Ruby": parse_via_proc,
}
  46. def parse(codelet):
  47. """
  48. Dispatches the codelet to the correct parser based on its language.
  49. It is the job of the respective parsers to accumulate data about the
  50. code and to convert it into a string representing a python dict.
  51. The codelet is then given dict as its 'symbols' field.
  52. :param codelet: The codelet object to parsed.
  53. :type code: Codelet
  54. """
  55. lang = _lang(codelet)
  56. lang_string = LANGS[lang]
  57. codelet.language = lang
  58. def loc_helper(l):
  59. for i in l:
  60. if i == -1:
  61. yield None
  62. else:
  63. yield i
  64. if lang_string in PARSERS:
  65. symbols = PARSERS[lang_string](codelet)
  66. symbols = {
  67. key: [(name,
  68. [tuple(loc_helper(loc)) for loc in syms[name]["assignments"]],
  69. [tuple(loc_helper(loc)) for loc in syms[name]["uses"]])
  70. for name in syms]
  71. for key, syms in symbols.iteritems()}
  72. codelet.symbols = symbols