A semantic search engine for source code https://bitshift.benkurtovic.com/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

91 lines
2.5 KiB

  1. import json
  2. import subprocess
  3. from os import path
  4. from pygments import lexers as pgl, util
  5. from ..languages import LANGS, LANGS_ALL
  6. from .python import parse_py
  7. __all__ = ["parse", "UnsupportedFileError", "start_parse_servers"]
  8. # TODO: Change these
  9. PARSER_COMMANDS = {
  10. 'Java': ['java', '-cp',
  11. path.join(path.dirname(__file__), "../../parsers/java/parsing.jar"),
  12. 'com.bitshift.parsing.Parse'],
  13. 'Ruby': ['rake', '-f',
  14. path.join(path.dirname(__file__), "../../parsers/ruby/Rakefile"),
  15. 'parse']
  16. }
  17. class UnsupportedFileError(Exception):
  18. pass
  19. def _lang(codelet):
  20. """
  21. Private function to identify the language of a codelet.
  22. :param codelet: The codelet object to identified.
  23. :type code: Codelet
  24. .. todo::
  25. Modify function to incorporate tags from stackoverflow.
  26. """
  27. try:
  28. if codelet.filename:
  29. lex = pgl.guess_lexer_for_filename(codelet.filename, codelet.code)
  30. else:
  31. lex = pgl.guess_lexer(codelet.code)
  32. return LANGS_ALL[lex.name]
  33. except (util.ClassNotFound, KeyError):
  34. raise UnsupportedFileError(codelet.filename)
  35. def parse_via_proc(codelet):
  36. proc = subprocess.Popen(PARSER_COMMANDS[LANGS[codelet.language]],
  37. stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  38. data = proc.communicate(codelet.code)[0]
  39. symbols = json.loads(data)
  40. return symbols
  41. PARSERS = {
  42. "Python": parse_py,
  43. "Java": parse_via_proc,
  44. "Ruby": parse_via_proc,
  45. }
  46. def parse(codelet):
  47. """
  48. Dispatches the codelet to the correct parser based on its language.
  49. It is the job of the respective parsers to accumulate data about the
  50. code and to convert it into a string representing a python dict.
  51. The codelet is then given dict as its 'symbols' field.
  52. :param codelet: The codelet object to parsed.
  53. :type code: Codelet
  54. """
  55. lang = _lang(codelet)
  56. lang_string = LANGS[lang]
  57. codelet.language = lang
  58. def loc_helper(l):
  59. for i in l:
  60. if i == -1:
  61. yield None
  62. else:
  63. yield i
  64. if lang_string in PARSERS:
  65. symbols = PARSERS[lang_string](codelet)
  66. symbols = {
  67. key: [(name,
  68. [tuple(loc_helper(loc)) for loc in syms[name]["assignments"]],
  69. [tuple(loc_helper(loc)) for loc in syms[name]["uses"]])
  70. for name in syms]
  71. for key, syms in symbols.iteritems()}
  72. codelet.symbols = symbols