A semantic search engine for source code https://bitshift.benkurtovic.com/
No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.
 
 
 
 
 
 

148 líneas
3.9 KiB

  1. import json
  2. import sys
  3. import socket
  4. import struct
  5. import subprocess
  6. from os import path
  7. from pygments import lexers as pgl, util
  8. from ..languages import LANGS, LANGS_ALL
  9. from .python import parse_py
# Public API of this module: the only names exported by a star-import.
__all__ = ["parse", "UnsupportedFileError", "start_parse_servers"]
  11. PARSER_COMMANDS = [
  12. ('Java', ['mvn', '-f',
  13. path.join(path.dirname(__file__), "../../parsers/java/pom.xml"),
  14. 'exec:java', '-Dexec.args=%d']),
  15. ('Ruby', ['rake', '-f',
  16. path.join(path.dirname(__file__), "../../parsers/ruby/Rakefile"),
  17. 'start_server[%d]'])
  18. ]
  19. class UnsupportedFileError(Exception):
  20. pass
  21. def _lang(codelet):
  22. """
  23. Private function to identify the language of a codelet.
  24. :param codelet: The codelet object to identified.
  25. :type code: Codelet
  26. .. todo::
  27. Modify function to incorporate tags from stackoverflow.
  28. """
  29. try:
  30. if codelet.filename:
  31. lex = pgl.guess_lexer_for_filename(codelet.filename, codelet.code)
  32. else:
  33. lex = pgl.guess_lexer(codelet.code)
  34. return LANGS_ALL[lex.name]
  35. except (util.ClassNotFound, KeyError):
  36. raise UnsupportedFileError(codelet.filename)
  37. def _recv_data(server_socket):
  38. """
  39. Private function to read string response from a server. It reads a certain
  40. amount of data based on the size it is sent from the server.
  41. :param server_socket: The server that the client is connected to, and will,
  42. read from.
  43. :type code: socket.ServerSocket
  44. """
  45. recv_size = 8192
  46. total_data = []
  47. size_data = cur_data = ''
  48. total_size, size = 0, sys.maxint
  49. while total_size < size:
  50. cur_data = server_socket.recv(recv_size)
  51. if not total_data:
  52. if len(size_data) > 4:
  53. size_data += cur_data
  54. size = struct.unpack('>i', size_data[:4])[0]
  55. recv_size = size
  56. if recv_size > sys.maxint:
  57. recv_size = sys.maxint
  58. total_data.append(size_data[4:])
  59. else:
  60. size_data += cur_data
  61. else:
  62. total_data.append(cur_data)
  63. total_size = sum([len(s) for s in total_data])
  64. server_socket.close()
  65. return ''.join(total_data)
  66. def start_parse_servers():
  67. """
  68. Starts all the parse servers for languages besides python.
  69. :rtype: list
  70. """
  71. procs = []
  72. for (lang, cmd) in PARSER_COMMANDS:
  73. cmd[-1] = cmd[-1] % (5001 + LANGS.index(lang))
  74. procs.append(subprocess.Popen(cmd))
  75. return procs
  76. def parse_via_server(codelet):
  77. port = 5001 + codelet.language
  78. server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  79. server_socket.connect(("localhost", port))
  80. server_socket.send("%d\n%s" % (len(codelet.code), codelet.code))
  81. symbols = json.loads(_recv_data(server_socket))
  82. return symbols
  83. PARSERS = {
  84. "Python": parse_py,
  85. "Java": parse_via_server,
  86. "Ruby": parse_via_server,
  87. }
  88. def parse(codelet):
  89. """
  90. Dispatches the codelet to the correct parser based on its language.
  91. It is the job of the respective parsers to accumulate data about the
  92. code and to convert it into a string representing a python dict.
  93. The codelet is then given dict as its 'symbols' field.
  94. :param codelet: The codelet object to parsed.
  95. :type code: Codelet
  96. """
  97. lang = _lang(codelet)
  98. lang_string = LANGS[lang]
  99. codelet.language = lang
  100. def loc_helper(l):
  101. for i in l:
  102. if i == -1:
  103. yield None
  104. else:
  105. yield i
  106. if lang_string in PARSERS:
  107. symbols = PARSERS[lang_string](codelet)
  108. symbols = {
  109. key: [(name,
  110. [tuple(loc_helper(loc)) for loc in syms[name]["assignments"]],
  111. [tuple(loc_helper(loc)) for loc in syms[name]["uses"]])
  112. for name in syms]
  113. for key, syms in symbols.iteritems()}
  114. codelet.symbols = symbols