A semantic search engine for source code https://bitshift.benkurtovic.com/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

136 lines
3.6 KiB

  1. import json
  2. import sys
  3. import socket
  4. import struct
  5. import subprocess
  6. from os import path
  7. from pygments import lexers as pgl, util
  8. from ..languages import LANGS
  9. from .python import parse_py
  # Names exported by a star-import of this module. The original spelling
  # ``_all__`` was an ordinary variable that Python ignores, so the list had
  # no effect. ``UnsupportedFileError`` is listed because ``parse`` can raise
  # it and callers need the name to catch it.
  __all__ = ["parse", "start_parse_servers", "UnsupportedFileError"]
  # Command templates for the external parse servers, as
  # (language name, command pieces) pairs. The pieces are joined with spaces
  # and run through a shell by ``start_parse_servers``, which is why the last
  # piece of each command carries its own shell quoting; its ``%d``
  # placeholder is filled with the server's port number.
  PARSER_COMMANDS = [
      (
          'Java',
          [
              'mvn',
              '-f',
              path.join(
                  path.dirname(__file__), "../../parsers/java/pom.xml"),
              'exec:java',
              '-Dexec.args="%d"',
          ],
      ),
      (
          'Ruby',
          [
              'rake',
              '-f',
              path.join(
                  path.dirname(__file__), "../../parsers/ruby/Rakefile"),
              "'start_server[%d]'",
          ],
      ),
  ]
  class UnsupportedFileError(Exception):
      """Raised when the language of a codelet cannot be identified."""
  def _lang(codelet):
      """
      Private function to identify the language of a codelet.

      The pygments lexer is looked up from the codelet's filename when it has
      one, and guessed from its source code otherwise.

      :param codelet: The codelet object to be identified.
      :type codelet: Codelet

      :return: The index in ``LANGS`` of the detected lexer's name.
      :rtype: int

      :raises UnsupportedFileError: If pygments finds no lexer for the
          codelet, or if the lexer's name is not listed in ``LANGS``.

      .. todo::
          Modify function to incorporate tags from stackoverflow.
      """
      try:
          if codelet.filename:
              lex = pgl.get_lexer_for_filename(codelet.filename)
          else:
              lex = pgl.guess_lexer(codelet.code)
      except util.ClassNotFound:
          raise UnsupportedFileError(codelet.filename)

      try:
          return LANGS.index(lex.name)
      except ValueError:
          # pygments recognised the language, but it is not one we support;
          # report it the same way as an unrecognised file instead of leaking
          # a bare ValueError to the caller.
          raise UnsupportedFileError(codelet.filename)
  37. def _recv_data(server_socket):
  38. """
  39. Private function to read string response from a server. It reads a certain
  40. amount of data based on the size it is sent from the server.
  41. :param server_socket: The server that the client is connected to, and will,
  42. read from.
  43. :type code: socket.ServerSocket
  44. """
  45. recv_size = 8192
  46. total_data = []
  47. size_data = cur_data = ''
  48. total_size, size = 0, sys.maxint
  49. while total_size < size:
  50. cur_data = server_socket.recv(recv_size)
  51. if not total_data:
  52. if len(size_data) > 4:
  53. size_data += cur_data
  54. size = struct.unpack('>i', size_data[:4])[0]
  55. recv_size = size
  56. if recv_size > sys.maxint: recv_size = sys.maxint
  57. total_data.append(size_data[4:])
  58. else:
  59. size_data += cur_data
  60. else:
  61. total_data.append(cur_data)
  62. total_size = sum([len(s) for s in total_data])
  63. server_socket.close()
  64. return ''.join(total_data)
  def start_parse_servers():
      """
      Launch one parse server process per entry in ``PARSER_COMMANDS``.

      Each command's pieces are joined into a single shell string, and its
      ``%d`` placeholder is filled with ``5001`` plus the index of the
      language in ``LANGS``.

      :return: The ``subprocess.Popen`` object of every started server, in
          ``PARSER_COMMANDS`` order.
      :rtype: list
      """
      return [
          subprocess.Popen(
              ' '.join(command) % (5001 + LANGS.index(language)),
              shell=True)
          for language, command in PARSER_COMMANDS
      ]
  def parse(codelet):
      """
      Dispatches the codelet to the correct parser based on its language.

      The detected language index is stored on the codelet as ``language``.
      Python codelets are handed to ``parse_py``. Every other language is sent
      to the parse server on localhost port ``5001 + language index`` as the
      source length, a newline, and the source itself. The JSON reply is
      converted and stored on the codelet as its ``symbols`` field.

      The stored ``symbols`` value maps each key of the reply to a list of
      ``(name, assignments, uses)`` tuples, where ``assignments`` and ``uses``
      are lists of location tuples.

      :param codelet: The codelet object to be parsed.
      :type codelet: Codelet

      :raises UnsupportedFileError: If the codelet's language cannot be
          identified (raised by ``_lang``).
      """
      lang = _lang(codelet)
      codelet.language = lang

      if lang == LANGS.index('Python'):
          parse_py(codelet)
          return

      source = codelet.code
      server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
      try:
          server_socket.connect(("localhost", 5001 + lang))
          # sendall() keeps writing until the whole request is out; send()
          # may stop after transmitting only part of a large source.
          server_socket.sendall("%d\n%s" % (len(source), source))
          response = _recv_data(server_socket)
      finally:
          # Closing an already-closed socket is harmless, so this is safe
          # even though _recv_data closes the socket as well.
          server_socket.close()

      symbols = json.loads(response)
      codelet.symbols = {
          key: [(name,
                 [tuple(loc) for loc in info['assignments']],
                 [tuple(loc) for loc in info['uses']])
                for name, info in syms.items()]
          for key, syms in symbols.items()
      }