A semantic search engine for source code https://bitshift.benkurtovic.com/
Ви не можете вибрати більше 25 тем Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.
 
 
 
 
 
 

94 рядки
2.6 KiB

  1. import json, pygments.lexers as pgl, sys, socket, struct
  2. from ..languages import LANGS
  3. from .python import parse_py
  4. _all__ = ["parse"]
  5. class UnsupportedFileError(Exception):
  6. pass
  7. def _lang(codelet):
  8. """
  9. Private function to identify the language of a codelet.
  10. :param codelet: The codelet object to identified.
  11. :type code: Codelet
  12. .. todo::
  13. Modify function to incorporate tags from stackoverflow.
  14. """
  15. if codelet.filename is not None:
  16. try:
  17. return pgl.guess_lexer_for_filename(codelet.filename, '').name
  18. except:
  19. raise UnsupportedFileError('Could not find a lexer for the codelet\'s filename')
  20. return LANGS.index(pgl.guess_lexer(codelet.code))
  21. def _recv_data(server_socket):
  22. """
  23. Private function to read string response from a server. It reads a certain
  24. amount of data based on the size it is sent from the server.
  25. :param server_socket: The server that the client is connected to, and will,
  26. read from.
  27. :type code: socket.ServerSocket
  28. """
  29. recv_size = 8192
  30. total_data = []; size_data = cur_data = ''
  31. total_size = 0; size = sys.maxint
  32. while total_size < size:
  33. cur_data = server_socket.recv(recv_size)
  34. if not total_data:
  35. if len(size_data) > 4:
  36. size_data += cur_data
  37. size = struct.unpack('>i', size_data[:4])[0]
  38. recv_size = size
  39. if recv_size > sys.maxint: recv_size = sys.maxint
  40. total_data.append(size_data[4:])
  41. else:
  42. size_data += cur_data
  43. else:
  44. total_data.append(cur_data)
  45. total_size = sum([len(s) for s in total_data])
  46. server_socket.close()
  47. return ''.join(total_data);
  48. def parse(codelet):
  49. """
  50. Dispatches the codelet to the correct parser based on its language.
  51. It is the job of the respective parsers to accumulate data about the
  52. code and to convert it into a string representing a python dict.
  53. The codelet is then given dict as its 'symbols' field.
  54. :param codelet: The codelet object to parsed.
  55. :type code: Codelet
  56. """
  57. lang = _lang(codelet); source = codelet.code
  58. codelet.language = lang
  59. server_socket_number = 5000 + lang
  60. if lang == LANGS.index('Python'):
  61. parse_py(codelet)
  62. else:
  63. server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  64. server_socket.connect(("localhost", server_socket_number))
  65. server_socket.send("%d\n%s" % (len(source), source));
  66. symbols = json.loads(_recv_data(server_socket))
  67. codelet.symbols = symbols