A semantic search engine for source code https://bitshift.benkurtovic.com/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

102 lines
2.6 KiB

  1. import json
  2. import sys
  3. import socket
  4. import struct
  5. from pygments import lexers as pgl, util
  6. from ..languages import LANGS
  7. from .python import parse_py
  8. _all__ = ["parse"]
  9. class UnsupportedFileError(Exception):
  10. pass
  11. def _lang(codelet):
  12. """
  13. Private function to identify the language of a codelet.
  14. :param codelet: The codelet object to identified.
  15. :type code: Codelet
  16. .. todo::
  17. Modify function to incorporate tags from stackoverflow.
  18. """
  19. try:
  20. if codelet.filename:
  21. lex = pgl.get_lexer_for_filename(codelet.filename)
  22. else:
  23. lex = pgl.guess_lexer(codelet.code)
  24. except util.ClassNotFound:
  25. raise UnsupportedFileError(codelet.filename)
  26. return LANGS.index(lex.name)
  27. def _recv_data(server_socket):
  28. """
  29. Private function to read string response from a server. It reads a certain
  30. amount of data based on the size it is sent from the server.
  31. :param server_socket: The server that the client is connected to, and will,
  32. read from.
  33. :type code: socket.ServerSocket
  34. """
  35. recv_size = 8192
  36. total_data = []
  37. size_data = cur_data = ''
  38. total_size, size = 0, sys.maxint
  39. while total_size < size:
  40. cur_data = server_socket.recv(recv_size)
  41. if not total_data:
  42. if len(size_data) > 4:
  43. size_data += cur_data
  44. size = struct.unpack('>i', size_data[:4])[0]
  45. recv_size = size
  46. if recv_size > sys.maxint: recv_size = sys.maxint
  47. total_data.append(size_data[4:])
  48. else:
  49. size_data += cur_data
  50. else:
  51. total_data.append(cur_data)
  52. total_size = sum([len(s) for s in total_data])
  53. server_socket.close()
  54. return ''.join(total_data)
  55. def parse(codelet):
  56. """
  57. Dispatches the codelet to the correct parser based on its language.
  58. It is the job of the respective parsers to accumulate data about the
  59. code and to convert it into a string representing a python dict.
  60. The codelet is then given dict as its 'symbols' field.
  61. :param codelet: The codelet object to parsed.
  62. :type code: Codelet
  63. """
  64. lang = _lang(codelet)
  65. source = codelet.code
  66. codelet.language = lang
  67. server_socket_number = 5000 + lang
  68. if lang == LANGS.index('Python'):
  69. parse_py(codelet)
  70. else:
  71. server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  72. server_socket.connect(("localhost", server_socket_number))
  73. server_socket.send("%d\n%s" % (len(source), source))
  74. symbols = json.loads(_recv_data(server_socket))
  75. codelet.symbols = symbols