A semantic search engine for source code https://bitshift.benkurtovic.com/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

84 lines
2.2 KiB

  1. import ast, pygments.lexers as pgl, sys, socket, struct
  2. from ..languages import LANGS
  3. from .python import parse_py
  # Public API of this module: only parse() is meant to be exported.
  # (The name needs two leading underscores for "from ... import *" to honour it.)
  __all__ = ["parse"]
  def _lang(codelet):
      """
      Private function to identify the language of a codelet.

      The lexer is guessed by pygments, from the filename plus the code when a
      filename is available and from the code alone otherwise.

      :param codelet: The codelet object whose language is to be identified.
      :type codelet: Codelet

      :return: The position of the guessed lexer's name within ``LANGS``.
      :rtype: int

      :raises ValueError: presumably, if the guessed lexer's name is not listed
          in ``LANGS`` (assumes ``LANGS`` is a list -- confirm against
          ``..languages``).

      .. todo::
          Modify function to incorporate tags from stackoverflow.
      """
      if codelet.filename is not None:
          # guess_lexer_for_filename() requires the text as well as the
          # filename; it uses the text to break ties between candidate lexers.
          lexer = pgl.guess_lexer_for_filename(codelet.filename, codelet.code)
      else:
          lexer = pgl.guess_lexer(codelet.code)

      # parse() adds the result to a port number and compares it with
      # LANGS.index('Python'), so both paths must yield the index of the
      # lexer's *name*, never the name itself or the lexer object.
      return LANGS.index(lexer.name)
  def _recv_data(server_socket):
      """
      Private function to read a length-prefixed response from a server.

      The response starts with a 4-byte big-endian signed integer giving the
      size of the payload, followed by the payload itself. The socket is closed
      before returning, whether or not the read succeeded.

      :param server_socket: The connected socket to read the response from.
      :type server_socket: socket.socket

      :return: Every byte received after the 4-byte size header.
      :rtype: str

      :raises socket.error: If the peer closes the connection before the size
          header or the announced number of payload bytes has arrived.
      """
      header_len = 4
      chunk_size = 8192
      header = b''

      try:
          # Collect at least the size header; the first reads may already
          # carry part (or all) of the payload behind it.
          while len(header) < header_len:
              data = server_socket.recv(chunk_size)
              if not data:
                  raise socket.error(
                      "connection closed before the size header was received")
              header += data

          size = struct.unpack('>i', header[:header_len])[0]
          chunks = [header[header_len:]]
          received = len(chunks[0])

          # Ask only for what is still missing so we never wait for bytes the
          # server is not going to send.
          while received < size:
              data = server_socket.recv(min(chunk_size, size - received))
              if not data:
                  # An empty read means the peer hung up; looping again would
                  # spin forever.
                  raise socket.error(
                      "connection closed after %d of %d bytes" % (received, size))
              chunks.append(data)
              received += len(data)
      finally:
          server_socket.close()

      return b''.join(chunks)
  def parse(codelet):
      """
      Dispatches the codelet to the correct parser based on its language.

      Python codelets are handed to ``parse_py``. For every other language the
      source is sent to a parse server listening on localhost at port
      ``5000 + <language index>``; the reply is evaluated as a Python literal
      and stored in ``codelet.symbols``.

      :param codelet: The codelet object to be parsed.
      :type codelet: Codelet
      """
      lang = _lang(codelet)
      if lang == LANGS.index('Python'):
          parse_py(codelet)
          return

      source = codelet.code
      server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
      try:
          server_socket.connect(("localhost", 5000 + lang))
          # sendall() keeps writing until the whole request is out; a bare
          # send() may transmit only part of a large source.
          server_socket.sendall("%d\n%s" % (len(source), source))
          reply = _recv_data(server_socket)
      finally:
          # Closing here as well makes sure the socket is released when
          # connecting or sending fails; closing a socket twice is harmless.
          server_socket.close()

      # literal_eval only accepts Python literals, so the reply cannot run code.
      codelet.symbols = ast.literal_eval(reply)