A semantic search engine for source code https://bitshift.benkurtovic.com/
Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.
 
 
 
 
 
 

83 linhas
2.3 KiB

  1. """
  2. :synopsis: Parent crawler module, which supervises all crawlers.
  3. Contains functions for initializing all subsidiary, threaded crawlers.
  4. """
  5. import logging
  6. import logging.handlers
  7. import os
  8. import Queue
  9. import sys
  10. import time
  11. from threading import Event
  12. from bitshift.crawler import crawler, indexer
  13. from bitshift.parser import start_parse_servers
  14. __all__ = ["crawl"]
  15. def crawl():
  16. """
  17. Initialize all crawlers (and indexers).
  18. Start the:
  19. 1. GitHub crawler, :class:`crawler.GitHubCrawler`.
  20. 2. Bitbucket crawler, :class:`crawler.BitbucketCrawler`.
  21. 3. Git indexer, :class:`bitshift.crawler.indexer.GitIndexer`.
  22. """
  23. _configure_logging()
  24. MAX_URL_QUEUE_SIZE = 5e3
  25. repo_clone_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE)
  26. run_event = Event()
  27. run_event.set()
  28. threads = [crawler.GitHubCrawler(repo_clone_queue, run_event),
  29. crawler.BitbucketCrawler(repo_clone_queue, run_event),
  30. indexer.GitIndexer(repo_clone_queue, run_event)]
  31. parse_servers = start_parse_servers()
  32. time.sleep(5)
  33. for thread in threads:
  34. thread.start()
  35. try:
  36. while 1:
  37. time.sleep(0.1)
  38. except KeyboardInterrupt:
  39. run_event.clear()
  40. for thread in threads:
  41. thread.join()
  42. for server in parse_servers:
  43. server.kill()
  44. def _configure_logging():
  45. # This isn't ideal, since it means the bitshift python package must be kept
  46. # inside the app, but it works for now:
  47. root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
  48. log_dir = os.path.join(root, "logs")
  49. if not os.path.exists(log_dir):
  50. os.mkdir(log_dir)
  51. logging.getLogger("requests").setLevel(logging.WARNING)
  52. logging.getLogger("urllib3").setLevel(logging.WARNING)
  53. formatter = logging.Formatter(
  54. fmt=("%(asctime)s %(levelname)s %(name)s:%(funcName)s"
  55. " %(message)s"), datefmt="%y-%m-%d %H:%M:%S")
  56. handler = logging.handlers.TimedRotatingFileHandler(
  57. "%s/%s" % (log_dir, "app.log"), when="H", interval=1,
  58. backupCount=20)
  59. handler.setFormatter(formatter)
  60. root_logger = logging.getLogger()
  61. root_logger.addHandler(handler)
  62. root_logger.setLevel(logging.NOTSET)
# Script entry point: run the supervisor loop when executed directly.
if __name__ == "__main__":
    crawl()