A semantic search engine for source code https://bitshift.benkurtovic.com/
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 
 
 
 
 

83 行
2.3 KiB

  1. """
  2. :synopsis: Parent crawler module, which supervises all crawlers.
  3. Contains functions for initializing all subsidiary, threaded crawlers.
  4. """
  5. import logging
  6. import logging.handlers
  7. import os
  8. import Queue
  9. import time
  10. from threading import Event
  11. from bitshift.crawler import crawler, indexer
  12. from bitshift.parser import start_parse_servers
  13. __all__ = ["crawl"]
  14. def crawl():
  15. """
  16. Initialize all crawlers (and indexers).
  17. Start the:
  18. 1. GitHub crawler, :class:`crawler.GitHubCrawler`.
  19. 2. Bitbucket crawler, :class:`crawler.BitbucketCrawler`.
  20. 3. Git indexer, :class:`bitshift.crawler.indexer.GitIndexer`.
  21. """
  22. _configure_logging()
  23. MAX_URL_QUEUE_SIZE = 5e3
  24. repo_clone_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE)
  25. run_event = Event()
  26. run_event.set()
  27. threads = [crawler.GitHubCrawler(repo_clone_queue, run_event),
  28. crawler.BitbucketCrawler(repo_clone_queue, run_event),
  29. indexer.GitIndexer(repo_clone_queue, run_event)]
  30. for thread in threads:
  31. thread.start()
  32. parse_servers = start_parse_servers()
  33. time.sleep(5)
  34. try:
  35. while 1:
  36. time.sleep(0.1)
  37. except KeyboardInterrupt:
  38. run_event.clear()
  39. for thread in threads:
  40. thread.join()
  41. for server in parse_servers:
  42. server.kill()
  43. def _configure_logging():
  44. # This isn't ideal, since it means the bitshift python package must be kept
  45. # inside the app, but it works for now:
  46. root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
  47. log_dir = os.path.join(root, "logs")
  48. if not os.path.exists(log_dir):
  49. os.mkdir(log_dir)
  50. logging.getLogger("requests").setLevel(logging.WARNING)
  51. logging.getLogger("urllib3").setLevel(logging.WARNING)
  52. formatter = logging.Formatter(
  53. fmt=("%(asctime)s %(levelname)s %(name)s:%(funcName)s"
  54. " %(message)s"), datefmt="%y-%m-%d %H:%M:%S")
  55. handler = logging.handlers.TimedRotatingFileHandler(
  56. "%s/%s" % (log_dir, "app.log"), when="H", interval=1,
  57. backupCount=20)
  58. handler.setFormatter(formatter)
  59. root_logger = logging.getLogger()
  60. root_logger.addHandler(handler)
  61. root_logger.setLevel(logging.NOTSET)
  62. if __name__ == "__main__":
  63. _configure_logging()
  64. crawl()