A semantic search engine for source code https://bitshift.benkurtovic.com/
25개 이상의 토픽을 선택하실 수 없습니다. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

56 lines
1.5 KiB

  1. """
  2. :synopsis: Parent crawler module, which supervises all crawlers.
  3. Contains functions for initializing all subsidiary, threaded crawlers.
  4. """
  5. import logging, logging.handlers, os, Queue
  6. from bitshift.crawler import crawler, indexer
  7. __all__ = ["crawl"]
  8. def crawl():
  9. """
  10. Initialize all crawlers (and indexers).
  11. Start the:
  12. 1. GitHub crawler, :class:`crawler.GitHubCrawler`.
  13. 2. Bitbucket crawler, :class:`crawler.BitbucketCrawler`.
  14. 3. Git indexer, :class:`bitshift.crawler.indexer.GitIndexer`.
  15. """
  16. _configure_logging()
  17. MAX_URL_QUEUE_SIZE = 5e3
  18. repo_clone_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE)
  19. threads = [crawler.GitHubCrawler(repo_clone_queue),
  20. crawler.BitbucketCrawler(repo_clone_queue),
  21. indexer.GitIndexer(repo_clone_queue)]
  22. for thread in threads:
  23. thread.start()
  24. def _configure_logging():
  25. LOG_FILE_DIR = "log"
  26. if not os.path.exists(LOG_FILE_DIR):
  27. os.mkdir(LOG_FILE_DIR)
  28. logging.getLogger("requests").setLevel(logging.WARNING)
  29. logging.getLogger("urllib3").setLevel(logging.WARNING)
  30. formatter = logging.Formatter(
  31. fmt=("%(asctime)s %(levelname)s %(name)s %(funcName)s"
  32. " %(message)s"), datefmt="%y-%m-%d %H:%M:%S")
  33. handler = logging.handlers.TimedRotatingFileHandler(
  34. "%s/%s" % (LOG_FILE_DIR, "app.log"), when="H", interval=1,
  35. backupCount=20)
  36. handler.setFormatter(formatter)
  37. root_logger = logging.getLogger()
  38. root_logger.addHandler(handler)
  39. root_logger.setLevel(logging.NOTSET)