A semantic search engine for source code https://bitshift.benkurtovic.com/
Nie możesz wybrać więcej, niż 25 tematów Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.
 
 
 
 
 
 

56 wiersze
1.5 KiB

  1. """
  2. :synopsis: Parent crawler module, which supervises all crawlers.
  3. Contains functions for initializing all subsidiary, threaded crawlers.
  4. """
  5. import logging, logging.handlers, os, Queue
  6. from bitshift.crawler import crawler, indexer
  7. __all__ = ["crawl"]
  8. def crawl():
  9. """
  10. Initialize all crawlers (and indexers).
  11. Start the:
  12. 1. GitHub crawler, :class:`crawler.GitHubCrawler`.
  13. 2. Bitbucket crawler, :class:`crawler.BitbucketCrawler`.
  14. 3. Git indexer, :class:`bitshift.crawler.indexer.GitIndexer`.
  15. """
  16. _configure_logging()
  17. MAX_URL_QUEUE_SIZE = 5e3
  18. repo_clone_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE)
  19. threads = [crawler.GitHubCrawler(repo_clone_queue),
  20. crawler.BitbucketCrawler(repo_clone_queue),
  21. indexer.GitIndexer(repo_clone_queue)]
  22. for thread in threads:
  23. thread.start()
  24. def _configure_logging():
  25. LOG_FILE_DIR = "log"
  26. if not os.path.exists(LOG_FILE_DIR):
  27. os.mkdir(LOG_FILE_DIR)
  28. logging.getLogger("requests").setLevel(logging.WARNING)
  29. logging.getLogger("urllib3").setLevel(logging.WARNING)
  30. formatter = logging.Formatter(
  31. fmt=("%(asctime)s %(levelname)s %(name)s %(funcName)s"
  32. " %(message)s"), datefmt="%y-%m-%d %H:%M:%S")
  33. handler = logging.handlers.TimedRotatingFileHandler(
  34. "%s/%s" % (LOG_FILE_DIR, "app.log"), when="H", interval=1,
  35. backupCount=20)
  36. handler.setFormatter(formatter)
  37. root_logger = logging.getLogger()
  38. root_logger.addHandler(handler)
  39. root_logger.setLevel(logging.NOTSET)