A semantic search engine for source code https://bitshift.benkurtovic.com/
Non puoi selezionare più di 25 argomenti Gli argomenti devono iniziare con una lettera o un numero, possono includere trattini ('-') e possono essere lunghi fino a 35 caratteri.
 
 
 
 
 
 

56 righe
1.5 KiB

  1. """
  2. :synopsis: Parent crawler module, which supervises all crawlers.
  3. Contains functions for initializing all subsidiary, threaded crawlers.
  4. """
  5. import logging, logging.handlers, os, Queue
  6. from bitshift.crawler import crawler, indexer
  7. __all__ = ["crawl"]
  8. def crawl():
  9. """
  10. Initialize all crawlers (and indexers).
  11. Start the:
  12. 1. GitHub crawler, :class:`crawler.GitHubCrawler`.
  13. 2. Bitbucket crawler, :class:`crawler.BitbucketCrawler`.
  14. 3. Git indexer, :class:`bitshift.crawler.indexer.GitIndexer`.
  15. """
  16. _configure_logging()
  17. MAX_URL_QUEUE_SIZE = 5e3
  18. repo_clone_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE)
  19. threads = [crawler.GitHubCrawler(repo_clone_queue),
  20. crawler.BitbucketCrawler(repo_clone_queue),
  21. indexer.GitIndexer(repo_clone_queue)]
  22. for thread in threads:
  23. thread.start()
  24. def _configure_logging():
  25. LOG_FILE_DIR = "log"
  26. if not os.path.exists(LOG_FILE_DIR):
  27. os.mkdir(LOG_FILE_DIR)
  28. logging.getLogger("requests").setLevel(logging.WARNING)
  29. logging.getLogger("urllib3").setLevel(logging.WARNING)
  30. formatter = logging.Formatter(
  31. fmt=("%(asctime)s %(levelname)s %(name)s %(funcName)s"
  32. " %(message)s"), datefmt="%y-%m-%d %H:%M:%S")
  33. handler = logging.handlers.TimedRotatingFileHandler(
  34. "%s/%s" % (LOG_FILE_DIR, "app.log"), when="H", interval=1,
  35. backupCount=20)
  36. handler.setFormatter(formatter)
  37. root_logger = logging.getLogger()
  38. root_logger.addHandler(handler)
  39. root_logger.setLevel(logging.NOTSET)