A semantic search engine for source code https://bitshift.benkurtovic.com/
Não pode escolher mais do que 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.
 
 
 
 
 
 

56 linhas
1.5 KiB

  1. """
  2. :synopsis: Parent crawler module, which supervises all crawlers.
  3. Contains functions for initializing all subsidiary, threaded crawlers.
  4. """
  5. import logging, logging.handlers, os, Queue
  6. from bitshift.crawler import crawler, indexer
  7. __all__ = ["crawl"]
  8. def crawl():
  9. """
  10. Initialize all crawlers (and indexers).
  11. Start the:
  12. 1. GitHub crawler, :class:`crawler.GitHubCrawler`.
  13. 2. Bitbucket crawler, :class:`crawler.BitbucketCrawler`.
  14. 3. Git indexer, :class:`bitshift.crawler.indexer.GitIndexer`.
  15. """
  16. _configure_logging()
  17. MAX_URL_QUEUE_SIZE = 5e3
  18. repo_clone_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE)
  19. threads = [crawler.GitHubCrawler(repo_clone_queue),
  20. crawler.BitbucketCrawler(repo_clone_queue),
  21. indexer.GitIndexer(repo_clone_queue)]
  22. for thread in threads:
  23. thread.start()
  24. def _configure_logging():
  25. LOG_FILE_DIR = "log"
  26. if not os.path.exists(LOG_FILE_DIR):
  27. os.mkdir(LOG_FILE_DIR)
  28. logging.getLogger("requests").setLevel(logging.WARNING)
  29. logging.getLogger("urllib3").setLevel(logging.WARNING)
  30. formatter = logging.Formatter(
  31. fmt=("%(asctime)s %(levelname)s %(name)s %(funcName)s"
  32. " %(message)s"), datefmt="%y-%m-%d %H:%M:%S")
  33. handler = logging.handlers.TimedRotatingFileHandler(
  34. "%s/%s" % (LOG_FILE_DIR, "app.log"), when="H", interval=1,
  35. backupCount=20)
  36. handler.setFormatter(formatter)
  37. root_logger = logging.getLogger()
  38. root_logger.addHandler(handler)
  39. root_logger.setLevel(logging.NOTSET)