A semantic search engine for source code https://bitshift.benkurtovic.com/
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.
 
 
 
 
 
 

56 rader
1.5 KiB

  1. """
  2. :synopsis: Parent crawler module, which supervises all crawlers.
  3. Contains functions for initializing all subsidiary, threaded crawlers.
  4. """
  5. import logging, logging.handlers, os, Queue
  6. from bitshift.crawler import crawler, indexer
  7. __all__ = ["crawl"]
  8. def crawl():
  9. """
  10. Initialize all crawlers (and indexers).
  11. Start the:
  12. 1. GitHub crawler, :class:`crawler.GitHubCrawler`.
  13. 2. Bitbucket crawler, :class:`crawler.BitbucketCrawler`.
  14. 3. Git indexer, :class:`bitshift.crawler.indexer.GitIndexer`.
  15. """
  16. _configure_logging()
  17. MAX_URL_QUEUE_SIZE = 5e3
  18. repo_clone_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE)
  19. threads = [crawler.GitHubCrawler(repo_clone_queue),
  20. crawler.BitbucketCrawler(repo_clone_queue),
  21. indexer.GitIndexer(repo_clone_queue)]
  22. for thread in threads:
  23. thread.start()
  24. def _configure_logging():
  25. LOG_FILE_DIR = "log"
  26. if not os.path.exists(LOG_FILE_DIR):
  27. os.mkdir(LOG_FILE_DIR)
  28. logging.getLogger("requests").setLevel(logging.WARNING)
  29. logging.getLogger("urllib3").setLevel(logging.WARNING)
  30. formatter = logging.Formatter(
  31. fmt=("%(asctime)s %(levelname)s %(name)s %(funcName)s"
  32. " %(message)s"), datefmt="%y-%m-%d %H:%M:%S")
  33. handler = logging.handlers.TimedRotatingFileHandler(
  34. "%s/%s" % (LOG_FILE_DIR, "app.log"), when="H", interval=1,
  35. backupCount=20)
  36. handler.setFormatter(formatter)
  37. root_logger = logging.getLogger()
  38. root_logger.addHandler(handler)
  39. root_logger.setLevel(logging.NOTSET)