Browse Source

Add partially integrated BitbucketCrawler().

Add:
    bitshift/crawler/
        __init__.py
            -Initialize 'BitbucketCrawler()' singleton.
            -Instantiate all thread instances on-the-fly in a 'threads' array, as
            opposed to individual named variables.

        crawler.py
            -Add 'BitbucketCrawler()', to crawl Bitbucket for repositories.
            -'BitbucketCrawler()' has not yet been fully tested for proper
            functionality.
            -Bitbucket repositories are not yet accounted for in
            'indexer._generate_file_url()'.
tags/v1.0^2
Severyn Kozak 10 years ago
parent
commit
2954161747
1 changed file with 9 additions and 7 deletions
  1. +9
    -7
      bitshift/crawler/__init__.py

+ 9
- 7
bitshift/crawler/__init__.py View File

@@ -15,20 +15,22 @@ def crawl():
Initialize all crawlers (and indexers). Initialize all crawlers (and indexers).


Start the: Start the:
1. GitHub crawler, :class:`bitshift.crawler.crawler.GitHubCrawler`
2. Git indexer, :class:`bitshift.crawler.indexer.GitIndexer`
1. GitHub crawler, :class:`crawler.GitHubCrawler`.
2. Bitbucket crawler, :class:`crawler.BitbucketCrawler`.
3. Git indexer, :class:`bitshift.crawler.indexer.GitIndexer`.
""" """


MAX_URL_QUEUE_SIZE = 5e3 MAX_URL_QUEUE_SIZE = 5e3
DEBUG_FILE = "crawler.log" DEBUG_FILE = "crawler.log"


logging.basicConfig(filename=DEBUG_FILE, logging.basicConfig(filename=DEBUG_FILE,
format="%(asctime)s:\t%(threadName)s:\t%(message)s",
format="%(levelname)s %(asctime)s:\t%(threadName)s:\t%(message)s",
level=logging.DEBUG) level=logging.DEBUG)


repository_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE)
github_crawler = crawler.GitHubCrawler(repository_queue)
git_indexer = indexer.GitIndexer(repository_queue)
repo_clone_queue = Queue.Queue(maxsize=MAX_URL_QUEUE_SIZE)
threads = [crawler.GitHubCrawler(repo_clone_queue),
crawler.BitbucketCrawler(repo_clone_queue),
indexer.GitIndexer(repo_clone_queue)]


for thread in [github_crawler, git_indexer]:
for thread in threads:
thread.start() thread.start()

Loading…
Cancel
Save