Переглянути джерело

Attempt to fix /tmp race condition.

tags/v1.0^2
Ben Kurtovic 10 років тому
джерело
коміт
d609c233a1
2 змінені файли: 18 додано та 19 видалено
  1. +1
    -0
      bitshift/crawler/crawl.py
  2. +17
    -19
      bitshift/crawler/indexer.py

+ 1
- 0
bitshift/crawler/crawl.py Переглянути файл

@@ -40,6 +40,7 @@ def crawl():
for thread in threads: for thread in threads:
thread.start() thread.start()
parse_servers = start_parse_servers() parse_servers = start_parse_servers()
time.sleep(5)


try: try:
while 1: while 1:


+ 17
- 19
bitshift/crawler/indexer.py Переглянути файл

@@ -33,6 +33,7 @@ class GitRepository(object):
repository belongs to (eg, GitHub, BitBucket). repository belongs to (eg, GitHub, BitBucket).
:ivar rank: (float) The rank of the repository, as assigned by :ivar rank: (float) The rank of the repository, as assigned by
:class:`crawler.GitHubCrawler`. :class:`crawler.GitHubCrawler`.
:ivar dirname: (str) The repository's on-disk directory name.
""" """


def __init__(self, url, name, framework_name, rank): def __init__(self, url, name, framework_name, rank):
@@ -54,6 +55,7 @@ class GitRepository(object):
self.name = name self.name = name
self.framework_name = framework_name self.framework_name = framework_name
self.rank = rank self.rank = rank
self.dirname = name.replace("-", "--").replace("/", "-")


class GitIndexer(threading.Thread): class GitIndexer(threading.Thread):
""" """
@@ -125,19 +127,14 @@ class GitIndexer(threading.Thread):
:type repo_url: :class:`GitRepository` :type repo_url: :class:`GitRepository`
""" """


with _ChangeDir("%s/%s" % (GIT_CLONE_DIR, repo.name)):
with _ChangeDir("%s/%s" % (GIT_CLONE_DIR, repo.dirname)):
try: try:
self._insert_repository_codelets(repo) self._insert_repository_codelets(repo)
except Exception: except Exception:
self._logger.exception("Exception raised while indexing:") self._logger.exception("Exception raised while indexing:")
finally: finally:
if os.path.isdir("%s/%s" % (GIT_CLONE_DIR, repo.name)):
if len([obj for obj in os.listdir('.') if
os.path.isdir(obj)]) <= 1:
shutil.rmtree("%s/%s" % (
GIT_CLONE_DIR, repo.name.split("/")[0]))
else:
shutil.rmtree("%s/%s" % (GIT_CLONE_DIR, repo.name))
if os.path.isdir("%s/%s" % (GIT_CLONE_DIR, repo.dirname)):
shutil.rmtree("%s/%s" % (GIT_CLONE_DIR, repo.dirname))


def _insert_repository_codelets(self, repo): def _insert_repository_codelets(self, repo):
""" """
@@ -167,9 +164,9 @@ class GitIndexer(threading.Thread):


authors = [(self._decode(author), None) for author in authors = [(self._decode(author), None) for author in
commits_meta[filename]["authors"]] commits_meta[filename]["authors"]]
codelet = Codelet("%s:%s" % (repo.name, filename), source, filename,
None, authors, self._generate_file_url(filename,
repo.url, repo.framework_name),
url = self._generate_file_url(filename, repo.url, repo.framework_name)
codelet = Codelet("%s: %s" % (repo.name, filename), source,
filename, None, authors, url,
commits_meta[filename]["time_created"], commits_meta[filename]["time_created"],
commits_meta[filename]["time_last_modified"], commits_meta[filename]["time_last_modified"],
repo.rank) repo.rank)
@@ -439,18 +436,19 @@ class _GitCloner(threading.Thread):
GIT_CLONE_TIMEOUT = 500 GIT_CLONE_TIMEOUT = 500


queue_percent_full = (float(self.index_queue.qsize()) / queue_percent_full = (float(self.index_queue.qsize()) /
self.index_queue.maxsize) * 100
self.index_queue.maxsize) * 100


exit_code = None exit_code = None
command = ("perl -e 'alarm shift @ARGV; exec @ARGV' %d git clone"
" --single-branch %s %s/%s || pkill -f git")
command = ["perl", "-e", "alarm shift @ARGV; exec @ARGV",
str(GIT_CLONE_TIMEOUT), "git", "clone", "--single-branch",
repo.url, GIT_CLONE_DIR + "/" + repo.dirname, "||", "pkill",
"-f", "git"]


command_attempt = 0 command_attempt = 0
while exit_code is None: while exit_code is None:
try: try:
exit_code = subprocess.call(command % (GIT_CLONE_TIMEOUT,
repo.url, GIT_CLONE_DIR, repo.name), shell=True)
except Exception:
exit_code = subprocess.call(command)
except Exception: # TODO: subprocess.CalledProcessError instead?
time.sleep(1) time.sleep(1)
command_attempt += 1 command_attempt += 1
if command_attempt == 20: if command_attempt == 20:
@@ -461,8 +459,8 @@ class _GitCloner(threading.Thread):
break break


if exit_code != 0: if exit_code != 0:
if os.path.isdir("%s/%s" % (GIT_CLONE_DIR, repo.name)):
shutil.rmtree("%s/%s" % (GIT_CLONE_DIR, repo.name))
if os.path.isdir("%s/%s" % (GIT_CLONE_DIR, repo.dirname)):
shutil.rmtree("%s/%s" % (GIT_CLONE_DIR, repo.dirname))
return return


while self.index_queue.full(): while self.index_queue.full():


Завантаження…
Відмінити
Зберегти