
Some locks needed to be tightened.

tags/v0.2
Ben Kurtovic, 10 years ago
commit e73e626994

1 changed file with 34 additions and 32 deletions:
  earwigbot/wiki/copyvios/workers.py  (+34, -32)

earwigbot/wiki/copyvios/workers.py

@@ -201,25 +201,26 @@ class _CopyvioWorker(object):
         else:
             timeout = None
 
-        with self._queues.lock:
-            if self._queue:
-                source = self._queue.get(timeout=timeout)
-                if self._queue.empty():
+        if self._queue:
+            source = self._queue.pop()
+            with self._queues.lock:
+                if not self._queue:
                     del self._queues.sites[self._site]
                     self._queue = None
-            else:
-                site, queue = self._queues.unassigned.get(timeout=timeout)
-                if site is StopIteration:
-                    return StopIteration
-                source = queue.get_nowait()
-                if queue.empty():
+        else:
+            site, queue = self._queues.unassigned.get(timeout=timeout)
+            if site is StopIteration:
+                return StopIteration
+            source = queue.pop()
+            with self._queues.lock:
+                if not queue:
                     del self._queues.sites[site]
                 else:
                     self._site = site
                     self._queue = queue
-            if not source.active():
-                return self._dequeue()
-            return source
+        if not source.active():
+            return self._dequeue()
+        return source
 
     def _run(self):
         """Main entry point for the worker thread.
@@ -326,30 +327,31 @@ class CopyvioWorkspace(object):
         returns ``True`` if we should skip it and ``False`` otherwise.
         """
         for url in urls:
-            if self._is_finished:
-                break
-            if url in self._handled_urls:
-                continue
-            self._handled_urls.append(url)
-            if exclude_check and exclude_check(url):
-                continue
-
-            try:
-                key = tldextract.extract(url).registered_domain
-            except ImportError:  # Fall back on very naive method
-                from urlparse import urlparse
-                key = u".".join(urlparse(url).netloc.split(".")[-2:])
-
-            source = _CopyvioSource(url=url, key=key, **self._source_args)
-            logmsg = u"enqueue(): {0} {1} -> {2}"
             with self._queues.lock:
+                if self._is_finished:
+                    break
+                if url in self._handled_urls:
+                    continue
+                self._handled_urls.append(url)
+                if exclude_check and exclude_check(url):
+                    continue
+
+                try:
+                    key = tldextract.extract(url).registered_domain
+                except ImportError:  # Fall back on very naive method
+                    from urlparse import urlparse
+                    key = u".".join(urlparse(url).netloc.split(".")[-2:])
+
+                source = _CopyvioSource(url=url, key=key, **self._source_args)
+                self.sources.append(source)
+                logmsg = u"enqueue(): {0} {1} -> {2}"
                 if key in self._queues.sites:
                     self._logger.debug(logmsg.format("append", key, url))
-                    self._queues.sites[key].put(source)
+                    self._queues.sites[key].append(source)
                 else:
                     self._logger.debug(logmsg.format("new", key, url))
-                    self._queues.sites[key] = queue = Queue()
-                    queue.put(source)
+                    self._queues.sites[key] = queue = []
+                    queue.append(source)
                     self._queues.unassigned.put((key, queue))
 
     def wait(self):
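
Note on the hunk above: `enqueue()` now runs each URL's handling, including the `key in self._queues.sites` check and the creation of a new per-site list, entirely under `self._queues.lock`, so the check-then-append cannot interleave with a worker trimming an emptied list in `_dequeue()`. A rough sketch of that check-then-append pattern, again with hypothetical names rather than the module's real API:

    import threading

    class SiteQueues(object):              # hypothetical stand-in, as in the sketch above
        def __init__(self):
            self.lock = threading.Lock()
            self.sites = {}                # site key -> list of pending sources

    def enqueue(queues, key, item):
        with queues.lock:                  # lock covers the lookup, the create, and the append
            if key in queues.sites:
                queues.sites[key].append(item)
            else:
                queues.sites[key] = [item] # new per-site list, published while holding the lock

    queues = SiteQueues()
    enqueue(queues, "example.org", "http://example.org/a")
    enqueue(queues, "example.org", "http://example.org/b")
    print(queues.sites)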

