diff --git a/earwigbot/wiki/copyvios/__init__.py b/earwigbot/wiki/copyvios/__init__.py index 4b4a2cf..90673f1 100644 --- a/earwigbot/wiki/copyvios/__init__.py +++ b/earwigbot/wiki/copyvios/__init__.py @@ -268,8 +268,8 @@ class CopyvioMixIn(object): searcher = self._get_search_engine() parser = ArticleTextParser(self.get()) article = MarkovChain(parser.strip()) - workspace = _CopyvioWorkspace(article, min_confidence, - until, self._logger, self._addheaders) + workspace = _CopyvioWorkspace(article, min_confidence, until, + self._logger, self._addheaders) if self._exclusions_db: self._exclusions_db.sync(self.site.name) exclude = lambda u: self._exclusions_db.check(self.site.name, u) @@ -328,8 +328,8 @@ class CopyvioMixIn(object): start_time = time() until = (start_time + max_time) if max_time > 0 else None article = MarkovChain(ArticleTextParser(self.get()).strip()) - workspace = _CopyvioWorkspace(article, min_confidence, - until, self._logger, self._addheaders) + workspace = _CopyvioWorkspace(article, min_confidence, until, + self._logger, self._addheaders, max_time) workspace.enqueue([url]) workspace.spawn(1) workspace.wait() diff --git a/earwigbot/wiki/copyvios/parsers.py b/earwigbot/wiki/copyvios/parsers.py index 9083158..de58122 100644 --- a/earwigbot/wiki/copyvios/parsers.py +++ b/earwigbot/wiki/copyvios/parsers.py @@ -134,7 +134,8 @@ class ArticleTextParser(BaseTextParser): """ schemes = ("http://", "https://") links = mwparserfromhell.parse(self.text).ifilter_external_links() - return [link.url for link in links if link.url.startswith(schemes)] + return [unicode(link.url) for link in links + if link.url.startswith(schemes)] class HTMLTextParser(BaseTextParser): diff --git a/earwigbot/wiki/copyvios/search.py b/earwigbot/wiki/copyvios/search.py index 7570294..4c77a45 100644 --- a/earwigbot/wiki/copyvios/search.py +++ b/earwigbot/wiki/copyvios/search.py @@ -77,9 +77,8 @@ class YahooBOSSSearchEngine(BaseSearchEngine): "oauth_nonce": oauth.generate_nonce(), "oauth_timestamp": oauth.Request.make_timestamp(), "oauth_consumer_key": consumer.key, - "q": quote_plus(query.encode("utf8")), - "type": "html,text", - "format": "json", + "q": quote_plus(query.encode("utf8")), "count": 5, + "type": "html,text", "format": "json", } req = oauth.Request(method="GET", url=url, parameters=params)