Procházet zdrojové kódy

Bugfix, cleanup.

tags/v0.2
Ben Kurtovic před 9 roky
rodič
revize
5874467ec3
Změněny 3 soubory, v nichž bylo provedeno 8 přidání a 8 odebrání
  1. +4
    -4
      earwigbot/wiki/copyvios/__init__.py
  2. +2
    -1
      earwigbot/wiki/copyvios/parsers.py
  3. +2
    -3
      earwigbot/wiki/copyvios/search.py

+ 4
- 4
earwigbot/wiki/copyvios/__init__.py Zobrazit soubor

@@ -268,8 +268,8 @@ class CopyvioMixIn(object):
searcher = self._get_search_engine()
parser = ArticleTextParser(self.get())
article = MarkovChain(parser.strip())
-workspace = _CopyvioWorkspace(article, min_confidence,
-until, self._logger, self._addheaders)
+workspace = _CopyvioWorkspace(article, min_confidence, until,
+self._logger, self._addheaders)
if self._exclusions_db:
self._exclusions_db.sync(self.site.name)
exclude = lambda u: self._exclusions_db.check(self.site.name, u)
@@ -328,8 +328,8 @@ class CopyvioMixIn(object):
start_time = time()
until = (start_time + max_time) if max_time > 0 else None
article = MarkovChain(ArticleTextParser(self.get()).strip())
-workspace = _CopyvioWorkspace(article, min_confidence,
-until, self._logger, self._addheaders)
+workspace = _CopyvioWorkspace(article, min_confidence, until,
+self._logger, self._addheaders, max_time)
workspace.enqueue([url])
workspace.spawn(1)
workspace.wait()


+ 2
- 1
earwigbot/wiki/copyvios/parsers.py Zobrazit soubor

@@ -134,7 +134,8 @@ class ArticleTextParser(BaseTextParser):
"""
schemes = ("http://", "https://")
links = mwparserfromhell.parse(self.text).ifilter_external_links()
-return [link.url for link in links if link.url.startswith(schemes)]
+return [unicode(link.url) for link in links
+if link.url.startswith(schemes)]


class HTMLTextParser(BaseTextParser):


+ 2
- 3
earwigbot/wiki/copyvios/search.py Zobrazit soubor

@@ -77,9 +77,8 @@ class YahooBOSSSearchEngine(BaseSearchEngine):
"oauth_nonce": oauth.generate_nonce(),
"oauth_timestamp": oauth.Request.make_timestamp(),
"oauth_consumer_key": consumer.key,
-"q": quote_plus(query.encode("utf8")),
-"type": "html,text",
-"format": "json",
+"q": quote_plus(query.encode("utf8")), "count": 5,
+"type": "html,text", "format": "json",
}

req = oauth.Request(method="GET", url=url, parameters=params)


Načítá se…
Zrušit
Uložit