Kaynağa Gözat

Add support for Bing Search

tags/v0.3
Ben Kurtovic 8 yıl önce
ebeveyn
işleme
977b587e5e
2 değiştirilmiş dosya ile 92 ekleme ve 17 silme
  1. +19
    -11
      earwigbot/wiki/copyvios/__init__.py
  2. +73
    -6
      earwigbot/wiki/copyvios/search.py

+ 19
- 11
earwigbot/wiki/copyvios/__init__.py Dosyayı Görüntüle

@@ -23,15 +23,14 @@
from time import sleep, time
from urllib2 import build_opener

from earwigbot import exceptions, importer
from earwigbot import exceptions
from earwigbot.wiki.copyvios.markov import MarkovChain
from earwigbot.wiki.copyvios.parsers import ArticleTextParser
from earwigbot.wiki.copyvios.search import YahooBOSSSearchEngine
from earwigbot.wiki.copyvios.search import (
BingSearchEngine, YahooBOSSSearchEngine)
from earwigbot.wiki.copyvios.workers import (
globalize, localize, CopyvioWorkspace)

oauth = importer.new("oauth2")

__all__ = ["CopyvioMixIn", "globalize", "localize"]

class CopyvioMixIn(object):
@@ -62,20 +61,29 @@ class CopyvioMixIn(object):
unknown to us, and UnsupportedSearchEngineError if we are missing a
required package or module, like oauth2 for "Yahoo! BOSS".
"""
engines = {
"Bing": BingSearchEngine,
"Yahoo! BOSS": YahooBOSSSearchEngine
}

engine = self._search_config["engine"]
if engine not in engines:
raise exceptions.UnknownSearchEngineError(engine)

klass = engines[engine]
credentials = self._search_config["credentials"]
opener = build_opener()
opener.addheaders = self._addheaders

if engine == "Yahoo! BOSS":
for dep in klass.requirements():
try:
oauth.__version__ # Force-load the lazy module
__import__(dep).__package__
except ImportError:
e = "Yahoo! BOSS requires the 'oauth2' package: https://github.com/simplegeo/python-oauth2"
e = "Missing a required dependency ({}) for the {} engine"
e = e.format(dep, engine)
raise exceptions.UnsupportedSearchEngineError(e)
opener = build_opener()
opener.addheaders = self._addheaders
return YahooBOSSSearchEngine(credentials, opener)

raise exceptions.UnknownSearchEngineError(engine)
return klass(credentials, opener)

def copyvio_check(self, min_confidence=0.75, max_queries=15, max_time=-1,
no_searches=False, no_links=False, short_circuit=True):


+ 73
- 6
earwigbot/wiki/copyvios/search.py Dosyayı Görüntüle

@@ -24,7 +24,7 @@ from gzip import GzipFile
from json import loads
from socket import error
from StringIO import StringIO
from urllib import quote
from urllib import quote, urlencode
from urllib2 import URLError

from earwigbot import importer
@@ -32,7 +32,7 @@ from earwigbot.exceptions import SearchQueryError

oauth = importer.new("oauth2")

__all__ = ["BaseSearchEngine", "YahooBOSSSearchEngine"]
__all__ = ["BaseSearchEngine", "BingSearchEngine", "YahooBOSSSearchEngine"]

class BaseSearchEngine(object):
"""Base class for a simple search engine interface."""
@@ -51,6 +51,11 @@ class BaseSearchEngine(object):
"""Return a nice string representation of the search engine."""
return "<{0}>".format(self.__class__.__name__)

@staticmethod
def requirements():
    """List the names of the packages this search engine depends on.

    The base implementation declares no dependencies, so the list it
    returns is empty.
    """
    needed = []
    return needed

def search(self, query):
"""Use this engine to search for *query*.

@@ -59,6 +64,64 @@ class BaseSearchEngine(object):
raise NotImplementedError()


class BingSearchEngine(BaseSearchEngine):
    """A search engine interface with Bing Search (via Azure Marketplace)."""
    name = "Bing"

    def __init__(self, cred, opener):
        """Set up the engine and attach Bing's auth header to the opener.

        *cred* must contain a ``"key"`` entry; the key is used as both halves
        of an HTTP Basic ``Authorization`` header. *opener* is the URL opener
        used to perform search requests.
        """
        super(BingSearchEngine, self).__init__(cred, opener)

        key = self.cred["key"]
        # The base64 codec wraps its output with newlines, which are not
        # valid inside a header value, so strip them.
        auth = (key + ":" + key).encode("base64").replace("\n", "")
        # Rebind to a new list rather than appending in place: the caller may
        # have assigned a header list that it shares with other openers, and
        # mutating it would both accumulate duplicate headers and expose the
        # API key on requests that have nothing to do with Bing.
        self.opener.addheaders = list(self.opener.addheaders) + [
            ("Authorization", "Basic " + auth)]

    def search(self, query):
        """Do a Bing web search for *query*.

        Returns a list of URLs, no more than five, ranked by relevance
        (as determined by Bing).
        Raises :py:exc:`~earwigbot.exceptions.SearchQueryError` on errors.
        """
        service = "SearchWeb" if self.cred["type"] == "searchweb" else "Search"
        url = "https://api.datamarket.azure.com/Bing/{0}/Web?".format(service)
        # String parameter values are wrapped in single quotes; the query is
        # additionally wrapped in double quotes (after dropping any it already
        # contains) -- presumably to request an exact-phrase match.
        params = {
            "$format": "json",
            "$top": "5",
            "Query": "'\"" + query.replace('"', "").encode("utf8") + "\"'",
            "Market": "'en-US'",
            "Adult": "'Off'",
            "Options": "'DisableLocationDetection'",
            "WebFileType": "'HTM+HTML+PDF+TEXT+TXT'",
            "WebSearchOptions": "'DisableHostCollapsing+DisableQueryAlterations'"
        }

        try:
            response = self.opener.open(url + urlencode(params))
            result = response.read()
        except (URLError, error) as exc:
            raise SearchQueryError("Bing Error: " + str(exc))

        # Transparently decompress the body if the server gzipped it.
        if response.headers.get("Content-Encoding") == "gzip":
            stream = StringIO(result)
            gzipper = GzipFile(fileobj=stream)
            result = gzipper.read()

        if response.getcode() != 200:
            err = "Bing Error: got response code '{0}':\n{1}'"
            raise SearchQueryError(err.format(response.getcode(), result))
        try:
            res = loads(result)
        except ValueError:
            err = "Bing Error: JSON could not be decoded"
            raise SearchQueryError(err)

        # A response lacking the expected keys is treated as "no results".
        try:
            results = res["d"]["results"]
        except KeyError:
            return []
        return [entry["Url"] for entry in results]


class YahooBOSSSearchEngine(BaseSearchEngine):
"""A search engine interface with Yahoo! BOSS."""
name = "Yahoo! BOSS"
@@ -70,6 +133,10 @@ class YahooBOSSSearchEngine(BaseSearchEngine):
args = ["=".join((enc(k), enc(v))) for k, v in params.iteritems()]
return base + "?" + "&".join(args)

@staticmethod
def requirements():
    """List the names of the packages this search engine depends on."""
    needed = ["oauth2"]
    return needed
def search(self, query):
"""Do a Yahoo! BOSS web search for *query*.

@@ -104,13 +171,13 @@ class YahooBOSSSearchEngine(BaseSearchEngine):
result = gzipper.read()

if response.getcode() != 200:
e = "Yahoo! BOSS Error: got response code '{0}':\n{1}'"
raise SearchQueryError(e.format(response.getcode(), result))
err = "Yahoo! BOSS Error: got response code '{0}':\n{1}'"
raise SearchQueryError(err.format(response.getcode(), result))
try:
res = loads(result)
except ValueError:
e = "Yahoo! BOSS Error: JSON could not be decoded"
raise SearchQueryError(e)
err = "Yahoo! BOSS Error: JSON could not be decoded"
raise SearchQueryError(err)

try:
results = res["bossresponse"]["web"]["results"]


Yükleniyor…
İptal
Kaydet