From a95356676bfb5baf20c97d026a4b0ec67f894b5a Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Sun, 5 Jun 2016 23:01:55 -0400
Subject: [PATCH] Add GoogleSearchEngine.

---
 earwigbot/wiki/copyvios/search.py | 41 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 39 insertions(+), 2 deletions(-)

diff --git a/earwigbot/wiki/copyvios/search.py b/earwigbot/wiki/copyvios/search.py
index e46abef..f6e21e3 100644
--- a/earwigbot/wiki/copyvios/search.py
+++ b/earwigbot/wiki/copyvios/search.py
@@ -34,8 +34,8 @@ from earwigbot.exceptions import SearchQueryError
 etree = importer.new("lxml.etree")
 oauth = importer.new("oauth2")
 
-__all__ = ["BingSearchEngine", "YahooBOSSSearchEngine", "YandexSearchEngine",
-           "SEARCH_ENGINES"]
+__all__ = ["BingSearchEngine", "GoogleSearchEngine", "YahooBOSSSearchEngine",
+           "YandexSearchEngine", "SEARCH_ENGINES"]
 
 class _BaseSearchEngine(object):
     """Base class for a simple search engine interface."""
@@ -132,6 +132,42 @@ class BingSearchEngine(_BaseSearchEngine):
         return [result["Url"] for result in results]
 
 
+class GoogleSearchEngine(_BaseSearchEngine):
+    """A search engine interface with Google Search."""
+    name = "Google"
+
+    def search(self, query):
+        """Do a Google web search for *query*.
+
+        Returns a list of URLs ranked by relevance (as determined by Google),
+        or an empty list if the response contains no results.
+        Raises :py:exc:`~earwigbot.exceptions.SearchQueryError` on errors.
+        """
+        domain = self.cred.get("proxy", "www.googleapis.com")
+        url = "https://{0}/customsearch/v1?".format(domain)
+        params = {
+            "cx": self.cred["id"],
+            "key": self.cred["key"],
+            # Strip embedded double quotes, then wrap the whole query in a
+            # single pair of quotes.
+            "q": '"' + query.replace('"', "").encode("utf8") + '"',
+            "alt": "json",
+            "num": str(self.count),
+            "safe": "off",
+            "fields": "items(link)"
+        }
+
+        result = self._open(url + urlencode(params))
+
+        try:
+            res = loads(result)
+        except ValueError:
+            err = "Google Error: JSON could not be decoded"
+            raise SearchQueryError(err)
+        # "items" may be missing from the response; treat that as no results.
+        return [item["link"] for item in res.get("items", [])]
+
+
 class YahooBOSSSearchEngine(_BaseSearchEngine):
     """A search engine interface with Yahoo! BOSS."""
     name = "Yahoo! BOSS"
@@ -224,6 +260,7 @@ class YandexSearchEngine(_BaseSearchEngine):
 
 SEARCH_ENGINES = {
     "Bing": BingSearchEngine,
+    "Google": GoogleSearchEngine,
     "Yahoo! BOSS": YahooBOSSSearchEngine,
     "Yandex": YandexSearchEngine
 }