A Python robot that edits Wikipedia and interacts with people over IRC https://en.wikipedia.org/wiki/User:EarwigBot
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

78 lines
2.9 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2009-2012 by Ben Kurtovic <ben.kurtovic@verizon.net>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from json import loads
  23. from urllib import quote_plus, urlencode
  24. try:
  25. import oauth2 as oauth
  26. except ImportError:
  27. oauth = None
  28. from earwigbot.wiki.exceptions import SearchQueryError
  29. __all__ = ["BaseSearchEngine", "YahooBOSSSearchEngine"]
  30. class BaseSearchEngine(object):
  31. def __init__(self, cred):
  32. """Store credentials 'cred' for searching later on."""
  33. self.cred = cred
  34. def search(self, query):
  35. """Use this engine to search for 'query'.
  36. Not implemented in this base class; overridden in subclasses."""
  37. raise NotImplementedError()
  38. class YahooBOSSSearchEngine(BaseSearchEngine):
  39. def search(self, query):
  40. """Do a Yahoo! BOSS web search for 'query'.
  41. Returns a list of URLs, no more than fifty, ranked by relevance (as
  42. determined by Yahoo). Raises SearchQueryError() on errors.
  43. """
  44. base_url = "http://yboss.yahooapis.com/ysearch/web"
  45. query = quote_plus(query.join('"', '"'))
  46. params = {"q": query, "style": "raw", "format": "json"}
  47. url = "{0}?{1}".format(base_url, urlencode(params))
  48. consumer = oauth.Consumer(key=self.cred["key"], secret=self.cred["secret"])
  49. client = oauth.Client(consumer)
  50. headers, body = client.request(url, "GET")
  51. if headers["status"] != "200":
  52. e = "Yahoo! BOSS Error: got response code '{0}':\n{1}'"
  53. raise SearchQueryError(e.format(headers["status"], body))
  54. try:
  55. res = loads(body)
  56. except ValueError:
  57. e = "Yahoo! BOSS Error: JSON could not be decoded"
  58. raise SearchQueryError(e)
  59. try:
  60. results = res["bossresponse"]["web"]["results"]
  61. except KeyError:
  62. return []
  63. return [result["url"] for result in results]