A Python robot that edits Wikipedia and interacts with people over IRC https://en.wikipedia.org/wiki/User:EarwigBot
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

86 lines
3.2 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2009-2012 by Ben Kurtovic <ben.kurtovic@verizon.net>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from json import loads
  23. from urllib import quote_plus, urlencode
  24. try:
  25. import oauth2 as oauth
  26. except ImportError:
  27. oauth = None
  28. from earwigbot.exceptions import SearchQueryError
  29. __all__ = ["BaseSearchEngine", "YahooBOSSSearchEngine"]
  30. class BaseSearchEngine(object):
  31. def __init__(self, cred):
  32. """Store credentials 'cred' for searching later on."""
  33. self.cred = cred
  34. def __repr__(self):
  35. """Return the canonical string representation of the search engine."""
  36. return "{0}()".format(self.__class__.__name__)
  37. def __str__(self):
  38. """Return a nice string representation of the search engine."""
  39. return "<{0}>".format(self.__class__.__name__)
  40. def search(self, query):
  41. """Use this engine to search for 'query'.
  42. Not implemented in this base class; overridden in subclasses."""
  43. raise NotImplementedError()
  44. class YahooBOSSSearchEngine(BaseSearchEngine):
  45. def search(self, query):
  46. """Do a Yahoo! BOSS web search for 'query'.
  47. Returns a list of URLs, no more than fifty, ranked by relevance (as
  48. determined by Yahoo). Raises SearchQueryError() on errors.
  49. """
  50. base_url = "http://yboss.yahooapis.com/ysearch/web"
  51. query = quote_plus(query.join('"', '"'))
  52. params = {"q": query, "style": "raw", "format": "json"}
  53. url = "{0}?{1}".format(base_url, urlencode(params))
  54. consumer = oauth.Consumer(key=self.cred["key"], secret=self.cred["secret"])
  55. client = oauth.Client(consumer)
  56. headers, body = client.request(url, "GET")
  57. if headers["status"] != "200":
  58. e = "Yahoo! BOSS Error: got response code '{0}':\n{1}'"
  59. raise SearchQueryError(e.format(headers["status"], body))
  60. try:
  61. res = loads(body)
  62. except ValueError:
  63. e = "Yahoo! BOSS Error: JSON could not be decoded"
  64. raise SearchQueryError(e)
  65. try:
  66. results = res["bossresponse"]["web"]["results"]
  67. except KeyError:
  68. return []
  69. return [result["url"] for result in results]