A Python robot that edits Wikipedia and interacts with people over IRC https://en.wikipedia.org/wiki/User:EarwigBot
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

108 lines
4.0 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2009-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from gzip import GzipFile
  23. from json import loads
  24. from StringIO import StringIO
  25. from urllib import quote_plus
  26. from earwigbot import importer
  27. from earwigbot.exceptions import SearchQueryError
  28. oauth = importer.new("oauth2")
  29. __all__ = ["BaseSearchEngine", "YahooBOSSSearchEngine"]
  30. class BaseSearchEngine(object):
  31. """Base class for a simple search engine interface."""
  32. name = "Base"
  33. def __init__(self, cred, opener):
  34. """Store credentials (*cred*) and *opener* for searching later on."""
  35. self.cred = cred
  36. self.opener = opener
  37. def __repr__(self):
  38. """Return the canonical string representation of the search engine."""
  39. return "{0}()".format(self.__class__.__name__)
  40. def __str__(self):
  41. """Return a nice string representation of the search engine."""
  42. return "<{0}>".format(self.__class__.__name__)
  43. def search(self, query):
  44. """Use this engine to search for *query*.
  45. Not implemented in this base class; overridden in subclasses.
  46. """
  47. raise NotImplementedError()
  48. class YahooBOSSSearchEngine(BaseSearchEngine):
  49. """A search engine interface with Yahoo! BOSS."""
  50. name = "Yahoo! BOSS"
  51. def search(self, query):
  52. """Do a Yahoo! BOSS web search for *query*.
  53. Returns a list of URLs, no more than fifty, ranked by relevance (as
  54. determined by Yahoo). Raises
  55. :py:exc:`~earwigbot.exceptions.SearchQueryError` on errors.
  56. """
  57. key, secret = self.cred["key"], self.cred["secret"]
  58. consumer = oauth.Consumer(key=key, secret=secret)
  59. url = "http://yboss.yahooapis.com/ysearch/web"
  60. params = {
  61. "oauth_version": oauth.OAUTH_VERSION,
  62. "oauth_nonce": oauth.generate_nonce(),
  63. "oauth_timestamp": oauth.Request.make_timestamp(),
  64. "oauth_consumer_key": consumer.key,
  65. "q": quote_plus(query.encode("utf8")), "count": 5,
  66. "type": "html,text", "format": "json",
  67. }
  68. req = oauth.Request(method="GET", url=url, parameters=params)
  69. req.sign_request(oauth.SignatureMethod_HMAC_SHA1(), consumer, None)
  70. response = self.opener.open(req.to_url())
  71. result = response.read()
  72. if response.headers.get("Content-Encoding") == "gzip":
  73. stream = StringIO(result)
  74. gzipper = GzipFile(fileobj=stream)
  75. result = gzipper.read()
  76. if response.getcode() != 200:
  77. e = "Yahoo! BOSS Error: got response code '{0}':\n{1}'"
  78. raise SearchQueryError(e.format(response.getcode(), result))
  79. try:
  80. res = loads(result)
  81. except ValueError:
  82. e = "Yahoo! BOSS Error: JSON could not be decoded"
  83. raise SearchQueryError(e)
  84. try:
  85. results = res["bossresponse"]["web"]["results"]
  86. except KeyError:
  87. return []
  88. return [result["url"] for result in results]