A Python robot that edits Wikipedia and interacts with people over IRC https://en.wikipedia.org/wiki/User:EarwigBot
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

92 rader
3.4 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2009-2012 Ben Kurtovic <ben.kurtovic@verizon.net>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from json import loads
  23. from urllib import quote_plus, urlencode
  24. import oauth2 as oauth
  25. from earwigbot.exceptions import SearchQueryError
  26. __all__ = ["BaseSearchEngine", "YahooBOSSSearchEngine"]
  27. class BaseSearchEngine(object):
  28. """Base class for a simple search engine interface."""
  29. name = "Base"
  30. def __init__(self, cred):
  31. """Store credentials *cred* for searching later on."""
  32. self.cred = cred
  33. def __repr__(self):
  34. """Return the canonical string representation of the search engine."""
  35. return "{0}()".format(self.__class__.__name__)
  36. def __str__(self):
  37. """Return a nice string representation of the search engine."""
  38. return "<{0}>".format(self.__class__.__name__)
  39. def search(self, query):
  40. """Use this engine to search for *query*.
  41. Not implemented in this base class; overridden in subclasses.
  42. """
  43. raise NotImplementedError()
  44. class YahooBOSSSearchEngine(BaseSearchEngine):
  45. """A search engine interface with Yahoo! BOSS."""
  46. name = "Yahoo! BOSS"
  47. def search(self, query):
  48. """Do a Yahoo! BOSS web search for *query*.
  49. Returns a list of URLs, no more than fifty, ranked by relevance (as
  50. determined by Yahoo). Raises
  51. :py:exc:`~earwigbot.exceptions.SearchQueryError` on errors.
  52. """
  53. base_url = "http://yboss.yahooapis.com/ysearch/web"
  54. query = quote_plus(query.join('"', '"'))
  55. params = {"q": query, "type": "html,text", "format": "json"}
  56. url = "{0}?{1}".format(base_url, urlencode(params))
  57. consumer = oauth.Consumer(key=self.cred["key"],
  58. secret=self.cred["secret"])
  59. client = oauth.Client(consumer)
  60. headers, body = client.request(url, "GET")
  61. if headers["status"] != "200":
  62. e = "Yahoo! BOSS Error: got response code '{0}':\n{1}'"
  63. raise SearchQueryError(e.format(headers["status"], body))
  64. try:
  65. res = loads(body)
  66. except ValueError:
  67. e = "Yahoo! BOSS Error: JSON could not be decoded"
  68. raise SearchQueryError(e)
  69. try:
  70. results = res["bossresponse"]["web"]["results"]
  71. except KeyError:
  72. return []
  73. return [result["url"] for result in results]