A Python robot that edits Wikipedia and interacts with people over IRC https://en.wikipedia.org/wiki/User:EarwigBot
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

site.py 3.7 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. # -*- coding: utf-8 -*-
  2. from json import loads
  3. from urllib import urlencode
  4. from urllib2 import urlopen
  5. from wiki.tools.category import Category
  6. from wiki.tools.constants import *
  7. from wiki.tools.exceptions import NamespaceNotFoundError
  8. from wiki.tools.page import Page
  9. from wiki.tools.user import User
  10. class Site(object):
  11. """
  12. EarwigBot's Wiki Toolset: Site Class
  13. """
  14. def __init__(self, name, project, lang, api=None, sql=(None, None)):
  15. """
  16. Docstring needed
  17. """
  18. self.name = name
  19. self.project = project
  20. self.lang = lang
  21. self._api = api
  22. self._sql = sql
  23. self._namespaces = None
  24. def _get_namespaces_from_api(self):
  25. """
  26. Docstring needed
  27. """
  28. params = {"action": "query", "meta": "siteinfo",
  29. "siprop": "namespaces|namespacealiases"}
  30. result = self.api_query(params)
  31. if self._namespaces is None:
  32. self._namespaces = {}
  33. for namespace in result["query"]["namespaces"].values():
  34. ns_id = namespace["id"]
  35. name = namespace["*"]
  36. try:
  37. canonical = namespace["canonical"]
  38. except KeyError:
  39. self._namespaces[ns_id] = [name]
  40. else:
  41. if name != canonical:
  42. self._namespaces[ns_id] = [name, canonical]
  43. else:
  44. self._namespaces[ns_id] = [name]
  45. for namespace in result["query"]["namespacealiases"]:
  46. ns_id = namespace["id"]
  47. alias = namespace["*"]
  48. self._namespaces[ns_id].append(alias)
  49. def api_query(self, params):
  50. """
  51. Docstring needed
  52. """
  53. params["format"] = "json"
  54. data = urlencode(params)
  55. result = urlopen(self._api, data).read()
  56. return loads(result)
  57. def namespaces(self):
  58. """
  59. Docstring needed
  60. """
  61. if self._namespaces is None:
  62. self._get_namespaces_from_api()
  63. return self._namespaces
  64. def namespace_id_to_name(self, ns_id, all=False):
  65. """
  66. Docstring needed
  67. """
  68. if self._namespaces is None:
  69. self._get_namespaces_from_api()
  70. try:
  71. if all:
  72. return self._namespaces[ns_id]
  73. else:
  74. return self._namespaces[ns_id][0]
  75. except KeyError:
  76. e = "There is no namespace with id {0}.".format(ns_id)
  77. raise NamespaceNotFoundError(e)
  78. def namespace_name_to_id(self, name):
  79. """
  80. Docstring needed
  81. """
  82. if self._namespaces is None:
  83. self._get_namespaces_from_api()
  84. lname = name.lower()
  85. for ns_id, names in self._namespaces.items():
  86. lnames = [n.lower() for n in names] # be case-insensitive
  87. if lname in lnames:
  88. return ns_id
  89. e = "There is no namespace with name '{0}'.".format(name)
  90. raise NamespaceNotFoundError(e)
  91. def get_page(self, pagename):
  92. """
  93. Docstring needed
  94. """
  95. prefixes = self.namespace_id_to_name(NS_CATEGORY, all=True)
  96. prefix = pagename.split(":", 1)[0]
  97. if prefix != pagename: # avoid a page that is simply "Category"
  98. if prefix in prefixes:
  99. return Category(self, pagename)
  100. return Page(self, pagename)
  101. def get_category(self, catname):
  102. """
  103. Docstring needed
  104. """
  105. prefix = self.namespace_id_to_name(NS_CATEGORY)
  106. pagename = "{0}:{1}".format(prefix, catname)
  107. return Category(self, pagename)
  108. def get_user(self, username):
  109. """
  110. Docstring needed
  111. """
  112. return User(self, username)