A Python robot that edits Wikipedia and interacts with people over IRC https://en.wikipedia.org/wiki/User:EarwigBot
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

203 lignes
5.7 KiB

  1. # -*- coding: utf-8 -*-
  2. from json import loads
  3. from urllib import urlencode
  4. from urllib2 import urlopen
  5. from wiki.tools.category import Category
  6. from wiki.tools.constants import *
  7. from wiki.tools.exceptions import NamespaceNotFoundError
  8. from wiki.tools.page import Page
  9. from wiki.tools.user import User
  10. class Site(object):
  11. """
  12. EarwigBot's Wiki Toolset: Site Class
  13. """
  14. def __init__(self, name=None, project=None, lang=None, base_url=None,
  15. article_path=None, script_path=None, sql=(None, None),
  16. namespaces=None):
  17. """
  18. Docstring needed
  19. """
  20. self._name = name
  21. self._project = project
  22. self._lang = lang
  23. self._base_url = base_url
  24. self._article_path = article_path
  25. self._script_path = script_path
  26. self._sql = sql
  27. self._namespaces = namespaces
  28. # get all of the above attributes that were not specified by the user
  29. self._load_attributes()
  30. def _load_attributes(self, force=False):
  31. """
  32. Docstring needed
  33. """
  34. # all attributes to be loaded, except _namespaces, which is a special
  35. # case because it requires additional params in the API query
  36. attrs = [self._name, self._project, self._lang, self._base_url,
  37. self._article_path, self._script_path]
  38. params = {"action": "query", "meta": "siteinfo"}
  39. if self._namespaces is None or force:
  40. params["siprop"] = "general|namespaces|namespacealiases"
  41. result = self.api_query(params)
  42. self._load_namespaces(result)
  43. elif all(attrs): # everything is already specified and we're not told
  44. return # to force a reload, so do nothing
  45. else: # we're only loading attributes other than _namespaces
  46. params["siprop"] = "general"
  47. result = self.api_query(params)
  48. res = result["query"]["general"]
  49. if self._name is None or force:
  50. self._name = res["wikiid"]
  51. if self._project is None or force:
  52. self._project = res["sitename"].lower()
  53. if self._lang is None or force:
  54. self._lang = res["lang"]
  55. if self._base_url is None or force:
  56. self._base_url = res["server"]
  57. if self._article_path is None or force:
  58. self._article_path = res["articlepath"]
  59. if self._script_path is None or force:
  60. self._script_path = res["scriptpath"]
  61. def _load_namespaces(self, result):
  62. """
  63. Docstring needed
  64. """
  65. self._namespaces = {}
  66. for namespace in result["query"]["namespaces"].values():
  67. ns_id = namespace["id"]
  68. name = namespace["*"]
  69. try:
  70. canonical = namespace["canonical"]
  71. except KeyError:
  72. self._namespaces[ns_id] = [name]
  73. else:
  74. if name != canonical:
  75. self._namespaces[ns_id] = [name, canonical]
  76. else:
  77. self._namespaces[ns_id] = [name]
  78. for namespace in result["query"]["namespacealiases"]:
  79. ns_id = namespace["id"]
  80. alias = namespace["*"]
  81. self._namespaces[ns_id].append(alias)
  82. def api_query(self, params):
  83. """
  84. Docstring needed
  85. """
  86. url = ''.join((self._base_url, self._script_path, "/api.php"))
  87. params["format"] = "json"
  88. data = urlencode(params)
  89. result = urlopen(url, data).read()
  90. return loads(result)
  91. def name(self):
  92. """
  93. Docstring needed
  94. """
  95. return self._name
  96. def project(self):
  97. """
  98. Docstring needed
  99. """
  100. return self._project
  101. def lang(self):
  102. """
  103. Docstring needed
  104. """
  105. return self._lang
  106. def base_url(self):
  107. """
  108. Docstring needed
  109. """
  110. return self._base_url
  111. def article_path(self):
  112. """
  113. Docstring needed
  114. """
  115. return self._article_path
  116. def script_path(self):
  117. """
  118. Docstring needed
  119. """
  120. return self._script_path
  121. def namespaces(self):
  122. """
  123. Docstring needed
  124. """
  125. return self._namespaces
  126. def namespace_id_to_name(self, ns_id, all=False):
  127. """
  128. Docstring needed
  129. """
  130. try:
  131. if all:
  132. return self._namespaces[ns_id]
  133. else:
  134. return self._namespaces[ns_id][0]
  135. except KeyError:
  136. e = "There is no namespace with id {0}.".format(ns_id)
  137. raise NamespaceNotFoundError(e)
  138. def namespace_name_to_id(self, name):
  139. """
  140. Docstring needed
  141. """
  142. lname = name.lower()
  143. for ns_id, names in self._namespaces.items():
  144. lnames = [n.lower() for n in names] # be case-insensitive
  145. if lname in lnames:
  146. return ns_id
  147. e = "There is no namespace with name '{0}'.".format(name)
  148. raise NamespaceNotFoundError(e)
  149. def get_page(self, pagename):
  150. """
  151. Docstring needed
  152. """
  153. prefixes = self.namespace_id_to_name(NS_CATEGORY, all=True)
  154. prefix = pagename.split(":", 1)[0]
  155. if prefix != pagename: # avoid a page that is simply "Category"
  156. if prefix in prefixes:
  157. return Category(self, pagename)
  158. return Page(self, pagename)
  159. def get_category(self, catname):
  160. """
  161. Docstring needed
  162. """
  163. prefix = self.namespace_id_to_name(NS_CATEGORY)
  164. pagename = "{0}:{1}".format(prefix, catname)
  165. return Category(self, pagename)
  166. def get_user(self, username):
  167. """
  168. Docstring needed
  169. """
  170. return User(self, username)