A Python robot that edits Wikipedia and interacts with people over IRC https://en.wikipedia.org/wiki/User:EarwigBot
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

site.py 8.1 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. # -*- coding: utf-8 -*-
  2. from cookielib import CookieJar
  3. from json import loads
  4. from urllib import urlencode
  5. from urllib2 import build_opener, HTTPCookieProcessor, URLError
  6. from wiki.tools.category import Category
  7. from wiki.tools.constants import *
  8. from wiki.tools.exceptions import *
  9. from wiki.tools.page import Page
  10. from wiki.tools.user import User
  11. class Site(object):
  12. """
  13. EarwigBot's Wiki Toolset: Site Class
  14. """
  15. def __init__(self, name=None, project=None, lang=None, base_url=None,
  16. article_path=None, script_path=None, sql=(None, None),
  17. namespaces=None, login=(None, None)):
  18. """
  19. Docstring needed
  20. """
  21. # attributes referring to site information, filled in by an API query
  22. # if they are missing (and an API url is available)
  23. self._name = name
  24. self._project = project
  25. self._lang = lang
  26. self._base_url = base_url
  27. self._article_path = article_path
  28. self._script_path = script_path
  29. self._sql = sql
  30. self._namespaces = namespaces
  31. # set up cookiejar and URL opener for making API queries
  32. self._cookiejar = CookieJar(cookie_file)
  33. self._opener = build_opener(HTTPCookieProcessor(self._cookiejar))
  34. self._opener.addheaders = [('User-agent', USER_AGENT)]
  35. # use a username and password to login if they were provided
  36. if login[0] is not None and login[1] is not None:
  37. self._login(login[0], login[1])
  38. # get all of the above attributes that were not specified as arguments
  39. self._load_attributes()
  40. def _login(self, name, password, token="", attempt=0):
  41. """
  42. Docstring needed
  43. """
  44. params = {"action": "login", "lgname": name, "lgpassword": password,
  45. "lgtoken": token}
  46. result = self.api_query(params)
  47. res = result["login"]["result"]
  48. if res == "Success":
  49. return
  50. elif res == "NeedToken" and attempt == 0:
  51. token = result["login"]["token"]
  52. return self._login(name, password, token, attempt=1)
  53. else:
  54. if res == "Illegal":
  55. e = "The provided username is illegal."
  56. elif res == "NotExists":
  57. e = "The provided username does not exist."
  58. elif res == "EmptyPass":
  59. e = "No password was given."
  60. elif res == "WrongPass" or res == "WrongPluginPass":
  61. e = "The given password is incorrect."
  62. else:
  63. e = "Couldn't login; server says '{0}'.".format(res)
  64. raise LoginError(e)
  65. def _load_attributes(self, force=False):
  66. """
  67. Docstring needed
  68. """
  69. # all attributes to be loaded, except _namespaces, which is a special
  70. # case because it requires additional params in the API query
  71. attrs = [self._name, self._project, self._lang, self._base_url,
  72. self._article_path, self._script_path]
  73. params = {"action": "query", "meta": "siteinfo"}
  74. if self._namespaces is None or force:
  75. params["siprop"] = "general|namespaces|namespacealiases"
  76. result = self.api_query(params)
  77. self._load_namespaces(result)
  78. elif all(attrs): # everything is already specified and we're not told
  79. return # to force a reload, so do nothing
  80. else: # we're only loading attributes other than _namespaces
  81. params["siprop"] = "general"
  82. result = self.api_query(params)
  83. res = result["query"]["general"]
  84. if self._name is None or force:
  85. self._name = res["wikiid"]
  86. if self._project is None or force:
  87. self._project = res["sitename"].lower()
  88. if self._lang is None or force:
  89. self._lang = res["lang"]
  90. if self._base_url is None or force:
  91. self._base_url = res["server"]
  92. if self._article_path is None or force:
  93. self._article_path = res["articlepath"]
  94. if self._script_path is None or force:
  95. self._script_path = res["scriptpath"]
  96. def _load_namespaces(self, result):
  97. """
  98. Docstring needed
  99. """
  100. self._namespaces = {}
  101. for namespace in result["query"]["namespaces"].values():
  102. ns_id = namespace["id"]
  103. name = namespace["*"]
  104. try:
  105. canonical = namespace["canonical"]
  106. except KeyError:
  107. self._namespaces[ns_id] = [name]
  108. else:
  109. if name != canonical:
  110. self._namespaces[ns_id] = [name, canonical]
  111. else:
  112. self._namespaces[ns_id] = [name]
  113. for namespace in result["query"]["namespacealiases"]:
  114. ns_id = namespace["id"]
  115. alias = namespace["*"]
  116. self._namespaces[ns_id].append(alias)
  117. def api_query(self, params):
  118. """
  119. Docstring needed
  120. """
  121. url = ''.join((self._base_url, self._script_path, "/api.php"))
  122. params["format"] = "json" # this is the only format we understand
  123. data = urlencode(params)
  124. try:
  125. response = self._opener.open(url, data)
  126. except URLError as error:
  127. if hasattr(error, "reason"):
  128. e = "API query at {0} failed because {1}.".format(error.geturl,
  129. error.reason)
  130. elif hasattr(error, "code"):
  131. e = "API query at {0} failed; got an error code of {1}."
  132. e = e.format(error.geturl, error.code)
  133. else:
  134. e = "API query failed."
  135. raise SiteAPIError(e)
  136. else:
  137. result = response.read()
  138. return loads(result) # parse as a JSON object
  139. def name(self):
  140. """
  141. Docstring needed
  142. """
  143. return self._name
  144. def project(self):
  145. """
  146. Docstring needed
  147. """
  148. return self._project
  149. def lang(self):
  150. """
  151. Docstring needed
  152. """
  153. return self._lang
  154. def base_url(self):
  155. """
  156. Docstring needed
  157. """
  158. return self._base_url
  159. def article_path(self):
  160. """
  161. Docstring needed
  162. """
  163. return self._article_path
  164. def script_path(self):
  165. """
  166. Docstring needed
  167. """
  168. return self._script_path
  169. def namespaces(self):
  170. """
  171. Docstring needed
  172. """
  173. return self._namespaces
  174. def namespace_id_to_name(self, ns_id, all=False):
  175. """
  176. Docstring needed
  177. """
  178. try:
  179. if all:
  180. return self._namespaces[ns_id]
  181. else:
  182. return self._namespaces[ns_id][0]
  183. except KeyError:
  184. e = "There is no namespace with id {0}.".format(ns_id)
  185. raise NamespaceNotFoundError(e)
  186. def namespace_name_to_id(self, name):
  187. """
  188. Docstring needed
  189. """
  190. lname = name.lower()
  191. for ns_id, names in self._namespaces.items():
  192. lnames = [n.lower() for n in names] # be case-insensitive
  193. if lname in lnames:
  194. return ns_id
  195. e = "There is no namespace with name '{0}'.".format(name)
  196. raise NamespaceNotFoundError(e)
  197. def get_page(self, pagename):
  198. """
  199. Docstring needed
  200. """
  201. prefixes = self.namespace_id_to_name(NS_CATEGORY, all=True)
  202. prefix = pagename.split(":", 1)[0]
  203. if prefix != pagename: # avoid a page that is simply "Category"
  204. if prefix in prefixes:
  205. return Category(self, pagename)
  206. return Page(self, pagename)
  207. def get_category(self, catname):
  208. """
  209. Docstring needed
  210. """
  211. prefix = self.namespace_id_to_name(NS_CATEGORY)
  212. pagename = "{0}:{1}".format(prefix, catname)
  213. return Category(self, pagename)
  214. def get_user(self, username=None):
  215. """
  216. Docstring needed
  217. """
  218. if username is None:
  219. params = {"action": "query", "meta": "userinfo"}
  220. result = self.api_query(params)
  221. username = result["query"]["userinfo"]["name"]
  222. return User(self, username)