A Python robot that edits Wikipedia and interacts with people over IRC https://en.wikipedia.org/wiki/User:EarwigBot
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

277 lines
8.8 KiB

  1. # -*- coding: utf-8 -*-
  2. from cookielib import CookieJar
  3. from json import loads
  4. from urllib import unquote_plus, urlencode
  5. from urllib2 import build_opener, HTTPCookieProcessor, URLError
  6. from urlparse import urlparse
  7. from wiki.tools.category import Category
  8. from wiki.tools.constants import *
  9. from wiki.tools.exceptions import *
  10. from wiki.tools.page import Page
  11. from wiki.tools.user import User
  12. class Site(object):
  13. """
  14. EarwigBot's Wiki Toolset: Site Class
  15. """
  16. def __init__(self, name=None, project=None, lang=None, base_url=None,
  17. article_path=None, script_path=None, sql=(None, None),
  18. namespaces=None, login=(None, None)):
  19. """
  20. Docstring needed
  21. """
  22. # attributes referring to site information, filled in by an API query
  23. # if they are missing (and an API url is available)
  24. self._name = name
  25. self._project = project
  26. self._lang = lang
  27. self._base_url = base_url
  28. self._article_path = article_path
  29. self._script_path = script_path
  30. self._sql = sql
  31. self._namespaces = namespaces
  32. # set up cookiejar and URL opener for making API queries
  33. self._cookiejar = CookieJar()
  34. self._opener = build_opener(HTTPCookieProcessor(self._cookiejar))
  35. self._opener.addheaders = [('User-agent', USER_AGENT)]
  36. # use a username and password to login if they were provided
  37. if login[0] is not None and login[1] is not None:
  38. self._login(login[0], login[1])
  39. # get all of the above attributes that were not specified as arguments
  40. self._load_attributes()
  41. def _login(self, name, password, token="", attempt=0):
  42. """
  43. Docstring needed
  44. """
  45. params = {"action": "login", "lgname": name, "lgpassword": password,
  46. "lgtoken": token}
  47. result = self.api_query(params)
  48. res = result["login"]["result"]
  49. if res == "Success":
  50. return
  51. elif res == "NeedToken" and attempt == 0:
  52. token = result["login"]["token"]
  53. return self._login(name, password, token, attempt=1)
  54. else:
  55. if res == "Illegal":
  56. e = "The provided username is illegal."
  57. elif res == "NotExists":
  58. e = "The provided username does not exist."
  59. elif res == "EmptyPass":
  60. e = "No password was given."
  61. elif res == "WrongPass" or res == "WrongPluginPass":
  62. e = "The given password is incorrect."
  63. else:
  64. e = "Couldn't login; server says '{0}'.".format(res)
  65. raise LoginError(e)
  66. def _get_logged_in_user(self):
  67. """
  68. Docstring needed
  69. """
  70. # first try to get username from the cookie jar to avoid an
  71. # unnecessary API query
  72. cookie_name = ''.join((self._name, "UserName"))
  73. cookie_domain = urlparse(self._base_url).netloc
  74. for cookie in self._cookiejar:
  75. if cookie.name == cookie_name and cookie.domain == cookie_domain:
  76. return unquote_plus(cookie.value)
  77. # if we end up here, we're probably an anon and thus an API query
  78. # will be required to get our username
  79. params = {"action": "query", "meta": "userinfo"}
  80. result = self.api_query(params)
  81. return result["query"]["userinfo"]["name"]
  82. def _load_attributes(self, force=False):
  83. """
  84. Docstring needed
  85. """
  86. # all attributes to be loaded, except _namespaces, which is a special
  87. # case because it requires additional params in the API query
  88. attrs = [self._name, self._project, self._lang, self._base_url,
  89. self._article_path, self._script_path]
  90. params = {"action": "query", "meta": "siteinfo"}
  91. if self._namespaces is None or force:
  92. params["siprop"] = "general|namespaces|namespacealiases"
  93. result = self.api_query(params)
  94. self._load_namespaces(result)
  95. elif all(attrs): # everything is already specified and we're not told
  96. return # to force a reload, so do nothing
  97. else: # we're only loading attributes other than _namespaces
  98. params["siprop"] = "general"
  99. result = self.api_query(params)
  100. res = result["query"]["general"]
  101. if self._name is None or force:
  102. self._name = res["wikiid"]
  103. if self._project is None or force:
  104. self._project = res["sitename"].lower()
  105. if self._lang is None or force:
  106. self._lang = res["lang"]
  107. if self._base_url is None or force:
  108. self._base_url = res["server"]
  109. if self._article_path is None or force:
  110. self._article_path = res["articlepath"]
  111. if self._script_path is None or force:
  112. self._script_path = res["scriptpath"]
  113. def _load_namespaces(self, result):
  114. """
  115. Docstring needed
  116. """
  117. self._namespaces = {}
  118. for namespace in result["query"]["namespaces"].values():
  119. ns_id = namespace["id"]
  120. name = namespace["*"]
  121. try:
  122. canonical = namespace["canonical"]
  123. except KeyError:
  124. self._namespaces[ns_id] = [name]
  125. else:
  126. if name != canonical:
  127. self._namespaces[ns_id] = [name, canonical]
  128. else:
  129. self._namespaces[ns_id] = [name]
  130. for namespace in result["query"]["namespacealiases"]:
  131. ns_id = namespace["id"]
  132. alias = namespace["*"]
  133. self._namespaces[ns_id].append(alias)
  134. def api_query(self, params):
  135. """
  136. Docstring needed
  137. """
  138. url = ''.join((self._base_url, self._script_path, "/api.php"))
  139. params["format"] = "json" # this is the only format we understand
  140. data = urlencode(params)
  141. try:
  142. response = self._opener.open(url, data)
  143. except URLError as error:
  144. if hasattr(error, "reason"):
  145. e = "API query at {0} failed because {1}.".format(error.geturl,
  146. error.reason)
  147. elif hasattr(error, "code"):
  148. e = "API query at {0} failed; got an error code of {1}."
  149. e = e.format(error.geturl, error.code)
  150. else:
  151. e = "API query failed."
  152. raise SiteAPIError(e)
  153. else:
  154. result = response.read()
  155. return loads(result) # parse as a JSON object
  156. def name(self):
  157. """
  158. Docstring needed
  159. """
  160. return self._name
  161. def project(self):
  162. """
  163. Docstring needed
  164. """
  165. return self._project
  166. def lang(self):
  167. """
  168. Docstring needed
  169. """
  170. return self._lang
  171. def base_url(self):
  172. """
  173. Docstring needed
  174. """
  175. return self._base_url
  176. def article_path(self):
  177. """
  178. Docstring needed
  179. """
  180. return self._article_path
  181. def script_path(self):
  182. """
  183. Docstring needed
  184. """
  185. return self._script_path
  186. def namespaces(self):
  187. """
  188. Docstring needed
  189. """
  190. return self._namespaces
  191. def namespace_id_to_name(self, ns_id, all=False):
  192. """
  193. Docstring needed
  194. """
  195. try:
  196. if all:
  197. return self._namespaces[ns_id]
  198. else:
  199. return self._namespaces[ns_id][0]
  200. except KeyError:
  201. e = "There is no namespace with id {0}.".format(ns_id)
  202. raise NamespaceNotFoundError(e)
  203. def namespace_name_to_id(self, name):
  204. """
  205. Docstring needed
  206. """
  207. lname = name.lower()
  208. for ns_id, names in self._namespaces.items():
  209. lnames = [n.lower() for n in names] # be case-insensitive
  210. if lname in lnames:
  211. return ns_id
  212. e = "There is no namespace with name '{0}'.".format(name)
  213. raise NamespaceNotFoundError(e)
  214. def get_page(self, pagename):
  215. """
  216. Docstring needed
  217. """
  218. prefixes = self.namespace_id_to_name(NS_CATEGORY, all=True)
  219. prefix = pagename.split(":", 1)[0]
  220. if prefix != pagename: # avoid a page that is simply "Category"
  221. if prefix in prefixes:
  222. return Category(self, pagename)
  223. return Page(self, pagename)
  224. def get_category(self, catname):
  225. """
  226. Docstring needed
  227. """
  228. prefix = self.namespace_id_to_name(NS_CATEGORY)
  229. pagename = "{0}:{1}".format(prefix, catname)
  230. return Category(self, pagename)
  231. def get_user(self, username=None):
  232. """
  233. Docstring needed
  234. """
  235. if username is None:
  236. username = self._get_logged_in_user()
  237. return User(self, username)