A Python robot that edits Wikipedia and interacts with people over IRC https://en.wikipedia.org/wiki/User:EarwigBot
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

705 lines
28 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2009-2012 by Ben Kurtovic <ben.kurtovic@verizon.net>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from hashlib import md5
  23. import re
  24. from time import gmtime, strftime
  25. from urllib import quote
  26. from earwigbot import exceptions
  27. from earwigbot.wiki.copyright import CopyrightMixin
  28. __all__ = ["Page"]
  29. class Page(CopyrightMixin):
  30. """
  31. **EarwigBot's Wiki Toolset: Page Class**
  32. Represents a page on a given :py:class:`~earwigbot.wiki.site.Site`. Has
  33. methods for getting information about the page, getting page content, and
  34. so on. :py:class:`~earwigbot.wiki.category.Category` is a subclass of
  35. :py:class:`Page` with additional methods.
  36. *Attributes:*
  37. - :py:attr:`site`: the page's corresponding Site object
  38. - :py:attr:`title`: the page's title, or pagename
  39. - :py:attr:`exists`: whether the page exists
  40. - :py:attr:`pageid`: an integer ID representing the page
  41. - :py:attr:`url`: the page's URL
  42. - :py:attr:`namespace`: the page's namespace as an integer
  43. - :py:attr:`protection`: the page's current protection status
  44. - :py:attr:`is_talkpage`: ``True`` if this is a talkpage, else ``False``
  45. - :py:attr:`is_redirect`: ``True`` if this is a redirect, else ``False``
  46. *Public methods:*
  47. - :py:meth:`reload`: forcibly reloads the page's attributes
  48. - :py:meth:`toggle_talk`: returns a content page's talk page, or vice versa
  49. - :py:meth:`get`: returns the page's content
  50. - :py:meth:`get_redirect_target`: returns the page's destination if it is a
  51. redirect
  52. - :py:meth:`get_creator`: returns a User object representing the first
  53. person to edit the page
  54. - :py:meth:`edit`: replaces the page's content or creates a new page
  55. - :py:meth:`add_section`: adds a new section at the bottom of the page
  - :py:meth:`~earwigbot.wiki.copyright.CopyrightMixin.copyvio_check`:
    checks the page for copyright violations
  - :py:meth:`~earwigbot.wiki.copyright.CopyrightMixin.copyvio_compare`:
    checks the page for copyright violations like :py:meth:`copyvio_check`,
    but against a specific URL
  61. """
  # Pattern for a "#REDIRECT [[Target]]" directive at the start of the page
  # text; group 1 captures the target title. Written as a raw string so the
  # regex escapes are not parsed as (invalid) string escape sequences.
  re_redirect = r"^\s*\#\s*redirect\s*\[\[(.*?)\]\]"
  def __init__(self, site, title, follow_redirects=False):
    """Constructor for new Page instances.

    Takes three arguments: a Site object, the Page's title (or pagename),
    and whether or not to follow redirects (optional, defaults to False).

    No API query is made here. The namespace ID and talkpage status are
    guessed from the title's prefix using the site's namespace converter;
    _load_attributes() later replaces them with the API's values.
    """
    super(Page, self).__init__(site)
    self._site = site
    self._title = title.strip()
    self._follow_redirects = self._keep_following = follow_redirects

    # Load state, as mapped by the `exists` property:
    # 0 = not loaded yet, 1 = invalid title, 2 = missing, 3 = exists.
    self._exists = 0
    self._pageid = None
    self._is_redirect = None
    self._lastrevid = None
    self._protection = None
    self._fullurl = None
    self._content = None
    self._creator = None

    # Attributes used for editing/deleting/protecting/etc:
    self._token = None
    self._basetimestamp = None
    self._starttimestamp = None

    # Try to determine the page's namespace using our site's namespace
    # converter. Compare against the *stripped* title so that surrounding
    # whitespace alone does not make a prefix-less title look prefixed:
    prefix = self._title.split(":", 1)[0]
    if prefix != self._title:  # ignore a page titled "Category" or "User"
      try:
        self._namespace = self._site.namespace_name_to_id(prefix)
      except exceptions.NamespaceNotFoundError:
        # The text before the colon is not a namespace name:
        self._namespace = 0
    else:
      self._namespace = 0

    # Talkpages have odd IDs, while content pages have even IDs; the
    # negative ("special") namespaces are never talkpages:
    self._is_talkpage = self._namespace >= 0 and self._namespace % 2 == 1
  105. def __repr__(self):
  106. """Return the canonical string representation of the Page."""
  107. res = "Page(title={0!r}, follow_redirects={1!r}, site={2!r})"
  108. return res.format(self._title, self._follow_redirects, self._site)
  109. def __str__(self):
  110. """Return a nice string representation of the Page."""
  111. return '<Page "{0}" of {1}>'.format(self.title, str(self._site))
  112. def _assert_validity(self):
  113. """Used to ensure that our page's title is valid.
  114. If this method is called when our page is not valid (and after
  115. _load_attributes() has been called), InvalidPageError will be raised.
  116. Note that validity != existence. If a page's title is invalid (e.g, it
  117. contains "[") it will always be invalid, and cannot be edited.
  118. """
  119. if self._exists == 1:
  120. e = "Page '{0}' is invalid.".format(self._title)
  121. raise exceptions.InvalidPageError(e)
  122. def _assert_existence(self):
  123. """Used to ensure that our page exists.
  124. If this method is called when our page doesn't exist (and after
  125. _load_attributes() has been called), PageNotFoundError will be raised.
  126. It will also call _assert_validity() beforehand.
  127. """
  128. self._assert_validity()
  129. if self._exists == 2:
  130. e = "Page '{0}' does not exist.".format(self._title)
  131. raise exceptions.PageNotFoundError(e)
  132. def _load(self):
  133. """Call _load_attributes() and follows redirects if we're supposed to.
  134. This method will only follow redirects if follow_redirects=True was
  135. passed to __init__() (perhaps indirectly passed by site.get_page()).
  136. It avoids the API's &redirects param in favor of manual following,
  137. so we can act more realistically (we don't follow double redirects, and
  138. circular redirects don't break us).
  139. This will raise RedirectError if we have a problem following, but that
  140. is a bug and should NOT happen.
  141. If we're following a redirect, this will make a grand total of three
  142. API queries. It's a lot, but each one is quite small.
  143. """
  144. self._load_attributes()
  145. if self._keep_following and self._is_redirect:
  146. self._title = self.get_redirect_target()
  147. self._keep_following = False # don't follow double redirects
  148. self._content = None # reset the content we just loaded
  149. self._load_attributes()
  150. def _load_attributes(self, result=None):
  151. """Load various data from the API in a single query.
  152. Loads self._title, ._exists, ._is_redirect, ._pageid, ._fullurl,
  153. ._protection, ._namespace, ._is_talkpage, ._creator, ._lastrevid,
  154. ._token, and ._starttimestamp using the API. It will do a query of
  155. its own unless *result* is provided, in which case we'll pretend
  156. *result* is what the query returned.
  157. Assuming the API is sound, this should not raise any exceptions.
  158. """
  159. if not result:
  160. params = {"action": "query", "rvprop": "user", "intoken": "edit",
  161. "prop": "info|revisions", "rvlimit": 1, "rvdir": "newer",
  162. "titles": self._title, "inprop": "protection|url"}
  163. result = self._site._api_query(params)
  164. res = result["query"]["pages"].values()[0]
  165. # Normalize our pagename/title thing:
  166. self._title = res["title"]
  167. try:
  168. res["redirect"]
  169. except KeyError:
  170. self._is_redirect = False
  171. else:
  172. self._is_redirect = True
  173. self._pageid = int(result["query"]["pages"].keys()[0])
  174. if self._pageid < 0:
  175. if "missing" in res:
  176. # If it has a negative ID and it's missing; we can still get
  177. # data like the namespace, protection, and URL:
  178. self._exists = 2
  179. else:
  180. # If it has a negative ID and it's invalid, then break here,
  181. # because there's no other data for us to get:
  182. self._exists = 1
  183. return
  184. else:
  185. self._exists = 3
  186. self._fullurl = res["fullurl"]
  187. self._protection = res["protection"]
  188. try:
  189. self._token = res["edittoken"]
  190. except KeyError:
  191. pass
  192. else:
  193. self._starttimestamp = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())
  194. # We've determined the namespace and talkpage status in __init__()
  195. # based on the title, but now we can be sure:
  196. self._namespace = res["ns"]
  197. self._is_talkpage = self._namespace % 2 == 1 # talkpages have odd IDs
  198. # These last two fields will only be specified if the page exists:
  199. self._lastrevid = res.get("lastrevid")
  200. try:
  201. self._creator = res['revisions'][0]['user']
  202. except KeyError:
  203. pass
  204. def _load_content(self, result=None):
  205. """Load current page content from the API.
  206. If *result* is provided, we'll pretend that is the result of an API
  207. query and try to get content from that. Otherwise, we'll do an API
  208. query on our own.
  209. Don't call this directly, ever; use reload() followed by get() if you
  210. want to force content reloading.
  211. """
  212. if not result:
  213. params = {"action": "query", "prop": "revisions", "rvlimit": 1,
  214. "rvprop": "content|timestamp", "titles": self._title}
  215. result = self._site._api_query(params)
  216. res = result["query"]["pages"].values()[0]
  217. try:
  218. self._content = res["revisions"][0]["*"]
  219. self._basetimestamp = res["revisions"][0]["timestamp"]
  220. except KeyError:
  221. # This can only happen if the page was deleted since we last called
  222. # self._load_attributes(). In that case, some of our attributes are
  223. # outdated, so force another self._load_attributes():
  224. self._load_attributes()
  225. self._assert_existence()
  226. def _edit(self, params=None, text=None, summary=None, minor=None, bot=None,
  227. force=None, section=None, captcha_id=None, captcha_word=None,
  228. tries=0):
  229. """Edit the page!
  230. If *params* is given, we'll use it as our API query parameters.
  231. Otherwise, we'll build params using the given kwargs via
  232. _build_edit_params().
  233. We'll then try to do the API query, and catch any errors the API raises
  234. in _handle_edit_errors(). We'll then throw these back as subclasses of
  235. EditError.
  236. """
  237. # Try to get our edit token, and die if we can't:
  238. if not self._token:
  239. self._load_attributes()
  240. if not self._token:
  241. e = "You don't have permission to edit this page."
  242. raise exceptions.PermissionsError(e)
  243. # Weed out invalid pages before we get too far:
  244. self._assert_validity()
  245. # Build our API query string:
  246. if not params:
  247. params = self._build_edit_params(text, summary, minor, bot, force,
  248. section, captcha_id, captcha_word)
  249. else: # Make sure we have the right token:
  250. params["token"] = self._token
  251. # Try the API query, catching most errors with our handler:
  252. try:
  253. result = self._site._api_query(params)
  254. except SiteAPIError as error:
  255. if not hasattr(error, "code"):
  256. raise # We can only handle errors with a code attribute
  257. result = self._handle_edit_errors(error, params, tries)
  258. # If everything was successful, reset invalidated attributes:
  259. if result["edit"]["result"] == "Success":
  260. self._content = None
  261. self._basetimestamp = None
  262. self._exists = 0
  263. return
  264. # If we're here, then the edit failed. If it's because of AssertEdit,
  265. # handle that. Otherwise, die - something odd is going on:
  266. try:
  267. assertion = result["edit"]["assert"]
  268. except KeyError:
  269. raise exceptions.EditError(result["edit"])
  270. self._handle_assert_edit(assertion, params, tries)
  271. def _build_edit_params(self, text, summary, minor, bot, force, section,
  272. captcha_id, captcha_word):
  273. """Given some keyword arguments, build an API edit query string."""
  274. hashed = md5(text).hexdigest() # Checksum to ensure text is correct
  275. params = {"action": "edit", "title": self._title, "text": text,
  276. "token": self._token, "summary": summary, "md5": hashed}
  277. if section:
  278. params["section"] = section
  279. if captcha_id and captcha_word:
  280. params["captchaid"] = captcha_id
  281. params["captchaword"] = captcha_word
  282. if minor:
  283. params["minor"] = "true"
  284. else:
  285. params["notminor"] = "true"
  286. if bot:
  287. params["bot"] = "true"
  288. if not force:
  289. params["starttimestamp"] = self._starttimestamp
  290. if self._basetimestamp:
  291. params["basetimestamp"] = self._basetimestamp
  292. if self._exists == 2:
  293. # Page does not exist; don't edit if it already exists:
  294. params["createonly"] = "true"
  295. else:
  296. params["recreate"] = "true"
  297. return params
  298. def _handle_edit_errors(self, error, params, tries):
  299. """If our edit fails due to some error, try to handle it.
  300. We'll either raise an appropriate exception (for example, if the page
  301. is protected), or we'll try to fix it (for example, if we can't edit
  302. due to being logged out, we'll try to log in).
  303. """
  304. if error.code in ["noedit", "cantcreate", "protectedtitle",
  305. "noimageredirect"]:
  306. raise exceptions.PermissionsError(error.info)
  307. elif error.code in ["noedit-anon", "cantcreate-anon",
  308. "noimageredirect-anon"]:
  309. if not all(self._site._login_info):
  310. # Insufficient login info:
  311. raise exceptions.PermissionsError(error.info)
  312. if tries == 0:
  313. # We have login info; try to login:
  314. self._site._login(self._site._login_info)
  315. self._token = None # Need a new token; old one is invalid now
  316. return self._edit(params=params, tries=1)
  317. else:
  318. # We already tried to log in and failed!
  319. e = "Although we should be logged in, we are not. This may be a cookie problem or an odd bug."
  320. raise exceptions.LoginError(e)
  321. elif error.code in ["editconflict", "pagedeleted", "articleexists"]:
  322. # These attributes are now invalidated:
  323. self._content = None
  324. self._basetimestamp = None
  325. self._exists = 0
  326. raise exceptions.EditConflictError(error.info)
  327. elif error.code in ["emptypage", "emptynewsection"]:
  328. raise exceptions.NoContentError(error.info)
  329. elif error.code == "contenttoobig":
  330. raise exceptions.ContentTooBigError(error.info)
  331. elif error.code == "spamdetected":
  332. raise exceptions.SpamDetectedError(error.info)
  333. elif error.code == "filtered":
  334. raise exceptions.FilteredError(error.info)
  335. raise exceptions.EditError(": ".join((error.code, error.info)))
  336. def _handle_assert_edit(self, assertion, params, tries):
  337. """If we can't edit due to a failed AssertEdit assertion, handle that.
  338. If the assertion was 'user' and we have valid login information, try to
  339. log in. Otherwise, raise PermissionsError with details.
  340. """
  341. if assertion == "user":
  342. if not all(self._site._login_info):
  343. # Insufficient login info:
  344. e = "AssertEdit: user assertion failed, and no login info was provided."
  345. raise exceptions.PermissionsError(e)
  346. if tries == 0:
  347. # We have login info; try to login:
  348. self._site._login(self._site._login_info)
  349. self._token = None # Need a new token; old one is invalid now
  350. return self._edit(params=params, tries=1)
  351. else:
  352. # We already tried to log in and failed!
  353. e = "Although we should be logged in, we are not. This may be a cookie problem or an odd bug."
  354. raise exceptions.LoginError(e)
  355. elif assertion == "bot":
  356. e = "AssertEdit: bot assertion failed; we don't have a bot flag!"
  357. raise exceptions.PermissionsError(e)
  358. # Unknown assertion, maybe "true", "false", or "exists":
  359. e = "AssertEdit: assertion '{0}' failed.".format(assertion)
  360. raise exceptions.PermissionsError(e)
  361. @property
  362. def site(self):
  363. """The Page's corresponding Site object."""
  364. return self._site
  365. @property
  366. def title(self):
  367. """The Page's title, or "pagename".
  368. This won't do any API queries on its own. Any other attributes or
  369. methods that do API queries will reload the title, however, like
  370. :py:attr:`exists` and :py:meth:`get`, potentially "normalizing" it or
  371. following redirects if :py:attr:`self._follow_redirects` is ``True``.
  372. """
  373. return self._title
  374. @property
  375. def exists(self):
  376. """Information about whether the Page exists or not.
  377. The "information" is a tuple with two items. The first is a bool,
  378. either ``True`` if the page exists or ``False`` if it does not. The
  379. second is a string giving more information, either ``"invalid"``,
  380. (title is invalid, e.g. it contains ``"["``), ``"missing"``, or
  381. ``"exists"``.
  382. Makes an API query only if we haven't already made one.
  383. """
  384. cases = {
  385. 0: (None, "unknown"),
  386. 1: (False, "invalid"),
  387. 2: (False, "missing"),
  388. 3: (True, "exists"),
  389. }
  390. if self._exists == 0:
  391. self._load()
  392. return cases[self._exists]
  393. @property
  394. def pageid(self):
  395. """An integer ID representing the Page.
  396. Makes an API query only if we haven't already made one.
  397. Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
  398. :py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
  399. invalid or the page does not exist, respectively.
  400. """
  401. if self._exists == 0:
  402. self._load()
  403. self._assert_existence() # Missing pages do not have IDs
  404. return self._pageid
  405. @property
  406. def url(self):
  407. """The page's URL.
  408. Like :py:meth:`title`, this won't do any API queries on its own. If the
  409. API was never queried for this page, we will attempt to determine the
  410. URL ourselves based on the title.
  411. """
  412. if self._fullurl:
  413. return self._fullurl
  414. else:
  415. slug = quote(self._title.replace(" ", "_"), safe="/:")
  416. path = self._site._article_path.replace("$1", slug)
  417. return ''.join((self._site._base_url, path))
  418. @property
  419. def namespace(self):
  420. """The page's namespace ID (an integer).
  421. Like :py:meth:`title`, this won't do any API queries on its own. If the
  422. API was never queried for this page, we will attempt to determine the
  423. namespace ourselves based on the title.
  424. """
  425. return self._namespace
  426. @property
  427. def protection(self):
  428. """The page's current protection status.
  429. Makes an API query only if we haven't already made one.
  430. Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` if the page
  431. name is invalid. Won't raise an error if the page is missing because
  432. those can still be create-protected.
  433. """
  434. if self._exists == 0:
  435. self._load()
  436. self._assert_validity() # Invalid pages cannot be protected
  437. return self._protection
  438. @property
  439. def is_talkpage(self):
  440. """``True`` if the page is a talkpage, otherwise ``False``.
  441. Like :py:meth:`title`, this won't do any API queries on its own. If the
  442. API was never queried for this page, we will attempt to determine
  443. whether it is a talkpage ourselves based on its namespace.
  444. """
  445. return self._is_talkpage
  446. @property
  447. def is_redirect(self):
  448. """``True`` if the page is a redirect, otherwise ``False``.
  449. Makes an API query only if we haven't already made one.
  450. We will return ``False`` even if the page does not exist or is invalid.
  451. """
  452. if self._exists == 0:
  453. self._load()
  454. return self._is_redirect
  455. def reload(self):
  456. """Forcibly reload the page's attributes.
  457. Emphasis on *reload*: this is only necessary if there is reason to
  458. believe they have changed.
  459. """
  460. self._load()
  461. if self._content is not None:
  462. # Only reload content if it has already been loaded:
  463. self._load_content()
  def toggle_talk(self, follow_redirects=None):
    """Return a content page's talk page, or vice versa.

    The title of the new page is determined by namespace logic, not API
    queries. We won't make any API queries on our own.

    If *follow_redirects* is anything other than ``None`` (the default), it
    will be passed to the new :py:class:`~earwigbot.wiki.page.Page`
    object's :py:meth:`__init__`. Otherwise, we'll use the value passed to
    our own :py:meth:`__init__`.

    Will raise :py:exc:`~earwigbot.exceptions.InvalidPageError` if our
    namespace ID is negative, but we won't raise an exception if our page
    is otherwise missing or invalid.
    """
    if self._namespace < 0:
      ns = self._site.namespace_id_to_name(self._namespace)
      e = "Pages in the {0} namespace can't have talk pages.".format(ns)
      raise exceptions.InvalidPageError(e)

    # Talk namespaces sit one above their content namespace:
    if self._is_talkpage:
      new_ns = self._namespace - 1
    else:
      new_ns = self._namespace + 1

    if self._namespace == 0:
      # Namespace 0 titles carry no prefix, so a colon here belongs to the
      # title itself ("Foo: Bar") and must be kept. Leading colons are
      # dropped so an explicit ":Foo" still maps to "Foo".
      # NOTE(review): assumes a leading colon is never part of a real
      # title - confirm against the site's title rules.
      body = self._title.lstrip(":")
    else:
      try:
        body = self._title.split(":", 1)[1]
      except IndexError:
        body = self._title

    new_prefix = self._site.namespace_id_to_name(new_ns)

    # If the new page is in namespace 0, don't do ":Title" (it's correct,
    # but unnecessary), just do "Title":
    if new_prefix:
      new_title = u":".join((new_prefix, body))
    else:
      new_title = body

    if follow_redirects is None:
      follow_redirects = self._follow_redirects
    return Page(self._site, new_title, follow_redirects)
  499. def get(self):
  500. """Return page content, which is cached if you try to call get again.
  501. Raises InvalidPageError or PageNotFoundError if the page name is
  502. invalid or the page does not exist, respectively.
  503. """
  504. if self._exists == 0:
  505. # Kill two birds with one stone by doing an API query for both our
  506. # attributes and our page content:
  507. params = {"action": "query", "rvlimit": 1, "titles": self._title,
  508. "prop": "info|revisions", "inprop": "protection|url",
  509. "intoken": "edit", "rvprop": "content|timestamp"}
  510. result = self._site._api_query(params)
  511. self._load_attributes(result=result)
  512. self._assert_existence()
  513. self._load_content(result=result)
  514. # Follow redirects if we're told to:
  515. if self._keep_following and self._is_redirect:
  516. self._title = self.get_redirect_target()
  517. self._keep_following = False # Don't follow double redirects
  518. self._exists = 0 # Force another API query
  519. self.get()
  520. return self._content
  521. # Make sure we're dealing with a real page here. This may be outdated
  522. # if the page was deleted since we last called self._load_attributes(),
  523. # but self._load_content() can handle that:
  524. self._assert_existence()
  525. if self._content is None:
  526. self._load_content()
  527. return self._content
  528. def get_redirect_target(self):
  529. """If the page is a redirect, return its destination.
  530. Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
  531. :py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
  532. invalid or the page does not exist, respectively. Raises
  533. :py:exc:`~earwigbot.exceptions.RedirectError` if the page is not a
  534. redirect.
  535. """
  536. content = self.get()
  537. try:
  538. return re.findall(self.re_redirect, content, flags=re.I)[0]
  539. except IndexError:
  540. e = "The page does not appear to have a redirect target."
  541. raise exceptions.RedirectError(e)
  542. def get_creator(self):
  543. """Return the User object for the first person to edit the page.
  544. Makes an API query only if we haven't already made one. Normally, we
  545. can get the creator along with everything else (except content) in
  546. :py:meth:`_load_attributes`. However, due to a limitation in the API
  547. (can't get the editor of one revision and the content of another at
  548. both ends of the history), if our other attributes were only loaded
  549. through :py:meth:`get`, we'll have to do another API query.
  550. Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
  551. :py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
  552. invalid or the page does not exist, respectively.
  553. """
  554. if self._exists == 0:
  555. self._load()
  556. self._assert_existence()
  557. if not self._creator:
  558. self._load()
  559. self._assert_existence()
  560. return self._site.get_user(self._creator)
  561. def edit(self, text, summary, minor=False, bot=True, force=False):
  562. """Replace the page's content or creates a new page.
  563. *text* is the new page content, with *summary* as the edit summary.
  564. If *minor* is ``True``, the edit will be marked as minor. If *bot* is
  565. ``True``, the edit will be marked as a bot edit, but only if we
  566. actually have a bot flag.
  567. Use *force* to push the new content even if there's an edit conflict or
  568. the page was deleted/recreated between getting our edit token and
  569. editing our page. Be careful with this!
  570. """
  571. self._edit(text=text, summary=summary, minor=minor, bot=bot,
  572. force=force)
  573. def add_section(self, text, title, minor=False, bot=True, force=False):
  574. """Add a new section to the bottom of the page.
  575. The arguments for this are the same as those for :py:meth:`edit`, but
  576. instead of providing a summary, you provide a section title.
  577. Likewise, raised exceptions are the same as :py:meth:`edit`'s.
  578. This should create the page if it does not already exist, with just the
  579. new section as content.
  580. """
  581. self._edit(text=text, summary=title, minor=minor, bot=bot, force=force,
  582. section="new")