Additional IRC commands and bot tasks for EarwigBot https://en.wikipedia.org/wiki/User:EarwigBot
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

197 lines
8.4 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from datetime import datetime
  23. import mwparserfromhell
  24. from earwigbot.tasks import Task
  25. from earwigbot.wiki.constants import *
# Namespace number used for draft pages ("Draft:" namespace). It is defined
# locally and used alongside the NS_* constants star-imported from
# earwigbot.wiki.constants (presumably because that module does not provide
# it -- confirm before removing).
NS_DRAFT = 118
  27. class AfCUndated(Task):
  28. """A task to clear [[Category:Undated AfC submissions]]."""
  29. name = "afc_undated"
  30. number = 5
  31. def setup(self):
  32. cfg = self.config.tasks.get(self.name, {})
  33. self.category = cfg.get("category", "Undated AfC submissions")
  34. default_summary = "Adding timestamp to undated [[WP:AFC|Articles for creation]] submission."
  35. self.summary = self.make_summary(cfg.get("summary", default_summary))
  36. self.namespaces = {
  37. "submission": [NS_USER, NS_PROJECT, NS_PROJECT_TALK, NS_DRAFT],
  38. "talk": [NS_TALK, NS_FILE_TALK, NS_TEMPLATE_TALK, NS_HELP_TALK,
  39. NS_CATEGORY_TALK]
  40. }
  41. self.aliases = {
  42. "submission": ["AfC submission"],
  43. "talk": ["WikiProject Articles for creation"]
  44. }
  45. def run(self, **kwargs):
  46. try:
  47. self.statistics = self.bot.tasks.get("afc_statistics")
  48. except KeyError:
  49. err = "Requires afc_statistics task (from earwigbot_plugins)"
  50. self.logger.error(err)
  51. return
  52. self.site = self.bot.wiki.get_site()
  53. category = self.site.get_category(self.category)
  54. logmsg = u"Undated category [[{0}]] has {1} members"
  55. self.logger.info(logmsg.format(category.title, category.size))
  56. if category.size:
  57. self._build_aliases()
  58. counter = 0
  59. for page in category:
  60. if not counter % 10:
  61. if self.shutoff_enabled():
  62. return
  63. self._process_page(page)
  64. counter += 1
  65. def _build_aliases(self):
  66. """Build template name aliases for the AfC templates."""
  67. for key in self.aliases:
  68. base = self.aliases[key][0]
  69. aliases = [base, "Template:" + base]
  70. result = self.site.api_query(
  71. action="query", list="backlinks", bllimit=50,
  72. blfilterredir="redirects", bltitle=aliases[1])
  73. for data in result["query"]["backlinks"]:
  74. redir = self.site.get_page(data["title"])
  75. aliases.append(redir.title)
  76. if redir.namespace == NS_TEMPLATE:
  77. aliases.append(redir.title.split(":", 1)[1])
  78. self.aliases[key] = aliases
  79. def _process_page(self, page):
  80. """Date the necessary templates inside a page object."""
  81. if not page.check_exclusion():
  82. msg = u"Skipping [[{0}]]; bot excluded from editing"
  83. self.logger.info(msg.format(page.title))
  84. return
  85. is_sub = page.namespace in self.namespaces["submission"]
  86. is_talk = page.namespace in self.namespaces["talk"]
  87. if is_sub:
  88. aliases = self.aliases["submission"]
  89. timestamp = self._get_timestamp(page)
  90. elif is_talk:
  91. aliases = self.aliases["talk"]
  92. timestamp, reviewer = self._get_talkdata(page)
  93. else:
  94. msg = u"[[{0}]] is undated, but in a namespace I don't know how to process"
  95. self.logger.warn(msg.format(page.title))
  96. return
  97. if not timestamp:
  98. return
  99. code = mwparserfromhell.parse(page.get())
  100. changes = 0
  101. for template in code.filter_templates():
  102. has_ts = template.has("ts", ignore_empty=True)
  103. if template.name.matches(aliases) and not has_ts:
  104. template.add("ts", timestamp)
  105. has_reviewer = template.has("reviewer", ignore_empty=True)
  106. if is_talk and not has_reviewer:
  107. template.add("reviewer", reviewer)
  108. changes += 1
  109. if changes:
  110. msg = u"Dating [[{0}]]: {1}x {2}"
  111. self.logger.info(msg.format(page.title, changes, aliases[0]))
  112. page.edit(unicode(code), self.summary)
  113. else:
  114. msg = u"[[{0}]] is undated, but I can't figure out what to replace"
  115. self.logger.warn(msg.format(page.title))
  116. def _get_timestamp(self, page):
  117. """Get the timestamp associated with a particular submission."""
  118. self.logger.debug(u"[[{0}]]: Getting timestamp".format(page.title))
  119. result = self.site.api_query(
  120. action="query", prop="revisions", rvprop="timestamp", rvlimit=1,
  121. rvdir="newer", titles=page.title)
  122. data = result["query"]["pages"].values()[0]
  123. if "revisions" not in data:
  124. log = u"Couldn't get timestamp for [[{0}]]"
  125. self.logger.warn(log.format(page.title))
  126. return None
  127. raw = data["revisions"][0]["timestamp"]
  128. ts = datetime.strptime(raw, "%Y-%m-%dT%H:%M:%SZ")
  129. return ts.strftime("%Y%m%d%H%M%S")
  130. def _get_talkdata(self, page):
  131. """Get the timestamp and reviewer associated with a talkpage.
  132. This is the mover for a normal article submission, and the uploader for
  133. a file page.
  134. """
  135. subject = page.toggle_talk()
  136. if subject.exists == subject.PAGE_MISSING:
  137. log = u"Couldn't process [[{0}]]: subject page doesn't exist"
  138. self.logger.warn(log.format(page.title))
  139. return None, None
  140. if subject.namespace == NS_FILE:
  141. self.logger.debug(u"[[{0}]]: Getting filedata".format(page.title))
  142. return self._get_filedata(subject)
  143. self.logger.debug(u"[[{0}]]: Getting talkdata".format(page.title))
  144. user, ts, revid = self.statistics.get_accepted(subject.pageid)
  145. if not ts:
  146. if subject.is_redirect or subject.namespace == NS_CATEGORY:
  147. log = u"[[{0}]]: Couldn't get talkdata; trying redir/cat data"
  148. self.logger.debug(log.format(page.title))
  149. return self._get_redirdata(subject)
  150. log = u"Couldn't get talkdata for [[{0}]]"
  151. self.logger.warn(log.format(page.title))
  152. return None, None
  153. return ts.strftime("%Y%m%d%H%M%S"), user
  154. def _get_filedata(self, page):
  155. """Get the timestamp and reviewer associated with a file talkpage."""
  156. result = self.site.api_query(action="query", prop="imageinfo",
  157. titles=page.title)
  158. data = result["query"]["pages"].values()[0]
  159. if "imageinfo" not in data:
  160. log = u"Couldn't get filedata for [[{0}]]"
  161. self.logger.warn(log.format(page.title))
  162. return None, None
  163. info = data["imageinfo"][0]
  164. ts = datetime.strptime(info["timestamp"], "%Y-%m-%dT%H:%M:%SZ")
  165. return ts.strftime("%Y%m%d%H%M%S"), info["user"]
  166. def _get_redirdata(self, page):
  167. """Get the timestamp and reviewer for a redirect/category talkpage."""
  168. result = self.site.api_query(
  169. action="query", prop="revisions", rvprop="timestamp|user",
  170. rvlimit=1, rvdir="newer", titles=page.title)
  171. if "batchcomplete" not in result:
  172. log = u"Couldn't get redir/cat talkdata for [[{0}]]: has multiple revisions"
  173. self.logger.warn(log.format(page.title))
  174. return None, None
  175. rev = result["query"]["pages"].values()[0]["revisions"][0]
  176. ts = datetime.strptime(rev["timestamp"], "%Y-%m-%dT%H:%M:%SZ")
  177. return ts.strftime("%Y%m%d%H%M%S"), rev["user"]