Additional IRC commands and bot tasks for EarwigBot https://en.wikipedia.org/wiki/User:EarwigBot
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

afc_undated.py 8.8 KiB

11 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. # Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
  2. #
  3. # Permission is hereby granted, free of charge, to any person obtaining a copy
  4. # of this software and associated documentation files (the "Software"), to deal
  5. # in the Software without restriction, including without limitation the rights
  6. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. # copies of the Software, and to permit persons to whom the Software is
  8. # furnished to do so, subject to the following conditions:
  9. #
  10. # The above copyright notice and this permission notice shall be included in
  11. # all copies or substantial portions of the Software.
  12. #
  13. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  19. # SOFTWARE.
  20. from datetime import datetime
  21. import mwparserfromhell
  22. from earwigbot.tasks import Task
  23. from earwigbot.wiki.constants import (
  24. NS_CATEGORY,
  25. NS_CATEGORY_TALK,
  26. NS_FILE,
  27. NS_FILE_TALK,
  28. NS_HELP_TALK,
  29. NS_PROJECT,
  30. NS_PROJECT_TALK,
  31. NS_TALK,
  32. NS_TEMPLATE,
  33. NS_TEMPLATE_TALK,
  34. NS_USER,
  35. )
  # Numeric namespace ID used for draft pages (presumably the wiki's "Draft:"
  # namespace, going by the name -- confirm against the target site). It is
  # defined here rather than imported alongside the other NS_* constants.
  36. NS_DRAFT = 118
  37. class AfCUndated(Task):
  38. """A task to clear [[Category:Undated AfC submissions]]."""
  39. name = "afc_undated"
  40. number = 5
  41. def setup(self):
  42. cfg = self.config.tasks.get(self.name, {})
  43. self.category = cfg.get("category", "Undated AfC submissions")
  44. default_summary = (
  45. "Adding timestamp to undated [[WP:AFC|Articles for creation]] submission."
  46. )
  47. self.summary = self.make_summary(cfg.get("summary", default_summary))
  48. self.namespaces = {
  49. "submission": [NS_USER, NS_PROJECT, NS_PROJECT_TALK, NS_DRAFT],
  50. "talk": [
  51. NS_TALK,
  52. NS_FILE_TALK,
  53. NS_TEMPLATE_TALK,
  54. NS_HELP_TALK,
  55. NS_CATEGORY_TALK,
  56. ],
  57. }
  58. self.aliases = {
  59. "submission": ["AfC submission"],
  60. "talk": ["WikiProject Articles for creation"],
  61. }
  62. def run(self, **kwargs):
  63. try:
  64. self.statistics = self.bot.tasks.get("afc_statistics")
  65. except KeyError:
  66. err = "Requires afc_statistics task (from earwigbot_plugins)"
  67. self.logger.error(err)
  68. return
  69. self.site = self.bot.wiki.get_site()
  70. category = self.site.get_category(self.category)
  71. logmsg = "Undated category [[{0}]] has {1} members"
  72. self.logger.info(logmsg.format(category.title, category.size))
  73. if category.size:
  74. self._build_aliases()
  75. counter = 0
  76. for page in category:
  77. if not counter % 10:
  78. if self.shutoff_enabled():
  79. return
  80. self._process_page(page)
  81. counter += 1
  82. def _build_aliases(self):
  83. """Build template name aliases for the AfC templates."""
  84. for key in self.aliases:
  85. base = self.aliases[key][0]
  86. aliases = [base, "Template:" + base]
  87. result = self.site.api_query(
  88. action="query",
  89. list="backlinks",
  90. bllimit=50,
  91. blfilterredir="redirects",
  92. bltitle=aliases[1],
  93. )
  94. for data in result["query"]["backlinks"]:
  95. redir = self.site.get_page(data["title"])
  96. aliases.append(redir.title)
  97. if redir.namespace == NS_TEMPLATE:
  98. aliases.append(redir.title.split(":", 1)[1])
  99. self.aliases[key] = aliases
  100. def _process_page(self, page):
  101. """Date the necessary templates inside a page object."""
  102. if not page.check_exclusion():
  103. msg = "Skipping [[{0}]]; bot excluded from editing"
  104. self.logger.info(msg.format(page.title))
  105. return
  106. is_sub = page.namespace in self.namespaces["submission"]
  107. is_talk = page.namespace in self.namespaces["talk"]
  108. if is_sub:
  109. aliases = self.aliases["submission"]
  110. timestamp = self._get_timestamp(page)
  111. elif is_talk:
  112. aliases = self.aliases["talk"]
  113. timestamp, reviewer = self._get_talkdata(page)
  114. else:
  115. msg = "[[{0}]] is undated, but in a namespace I don't know how to process"
  116. self.logger.warn(msg.format(page.title))
  117. return
  118. if not timestamp:
  119. return
  120. code = mwparserfromhell.parse(page.get())
  121. changes = 0
  122. for template in code.filter_templates():
  123. has_ts = template.has("ts", ignore_empty=True)
  124. if template.name.matches(aliases) and not has_ts:
  125. template.add("ts", timestamp)
  126. has_reviewer = template.has("reviewer", ignore_empty=True)
  127. if is_talk and not has_reviewer:
  128. template.add("reviewer", reviewer)
  129. changes += 1
  130. if changes:
  131. msg = "Dating [[{0}]]: {1}x {2}"
  132. self.logger.info(msg.format(page.title, changes, aliases[0]))
  133. page.edit(str(code), self.summary)
  134. else:
  135. msg = "[[{0}]] is undated, but I can't figure out what to replace"
  136. self.logger.warn(msg.format(page.title))
  137. def _get_timestamp(self, page):
  138. """Get the timestamp associated with a particular submission."""
  139. self.logger.debug(f"[[{page.title}]]: Getting timestamp")
  140. result = self.site.api_query(
  141. action="query",
  142. prop="revisions",
  143. rvprop="timestamp",
  144. rvlimit=1,
  145. rvdir="newer",
  146. titles=page.title,
  147. )
  148. data = result["query"]["pages"].values()[0]
  149. if "revisions" not in data:
  150. log = "Couldn't get timestamp for [[{0}]]"
  151. self.logger.warn(log.format(page.title))
  152. return None
  153. raw = data["revisions"][0]["timestamp"]
  154. ts = datetime.strptime(raw, "%Y-%m-%dT%H:%M:%SZ")
  155. return ts.strftime("%Y%m%d%H%M%S")
  156. def _get_talkdata(self, page):
  157. """Get the timestamp and reviewer associated with a talkpage.
  158. This is the mover for a normal article submission, and the uploader for
  159. a file page.
  160. """
  161. subject = page.toggle_talk()
  162. if subject.exists == subject.PAGE_MISSING:
  163. log = "Couldn't process [[{0}]]: subject page doesn't exist"
  164. self.logger.warn(log.format(page.title))
  165. return None, None
  166. if subject.namespace == NS_FILE:
  167. self.logger.debug(f"[[{page.title}]]: Getting filedata")
  168. return self._get_filedata(subject)
  169. self.logger.debug(f"[[{page.title}]]: Getting talkdata")
  170. user, ts, revid = self.statistics.get_accepted(subject.pageid)
  171. if not ts:
  172. if subject.is_redirect or subject.namespace == NS_CATEGORY:
  173. log = "[[{0}]]: Couldn't get talkdata; trying redir/cat data"
  174. self.logger.debug(log.format(page.title))
  175. return self._get_redirdata(subject)
  176. log = "Couldn't get talkdata for [[{0}]]"
  177. self.logger.warn(log.format(page.title))
  178. return None, None
  179. return ts.strftime("%Y%m%d%H%M%S"), user
  180. def _get_filedata(self, page):
  181. """Get the timestamp and reviewer associated with a file talkpage."""
  182. result = self.site.api_query(
  183. action="query", prop="imageinfo", titles=page.title
  184. )
  185. data = result["query"]["pages"].values()[0]
  186. if "imageinfo" not in data:
  187. log = "Couldn't get filedata for [[{0}]]"
  188. self.logger.warn(log.format(page.title))
  189. return None, None
  190. info = data["imageinfo"][0]
  191. ts = datetime.strptime(info["timestamp"], "%Y-%m-%dT%H:%M:%SZ")
  192. return ts.strftime("%Y%m%d%H%M%S"), info["user"]
  193. def _get_redirdata(self, page):
  194. """Get the timestamp and reviewer for a redirect/category talkpage."""
  195. result = self.site.api_query(
  196. action="query",
  197. prop="revisions",
  198. rvprop="timestamp|user",
  199. rvlimit=1,
  200. rvdir="newer",
  201. titles=page.title,
  202. )
  203. if "batchcomplete" not in result:
  204. log = "Couldn't get redir/cat talkdata for [[{0}]]: has multiple revisions"
  205. self.logger.warn(log.format(page.title))
  206. return None, None
  207. rev = result["query"]["pages"].values()[0]["revisions"][0]
  208. ts = datetime.strptime(rev["timestamp"], "%Y-%m-%dT%H:%M:%SZ")
  209. return ts.strftime("%Y%m%d%H%M%S"), rev["user"]