Additional IRC commands and bot tasks for EarwigBot https://en.wikipedia.org/wiki/User:EarwigBot
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

160 lines
6.2 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2015 Ben Kurtovic <ben.kurtovic@gmail.com>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from __future__ import unicode_literals
  23. from time import sleep
  24. from earwigbot.tasks import Task
  25. from earwigbot.wiki import constants
  26. import mwparserfromhell
  27. class InfoboxStation(Task):
  28. """
  29. A task to replace ``{{Infobox China station}}`` and
  30. ``{{Infobox Japan station}}`` with ``{{Infobox station}}``.
  31. """
  32. name = "infobox_station"
  33. number = 20
  34. def setup(self):
  35. self.site = self.bot.wiki.get_site()
  36. self._targets = {
  37. "China": (
  38. ["Infobox China station", "Infobox china station"],
  39. "Infobox China station/sandbox",
  40. "Infobox China station/sandbox/cats",
  41. "Wikipedia:Templates for discussion/Log/2015 February 8#Template:Infobox China station"
  42. ),
  43. "Japan": (
  44. ["Infobox Japan station", "Infobox japan station"],
  45. "Infobox Japan station/sandbox",
  46. "Infobox Japan station/sandbox/cats",
  47. "Wikipedia:Templates for discussion/Log/2015 May 9#Template:Infobox Japan station"
  48. ),
  49. }
  50. self._replacement = "{{Infobox station}}"
  51. self._sleep_time = 2
  52. self.summary = self.make_summary(
  53. "Replacing {source} with {dest} per [[{discussion}|TfD]].")
  54. def run(self, **kwargs):
  55. limit = int(kwargs.get("limit", kwargs.get("edits", 0)))
  56. for name, args in self._targets.items():
  57. if self.shutoff_enabled():
  58. return
  59. self._replace(name, args, limit)
  60. def _replace(self, name, args, limit=0):
  61. """
  62. Replace a template in all pages that transclude it.
  63. """
  64. self.logger.info("Replacing {0} infobox template".format(name))
  65. count = 0
  66. for title in self._get_transclusions(args[0][0]):
  67. if limit > 0 and count >= limit:
  68. logmsg = "Reached limit of {0} edits for {1} infoboxes"
  69. self.logger.info(logmsg.format(limit, name))
  70. return
  71. count += 1
  72. if count % 5 == 0 and self.shutoff_enabled():
  73. return
  74. page = self.site.get_page(title)
  75. self._process_page(page, args)
  76. self.logger.info("All {0} infoboxes updated".format(name))
  77. def _process_page(self, page, args):
  78. """
  79. Process a single page to replace a template.
  80. """
  81. self.logger.debug("Processing [[{0}]]".format(page.title))
  82. if not page.check_exclusion():
  83. self.logger.warn("Bot excluded from [[{0}]]".format(page.title))
  84. return
  85. code = mwparserfromhell.parse(page.get(), skip_style_tags=True)
  86. cats = []
  87. for tmpl in code.filter_templates():
  88. if tmpl.name.matches(args[0]):
  89. tmpl.name = "subst:" + args[2]
  90. cats.extend(self._get_cats(page, unicode(tmpl)))
  91. tmpl.name = "subst:" + args[1]
  92. self._add_cats(code, cats)
  93. if code == page.get():
  94. msg = "Couldn't figure out what to edit in [[{0}]]"
  95. self.logger.warn(msg.format(page.title))
  96. return
  97. summary = self.summary.format(
  98. source="{{" + args[0][0] + "}}", dest=self._replacement,
  99. discussion=args[3])
  100. page.edit(unicode(code), summary, minor=True)
  101. sleep(self._sleep_time)
  102. def _add_cats(self, code, cats):
  103. """Add category data (*cats*) to wikicode."""
  104. current_cats = code.filter_wikilinks(
  105. matches=lambda link: link.title.lower().startswith("category:"))
  106. norm = lambda cat: cat.title.lower()[len("category:"):].strip()
  107. catlist = [unicode(cat) for cat in cats if not any(
  108. norm(cur) == norm(cat) for cur in current_cats)]
  109. if not catlist:
  110. return
  111. text = "\n".join(catlist)
  112. if current_cats:
  113. code.insert_before(current_cats[0], text + "\n")
  114. return
  115. for tmpl in code.filter_templates():
  116. if tmpl.name.lower().endswith("stub"):
  117. prev = code.get(code.index(tmpl) - 1)
  118. if prev.endswith("\n\n"):
  119. code.replace(prev, prev[:-1])
  120. code.insert_before(tmpl, text + "\n\n")
  121. def _get_cats(self, page, tmpl):
  122. """
  123. Return the categories that should be added to the page.
  124. """
  125. result = self.site.api_query(action="parse", title=page.title,
  126. prop="text", onlypst=1, text=tmpl)
  127. text = result["parse"]["text"]["*"]
  128. return mwparserfromhell.parse(text).filter_wikilinks()
  129. def _get_transclusions(self, tmpl):
  130. """
  131. Return a list of mainspace translusions of the given template.
  132. """
  133. query = """SELECT page_title
  134. FROM templatelinks
  135. LEFT JOIN page ON tl_from = page_id
  136. WHERE tl_namespace = ? AND tl_title = ? AND tl_from_namespace = ?"""
  137. results = self.site.sql_query(query, (
  138. constants.NS_TEMPLATE, tmpl.replace(" ", "_"), constants.NS_MAIN))
  139. return [title.decode("utf8").replace("_", " ") for (title,) in results]