Additional IRC commands and bot tasks for EarwigBot https://en.wikipedia.org/wiki/User:EarwigBot
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

168 lines
6.1 KiB

  1. # Copyright (C) 2015 Ben Kurtovic <ben.kurtovic@gmail.com>
  2. #
  3. # Permission is hereby granted, free of charge, to any person obtaining a copy
  4. # of this software and associated documentation files (the "Software"), to deal
  5. # in the Software without restriction, including without limitation the rights
  6. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. # copies of the Software, and to permit persons to whom the Software is
  8. # furnished to do so, subject to the following conditions:
  9. #
  10. # The above copyright notice and this permission notice shall be included in
  11. # all copies or substantial portions of the Software.
  12. #
  13. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  19. # SOFTWARE.
  20. from time import sleep
  21. import mwparserfromhell
  22. from earwigbot.tasks import Task
  23. from earwigbot.wiki import constants
  24. class InfoboxStation(Task):
  25. """
  26. A task to replace ``{{Infobox China station}}`` and
  27. ``{{Infobox Japan station}}`` with ``{{Infobox station}}``.
  28. """
  29. name = "infobox_station"
  30. number = 20
  31. def setup(self):
  32. self.site = self.bot.wiki.get_site()
  33. self._targets = {
  34. "China": (
  35. ["Infobox China station", "Infobox china station"],
  36. "Infobox China station/sandbox",
  37. "Infobox China station/sandbox/cats",
  38. "Wikipedia:Templates for discussion/Log/2015 February 8#Template:Infobox China station",
  39. ),
  40. "Japan": (
  41. ["Infobox Japan station", "Infobox japan station"],
  42. "Infobox Japan station/sandbox",
  43. "Infobox Japan station/sandbox/cats",
  44. "Wikipedia:Templates for discussion/Log/2015 May 9#Template:Infobox Japan station",
  45. ),
  46. }
  47. self._replacement = "{{Infobox station}}"
  48. self._sleep_time = 2
  49. self.summary = self.make_summary(
  50. "Replacing {source} with {dest} per [[{discussion}|TfD]]."
  51. )
  52. def run(self, **kwargs):
  53. limit = int(kwargs.get("limit", kwargs.get("edits", 0)))
  54. for name, args in self._targets.items():
  55. if self.shutoff_enabled():
  56. return
  57. self._replace(name, args, limit)
  58. def _replace(self, name, args, limit=0):
  59. """
  60. Replace a template in all pages that transclude it.
  61. """
  62. self.logger.info(f"Replacing {name} infobox template")
  63. count = 0
  64. for title in self._get_transclusions(args[0][0]):
  65. if limit > 0 and count >= limit:
  66. logmsg = "Reached limit of {0} edits for {1} infoboxes"
  67. self.logger.info(logmsg.format(limit, name))
  68. return
  69. count += 1
  70. if count % 5 == 0 and self.shutoff_enabled():
  71. return
  72. page = self.site.get_page(title)
  73. self._process_page(page, args)
  74. self.logger.info(f"All {name} infoboxes updated")
  75. def _process_page(self, page, args):
  76. """
  77. Process a single page to replace a template.
  78. """
  79. self.logger.debug(f"Processing [[{page.title}]]")
  80. if not page.check_exclusion():
  81. self.logger.warn(f"Bot excluded from [[{page.title}]]")
  82. return
  83. code = mwparserfromhell.parse(page.get(), skip_style_tags=True)
  84. cats = []
  85. for tmpl in code.filter_templates():
  86. if tmpl.name.matches(args[0]):
  87. tmpl.name = "subst:" + args[2]
  88. cats.extend(self._get_cats(page, str(tmpl)))
  89. tmpl.name = "subst:" + args[1]
  90. self._add_cats(code, cats)
  91. if code == page.get():
  92. msg = "Couldn't figure out what to edit in [[{0}]]"
  93. self.logger.warn(msg.format(page.title))
  94. return
  95. summary = self.summary.format(
  96. source="{{" + args[0][0] + "}}", dest=self._replacement, discussion=args[3]
  97. )
  98. page.edit(str(code), summary, minor=True)
  99. sleep(self._sleep_time)
  100. def _add_cats(self, code, cats):
  101. """Add category data (*cats*) to wikicode."""
  102. current_cats = code.filter_wikilinks(
  103. matches=lambda link: link.title.lower().startswith("category:")
  104. )
  105. def norm(cat):
  106. return cat.title.lower()[len("category:") :].strip()
  107. catlist = [
  108. str(cat)
  109. for cat in cats
  110. if not any(norm(cur) == norm(cat) for cur in current_cats)
  111. ]
  112. if not catlist:
  113. return
  114. text = "\n".join(catlist)
  115. if current_cats:
  116. code.insert_before(current_cats[0], text + "\n")
  117. return
  118. for tmpl in code.filter_templates():
  119. if tmpl.name.lower().endswith("stub"):
  120. prev = code.get(code.index(tmpl) - 1)
  121. if prev.endswith("\n\n"):
  122. code.replace(prev, prev[:-1])
  123. code.insert_before(tmpl, text + "\n\n")
  124. def _get_cats(self, page, tmpl):
  125. """
  126. Return the categories that should be added to the page.
  127. """
  128. result = self.site.api_query(
  129. action="parse", title=page.title, prop="text", onlypst=1, text=tmpl
  130. )
  131. text = result["parse"]["text"]["*"]
  132. return mwparserfromhell.parse(text).filter_wikilinks()
  133. def _get_transclusions(self, tmpl):
  134. """
  135. Return a list of mainspace translusions of the given template.
  136. """
  137. query = """SELECT page_title
  138. FROM templatelinks
  139. LEFT JOIN page ON tl_from = page_id
  140. WHERE tl_namespace = ? AND tl_title = ? AND tl_from_namespace = ?"""
  141. results = self.site.sql_query(
  142. query, (constants.NS_TEMPLATE, tmpl.replace(" ", "_"), constants.NS_MAIN)
  143. )
  144. return [title.decode("utf8").replace("_", " ") for (title,) in results]