Additional IRC commands and bot tasks for EarwigBot https://en.wikipedia.org/wiki/User:EarwigBot
Não pode escolher mais do que 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

159 linhas
6.1 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2015 Ben Kurtovic <ben.kurtovic@gmail.com>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from __future__ import unicode_literals
  23. from time import sleep
  24. from earwigbot.tasks import Task
  25. from earwigbot.wiki import constants
  26. import mwparserfromhell
  27. class InfoboxStation(Task):
  28. """
  29. A task to replace ``{{Infobox China station}}`` and
  30. ``{{Infobox Japan station}}`` with ``{{Infobox station}}``.
  31. """
  32. name = "infobox_station"
  33. number = 20
  34. def setup(self):
  35. self.site = self.bot.wiki.get_site()
  36. self._targets = {
  37. "China": (
  38. ["Infobox China station", "Infobox china station"],
  39. "Infobox China station/sandbox",
  40. "Infobox China station/sandbox/cats",
  41. "Wikipedia:Templates for discussion/Log/2015 February 8#Template:Infobox China station"
  42. ),
  43. "Japan": (
  44. ["Infobox Japan station", "Infobox japan station"],
  45. "Infobox Japan station/sandbox",
  46. "Infobox Japan station/sandbox/cats",
  47. "Wikipedia:Templates for discussion/Log/2015 May 9#Template:Infobox Japan station"
  48. ),
  49. }
  50. self._replacement = "{{Infobox station}}"
  51. self._sleep_time = 6
  52. self.summary = self.make_summary(
  53. "Replacing {source} with {dest} per [[{discussion}|TfD]].")
  54. def run(self, **kwargs):
  55. limit = int(kwargs.get("limit"), 0)
  56. for name, args in self._targets.items():
  57. if self.shutoff_enabled():
  58. return
  59. self._replace(name, args, limit)
  60. def _replace(self, name, args, limit=0):
  61. """
  62. Replace a template in all pages that transclude it.
  63. """
  64. self.logger.info("Replacing {0} infobox template".format(name))
  65. count = 0
  66. for title in self._get_transclusions(args[0][0]):
  67. if limit > 0 and count >= limit:
  68. logmsg = "Reached limit of {0} edits for {1} infoboxes"
  69. self.logger.info(logmsg.format(limit, name))
  70. return
  71. count += 1
  72. if count % 5 == 0 and self.shutoff_enabled():
  73. return
  74. page = self.site.get_page(title)
  75. self._process_page(page, args)
  76. self.logger.info("All {0} infoboxes updated".format(name))
  77. def _process_page(self, page, args):
  78. """
  79. Process a single page to replace a template.
  80. """
  81. self.logger.debug("Processing [[{0}]]".format(page.title))
  82. if not page.check_exclusion():
  83. self.logger.warn("Bot excluded from [[{0}]]".format(page.title))
  84. return
  85. code = mwparserfromhell.parse(page.get(), skip_style_tags=True)
  86. for tmpl in code.filter_templates():
  87. if tmpl.name.matches(args[0]):
  88. tmpl.name = "subst:" + args[2]
  89. cats = self._get_cats(page, unicode(tmpl))
  90. tmpl.name = "subst:" + args[1]
  91. self._add_cats(code, cats)
  92. if code == page.get():
  93. msg = "Couldn't figure out what to edit in [[{0}]]"
  94. self.logger.warn(msg.format(page.title))
  95. return
  96. summary = self.summary.format(
  97. source="{{" + args[0][0] + "}}", dest=self._replacement,
  98. discussion=args[3])
  99. page.edit(unicode(code), summary, minor=True)
  100. sleep(self._sleep_time)
  101. def _add_cats(self, code, cats):
  102. """Add category data (*cats*) to wikicode."""
  103. current_cats = code.filter_wikilinks(
  104. matches=lambda link: link.title.lower().startswith("category:"))
  105. norm = lambda cat: cat.title.lower()[len("category:"):].strip()
  106. catlist = [unicode(cat) for cat in cats if not any(
  107. norm(cur) == norm(cat) for cur in current_cats)]
  108. if not catlist:
  109. return
  110. text = "\n".join(catlist)
  111. if current_cats:
  112. code.insert_before(current_cats[0], text + "\n")
  113. return
  114. for tmpl in code.filter_templates():
  115. if tmpl.name.lower().endswith("stub"):
  116. prev = code.get(code.index(tmpl) - 1)
  117. if prev.endswith("\n\n"):
  118. code.replace(prev, prev[:-1])
  119. code.insert_before(tmpl, text + "\n\n")
  120. def _get_cats(self, page, tmpl):
  121. """
  122. Return the categories that should be added to the page.
  123. """
  124. result = self.site.api_query(action="parse", title=page.title,
  125. prop="text", onlypst=1, text=tmpl)
  126. text = result["parse"]["text"]["*"]
  127. return mwparserfromhell.parse(text).filter_wikilinks()
  128. def _get_transclusions(self, tmpl):
  129. """
  130. Return a list of mainspace translusions of the given template.
  131. """
  132. query = """SELECT page_title
  133. FROM templatelinks
  134. LEFT JOIN page ON tl_from = page_id
  135. WHERE tl_namespace = ? AND tl_title = ? AND tl_from_namespace = ?"""
  136. results = self.site.sql_query(query, (
  137. constants.NS_TEMPLATE, tmpl.replace(" ", "_"), constants.NS_MAIN))
  138. return [title.decode("utf8").replace("_", " ") for (title,) in results]