Additional IRC commands and bot tasks for EarwigBot https://en.wikipedia.org/wiki/User:EarwigBot
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

107 lines
4.2 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2017 Ben Kurtovic <ben.kurtovic@gmail.com>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. import time
  23. from earwigbot.tasks import Task
  24. class BannerUntag(Task):
  25. """A task to undo mistaken tagging edits made by wikiproject_tagger."""
  26. name = "banner_untag"
  27. number = 14
  28. def run(self, **kwargs):
  29. self.site = self.bot.wiki.get_site()
  30. self.summary = kwargs["summary"]
  31. self.throttle = int(kwargs.get("throttle", 0))
  32. rev_file = kwargs["rev-file"]
  33. done_file = kwargs["done-file"]
  34. error_file = kwargs["error-file"]
  35. with open(done_file) as donefp:
  36. done = [int(line) for line in donefp.read().splitlines()]
  37. with open(rev_file) as fp:
  38. data = [[int(x) for x in line.split("\t")]
  39. for line in fp.read().splitlines()]
  40. data = [item for item in data if item[0] not in done]
  41. with open(error_file, "a") as errfp:
  42. with open(done_file, "a") as donefp:
  43. self._process_data(data, errfp, donefp)
  44. def _process_data(self, data, errfile, donefile):
  45. chunksize = 50
  46. for chunkidx in range((len(data) + chunksize - 1) / chunksize):
  47. chunk = data[chunkidx*chunksize:(chunkidx+1)*chunksize]
  48. if self.shutoff_enabled():
  49. return
  50. self._process_chunk(chunk, errfile, donefile)
  51. def _process_chunk(self, chunk, errfile, donefile):
  52. pageids_to_revids = dict(chunk)
  53. res = self.site.api_query(
  54. action="query", prop="revisions", rvprop="ids",
  55. pageids="|".join(str(item[0]) for item in chunk), formatversion=2)
  56. stage2 = []
  57. for pagedata in res["query"]["pages"]:
  58. pageid = pagedata["pageid"]
  59. if "title" not in pagedata:
  60. self.logger.info("Skipping pageid=%s, doesn't exist" % pageid)
  61. donefile.write("%d\n" % pageid)
  62. continue
  63. title = pagedata["title"]
  64. revid = pagedata["revisions"][0]["revid"]
  65. parentid = pagedata["revisions"][0]["parentid"]
  66. if pageids_to_revids[pageid] == revid:
  67. stage2.append(str(parentid))
  68. else:
  69. self.logger.info(u"Skipping [[%s]], not latest edit" % title)
  70. donefile.write("%d\n" % pageid)
  71. errfile.write("%s\n" % title.encode("utf8"))
  72. if not stage2:
  73. return
  74. res2 = self.site.api_query(
  75. action="query", prop="revisions", rvprop="content", rvslots="main",
  76. revids="|".join(stage2), formatversion=2)
  77. for pagedata in res2["query"]["pages"]:
  78. revision = pagedata["revisions"][0]["slots"]["main"]
  79. if revision["contentmodel"] != "wikitext":
  80. continue
  81. pageid = pagedata["pageid"]
  82. title = pagedata["title"]
  83. content = revision["content"]
  84. self.logger.debug(u"Reverting one edit on [[%s]]" % title)
  85. page = self.site.get_page(title)
  86. page.edit(content, self.make_summary(self.summary), minor=True)
  87. donefile.write("%d\n" % pageid)
  88. if self.throttle:
  89. time.sleep(self.throttle)