Additional IRC commands and bot tasks for EarwigBot https://en.wikipedia.org/wiki/User:EarwigBot
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

banner_untag.py 4.3 KiB

7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. # Copyright (C) 2017 Ben Kurtovic <ben.kurtovic@gmail.com>
  2. #
  3. # Permission is hereby granted, free of charge, to any person obtaining a copy
  4. # of this software and associated documentation files (the "Software"), to deal
  5. # in the Software without restriction, including without limitation the rights
  6. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. # copies of the Software, and to permit persons to whom the Software is
  8. # furnished to do so, subject to the following conditions:
  9. #
  10. # The above copyright notice and this permission notice shall be included in
  11. # all copies or substantial portions of the Software.
  12. #
  13. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  19. # SOFTWARE.
  20. import time
  21. from earwigbot.tasks import Task
  22. class BannerUntag(Task):
  23. """A task to undo mistaken tagging edits made by wikiproject_tagger."""
  24. name = "banner_untag"
  25. number = 14
  26. def run(self, **kwargs):
  27. self.site = self.bot.wiki.get_site()
  28. self.summary = kwargs["summary"]
  29. self.throttle = int(kwargs.get("throttle", 0))
  30. rev_file = kwargs["rev-file"]
  31. done_file = kwargs["done-file"]
  32. error_file = kwargs["error-file"]
  33. with open(done_file) as donefp:
  34. done = [int(line) for line in donefp.read().splitlines()]
  35. with open(rev_file) as fp:
  36. data = [
  37. [int(x) for x in line.split("\t")] for line in fp.read().splitlines()
  38. ]
  39. data = [item for item in data if item[0] not in done]
  40. with open(error_file, "a") as errfp:
  41. with open(done_file, "a") as donefp:
  42. self._process_data(data, errfp, donefp)
  43. def _process_data(self, data, errfile, donefile):
  44. chunksize = 50
  45. for chunkidx in range((len(data) + chunksize - 1) / chunksize):
  46. chunk = data[chunkidx * chunksize : (chunkidx + 1) * chunksize]
  47. if self.shutoff_enabled():
  48. return
  49. self._process_chunk(chunk, errfile, donefile)
  50. def _process_chunk(self, chunk, errfile, donefile):
  51. pageids_to_revids = dict(chunk)
  52. res = self.site.api_query(
  53. action="query",
  54. prop="revisions",
  55. rvprop="ids",
  56. pageids="|".join(str(item[0]) for item in chunk),
  57. formatversion=2,
  58. )
  59. stage2 = []
  60. for pagedata in res["query"]["pages"]:
  61. pageid = pagedata["pageid"]
  62. if "title" not in pagedata:
  63. self.logger.info("Skipping pageid=%s, doesn't exist" % pageid)
  64. donefile.write("%d\n" % pageid)
  65. continue
  66. title = pagedata["title"]
  67. revid = pagedata["revisions"][0]["revid"]
  68. parentid = pagedata["revisions"][0]["parentid"]
  69. if pageids_to_revids[pageid] == revid:
  70. stage2.append(str(parentid))
  71. else:
  72. self.logger.info("Skipping [[%s]], not latest edit" % title)
  73. donefile.write("%d\n" % pageid)
  74. errfile.write("%s\n" % title.encode("utf8"))
  75. if not stage2:
  76. return
  77. res2 = self.site.api_query(
  78. action="query",
  79. prop="revisions",
  80. rvprop="content",
  81. rvslots="main",
  82. revids="|".join(stage2),
  83. formatversion=2,
  84. )
  85. for pagedata in res2["query"]["pages"]:
  86. revision = pagedata["revisions"][0]["slots"]["main"]
  87. if revision["contentmodel"] != "wikitext":
  88. continue
  89. pageid = pagedata["pageid"]
  90. title = pagedata["title"]
  91. content = revision["content"]
  92. self.logger.debug("Reverting one edit on [[%s]]" % title)
  93. page = self.site.get_page(title)
  94. page.edit(content, self.make_summary(self.summary), minor=True)
  95. donefile.write("%d\n" % pageid)
  96. if self.throttle:
  97. time.sleep(self.throttle)