# -*- coding: utf-8 -*- # # Copyright (C) 2017 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
import time

from earwigbot.tasks import Task


class BannerUntag(Task):
    """A task to undo mistaken tagging edits made by wikiproject_tagger.

    The task reads a list of ``(pageid, revid)`` pairs and, for every page
    whose latest revision is still the listed one, re-saves the content of
    that revision's parent. Progress is appended to a "done" file so that an
    interrupted run can be resumed; pages that could not be reverted
    automatically are listed by title in an "error" file.
    """
    name = "banner_untag"
    number = 14

    def run(self, **kwargs):
        """Revert every not-yet-processed edit listed in the revision file.

        Required keyword arguments:

        - ``summary``: edit summary, passed through ``self.make_summary``.
        - ``rev-file``: path to a file with one ``pageid<TAB>revid`` pair of
          integers per line.
        - ``done-file``: path to an existing file with one already-processed
          pageid per line; newly processed pageids are appended to it.
        - ``error-file``: path to a file to which the titles of pages that
          were skipped for manual review are appended.

        Optional keyword arguments:

        - ``throttle``: seconds to sleep after each edit (default ``0``).

        Raises ``KeyError`` if a required argument is missing, and
        ``ValueError`` if a non-blank line of an input file is not numeric.
        """
        self.site = self.bot.wiki.get_site()
        self.summary = kwargs["summary"]
        self.throttle = int(kwargs.get("throttle", 0))

        rev_file = kwargs["rev-file"]
        done_file = kwargs["done-file"]
        error_file = kwargs["error-file"]

        # A set keeps the "already done?" filter below linear in the size of
        # the revision file. Blank lines are ignored rather than crashing
        # int().
        with open(done_file) as donefp:
            done = {int(line) for line in donefp if line.strip()}
        with open(rev_file) as fp:
            data = [[int(x) for x in line.split("\t")]
                    for line in fp if line.strip()]
        data = [item for item in data if item[0] not in done]

        with open(error_file, "a") as errfp:
            with open(done_file, "a") as donefp:
                self._process_data(data, errfp, donefp)

    def _process_data(self, data, errfile, donefile):
        """Process *data* in fixed-size chunks until done or shut off.

        *data* is a list of ``[pageid, revid]`` rows; *errfile* and
        *donefile* are writable file objects. The shutoff switch is checked
        before each chunk, and processing stops as soon as it is enabled.
        """
        chunksize = 50
        # Stepping the range (instead of dividing to get a chunk count) keeps
        # every index an int on both Python 2 and Python 3.
        for start in range(0, len(data), chunksize):
            if self.shutoff_enabled():
                return
            chunk = data[start:start + chunksize]
            self._process_chunk(chunk, errfile, donefile)

    def _process_chunk(self, chunk, errfile, donefile):
        """Revert the edits in one chunk of ``[pageid, revid]`` rows.

        Stage one asks the API for the latest revision of every page in the
        chunk and decides which pages can be reverted. Stage two fetches the
        content of the parent revisions and saves it back as a minor edit.
        Pages without wikitext content are left untouched and are not
        recorded as done.
        """
        pageids_to_revids = dict(chunk)
        res = self.site.api_query(
            action="query", prop="revisions", rvprop="ids",
            pageids="|".join(str(item[0]) for item in chunk),
            formatversion=2)

        stage2 = []
        for pagedata in res["query"]["pages"]:
            pageid = pagedata["pageid"]
            if "title" not in pagedata:
                self.logger.info("Skipping pageid=%s, doesn't exist" % pageid)
                donefile.write("%d\n" % pageid)
                continue

            title = pagedata["title"]
            latest = pagedata["revisions"][0]
            if pageids_to_revids[pageid] != latest["revid"]:
                self.logger.info(u"Skipping [[%s]], not latest edit" % title)
                donefile.write("%d\n" % pageid)
                self._record_error(errfile, title)
                continue

            parentid = latest["parentid"]
            if not parentid:
                # A parentid of 0 means the listed edit created the page, so
                # there is no earlier content to restore. Record it for
                # manual review instead of querying the invalid revid 0.
                self.logger.info(
                    u"Skipping [[%s]], no parent revision" % title)
                donefile.write("%d\n" % pageid)
                self._record_error(errfile, title)
                continue

            stage2.append(str(parentid))

        if not stage2:
            return

        res2 = self.site.api_query(
            action="query", prop="revisions", rvprop="content",
            rvslots="main", revids="|".join(stage2), formatversion=2)

        for pagedata in res2["query"]["pages"]:
            revision = pagedata["revisions"][0]["slots"]["main"]
            if revision["contentmodel"] != "wikitext":
                continue

            pageid = pagedata["pageid"]
            title = pagedata["title"]
            content = revision["content"]

            self.logger.debug(u"Reverting one edit on [[%s]]" % title)
            page = self.site.get_page(title)
            page.edit(content, self.make_summary(self.summary), minor=True)
            donefile.write("%d\n" % pageid)
            if self.throttle:
                time.sleep(self.throttle)

    @staticmethod
    def _record_error(errfile, title):
        """Append *title* as a single line to *errfile*."""
        line = u"%s\n" % title
        if not isinstance(line, str):
            # Python 2: the formatted line is ``unicode`` and must be encoded
            # before it is written. On Python 3 it is already a native
            # ``str``, and encoding it would write its ``b'...'`` repr.
            line = line.encode("utf8")
        errfile.write(line)