|
@@ -0,0 +1,101 @@ |
|
|
|
|
|
# -*- coding: utf-8 -*- |
|
|
|
|
|
# |
|
|
|
|
|
# Copyright (C) 2017 Ben Kurtovic <ben.kurtovic@gmail.com> |
|
|
|
|
|
# |
|
|
|
|
|
# Permission is hereby granted, free of charge, to any person obtaining a copy |
|
|
|
|
|
# of this software and associated documentation files (the "Software"), to deal |
|
|
|
|
|
# in the Software without restriction, including without limitation the rights |
|
|
|
|
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|
|
|
|
|
# copies of the Software, and to permit persons to whom the Software is |
|
|
|
|
|
# furnished to do so, subject to the following conditions: |
|
|
|
|
|
# |
|
|
|
|
|
# The above copyright notice and this permission notice shall be included in |
|
|
|
|
|
# all copies or substantial portions of the Software. |
|
|
|
|
|
# |
|
|
|
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
|
|
|
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
|
|
|
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|
|
|
|
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
|
|
|
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|
|
|
|
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|
|
|
|
|
# SOFTWARE. |
|
|
|
|
|
|
|
|
|
|
|
import time |
|
|
|
|
|
|
|
|
|
|
|
from earwigbot.tasks import Task |
|
|
|
|
|
|
|
|
|
|
|
class BannerUntag(Task):
    """A task to undo mistaken tagging edits made by wikiproject_tagger.

    Keyword arguments read by :meth:`run`:

    - ``summary`` (required): edit summary used for every revert.
    - ``rev-file`` (required): path to a text file holding one tab-separated
      ``pageid``/``revid`` pair per line; ``revid`` is the tagging edit that
      should be undone on that page.
    - ``done-file`` (required): path to a file of page IDs, one per line,
      that have already been handled. It is read at startup (a missing file
      is treated as empty) and appended to as pages are handled, so an
      interrupted run can be resumed.
    - ``error-file`` (required): path to a file, opened for appending, that
      collects the pages that were skipped instead of reverted.
    - ``throttle`` (optional, default 0): seconds to sleep after each edit.
    """
    name = "banner_untag"
    number = 14

    def run(self, **kwargs):
        """Read the work lists named in *kwargs* and revert pending pages.

        Raises ``KeyError`` if a required keyword argument is absent, and
        propagates I/O errors other than a not-yet-existing done file.
        """
        # Local imports: only this method needs them.
        import errno
        import io

        self.site = self.bot.wiki.get_site()
        self.summary = kwargs["summary"]
        self.throttle = int(kwargs.get("throttle", 0))

        rev_file = kwargs["rev-file"]
        done_file = kwargs["done-file"]
        error_file = kwargs["error-file"]

        with open(rev_file) as fp:
            data = fp.read().splitlines()

        try:
            with open(done_file) as donefp:
                done = [int(line) for line in donefp.read().splitlines()
                        if line.strip()]
        except IOError as exc:
            # On the very first run there is no done file yet; start empty.
            # (It is created below when opened for appending.)
            if exc.errno != errno.ENOENT:
                raise
            done = []

        # Page titles may contain non-ASCII characters, so the error file is
        # opened as UTF-8 text; a plain Python 2 file object would try to
        # encode the unicode titles as ASCII and fail.
        with io.open(error_file, "a", encoding="utf8") as errfp:
            with open(done_file, "a") as donefp:
                self._process_data(data, done, errfp, donefp)

    def _process_data(self, data, done, errfile, donefile):
        """Split *data* into batches and hand each one to _process_chunk.

        *data* is a list of ``"<pageid>\\t<revid>"`` strings; blank lines are
        ignored. *done* is the list of page IDs handled by earlier runs;
        entries whose page ID is already in it are skipped. *errfile* and
        *donefile* are writable file objects passed through unchanged.
        Stops early, without error, once the task's shutoff is enabled.
        """
        chunksize = 50
        # Snapshot for O(1) membership tests while resuming a previous run.
        already_done = set(done)
        # Stepping by chunksize avoids the "/" division, which yields a float
        # (and makes range() fail) on Python 3.
        for start in range(0, len(data), chunksize):
            lines = data[start:start + chunksize]
            chunk = [[int(x) for x in line.split("\t")]
                     for line in lines if line.strip()]
            chunk = [item for item in chunk if item[0] not in already_done]
            if not chunk:
                continue
            if self.shutoff_enabled():
                return
            self._process_chunk(chunk, done, errfile, donefile)

    def _process_chunk(self, chunk, done, errfile, donefile):
        """Revert the tagging edit on every page in *chunk* where possible.

        *chunk* is a non-empty list of ``[pageid, revid]`` pairs. A page is
        reverted only if *revid* is still its latest revision; the content
        of that revision's parent is then saved back with the configured
        summary. Pages that cannot be reverted are written to *errfile* and,
        like reverted pages, appended to *done* and *donefile*.

        *errfile* receives unicode text, so it should be a text-mode file
        that accepts it (``run`` passes a UTF-8 ``io.open`` handle).
        """
        pageids_to_revids = dict(chunk)
        res = self.site.api_query(
            action="query", prop="revisions", rvprop="ids",
            pageids="|".join(str(item[0]) for item in chunk), formatversion=2)

        stage2 = []
        for pagedata in res["query"]["pages"]:
            pageid = pagedata["pageid"]
            revisions = pagedata.get("revisions")
            if not revisions:
                # Nothing to compare against, e.g. the page was deleted after
                # it was tagged. Such entries may also lack a title.
                label = pagedata.get("title", u"pageid:%d" % pageid)
                self.logger.info(u"Skipping %s, no revisions found" % label)
                self._record_skip(pageid, label, done, errfile, donefile)
                continue

            title = pagedata["title"]
            revid = revisions[0]["revid"]
            parentid = revisions[0].get("parentid")
            if pageids_to_revids[pageid] != revid:
                self.logger.info(u"Skipping [[%s]], not latest edit" % title)
                self._record_skip(pageid, title, done, errfile, donefile)
            elif not parentid:
                # No parent revision: the tagging edit created the page, so
                # there is no earlier content to restore by editing.
                self.logger.info(
                    u"Skipping [[%s]], no parent revision" % title)
                self._record_skip(pageid, title, done, errfile, donefile)
            else:
                stage2.append(str(parentid))

        if not stage2:
            return

        res2 = self.site.api_query(
            action="query", prop="revisions", rvprop="content",
            revids="|".join(stage2), formatversion=2)

        for pagedata in res2["query"]["pages"]:
            revisions = pagedata.get("revisions")
            if not revisions or "content" not in revisions[0]:
                # Content was not returned for this page; leave it unmarked
                # so that a later run picks it up again.
                continue
            if revisions[0].get("contentmodel") != "wikitext":
                continue
            pageid = pagedata["pageid"]
            title = pagedata["title"]
            content = revisions[0]["content"]

            self.logger.debug(u"Reverting one edit on [[%s]]" % title)
            page = self.site.get_page(title)
            page.edit(content, self.summary, minor=True, bot=True)

            self._mark_done(pageid, done, donefile)
            if self.throttle:
                time.sleep(self.throttle)

    def _mark_done(self, pageid, done, donefile):
        """Record *pageid* as handled, in memory and in the done file."""
        done.append(pageid)
        donefile.write("%d\n" % pageid)
        # Flush right away: edits cannot be undone by a crash, so the record
        # of them should not sit in a buffer either.
        donefile.flush()

    def _record_skip(self, pageid, label, done, errfile, donefile):
        """Mark *pageid* as handled and note *label* in the error file."""
        self._mark_done(pageid, done, donefile)
        errfile.write(u"%s\n" % label)