Browse Source

Add banner_untag task.

main
Ben Kurtovic 7 years ago
parent
commit
f8f44293d9
1 changed files with 101 additions and 0 deletions
  1. +101
    -0
      tasks/banner_untag.py

+ 101
- 0
tasks/banner_untag.py View File

@@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2017 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import time

from earwigbot.tasks import Task

class BannerUntag(Task):
"""A task to undo mistaken tagging edits made by wikiproject_tagger."""
name = "banner_untag"
number = 14

def run(self, **kwargs):
self.site = self.bot.wiki.get_site()
self.summary = kwargs["summary"]
self.throttle = int(kwargs.get("throttle", 0))

rev_file = kwargs["rev-file"]
done_file = kwargs["done-file"]
error_file = kwargs["error-file"]

with open(rev_file) as fp:
data = fp.read().splitlines()

with open(done_file) as donefp:
done = [int(line) for line in donefp.read().splitlines()]

with open(error_file, "a") as errfp:
with open(done_file, "a") as donefp:
self._process_data(data, done, errfp, donefp)

def _process_data(self, data, done, errfile, donefile):
chunksize = 50
for chunkidx in range((len(data) + chunksize - 1) / chunksize):
chunk = data[chunkidx*chunksize:(chunkidx+1)*chunksize]
chunk = [[int(x) for x in line.split("\t")] for line in chunk]
if self.shutoff_enabled():
return
self._process_chunk(chunk, done, errfile, donefile)

def _process_chunk(self, chunk, done, errfile, donefile):
pageids_to_revids = dict(chunk)
res = self.site.api_query(
action="query", prop="revisions", rvprop="ids",
pageids="|".join(str(item[0]) for item in chunk), formatversion=2)

stage2 = []
for pagedata in res["query"]["pages"]:
pageid = pagedata["pageid"]
title = pagedata["title"]
revid = pagedata["revisions"][0]["revid"]
parentid = pagedata["revisions"][0]["parentid"]
if pageids_to_revids[pageid] == revid:
stage2.append(str(parentid))
else:
self.logger.info(u"Skipping [[%s]], not latest edit" % title)
done.append(pageid)
donefile.write("%d\n" % pageid)
errfile.write(u"%s\n" % title)

if not stage2:
return

res2 = self.site.api_query(
action="query", prop="revisions", rvprop="content",
revids="|".join(stage2), formatversion=2)

for pagedata in res2["query"]["pages"]:
if pagedata["revisions"][0]["contentmodel"] != "wikitext":
continue
pageid = pagedata["pageid"]
title = pagedata["title"]
content = pagedata["revisions"][0]["content"]

self.logger.debug(u"Reverting one edit on [[%s]]" % title)
page = self.site.get_page(title)
page.edit(content, self.summary, minor=True, bot=True)

done.append(pageid)
donefile.write("%d\n" % pageid)
if self.throttle:
time.sleep(self.throttle)

Loading…
Cancel
Save