Browse Source

fix edge case

main
Ben Kurtovic 7 years ago
parent
commit
c8bd66db2a
1 changed files with 9 additions and 10 deletions
  1. +9
    -10
      tasks/banner_untag.py

+ 9
- 10
tasks/banner_untag.py View File

@@ -38,26 +38,27 @@ class BannerUntag(Task):
done_file = kwargs["done-file"] done_file = kwargs["done-file"]
error_file = kwargs["error-file"] error_file = kwargs["error-file"]


with open(rev_file) as fp:
data = fp.read().splitlines()

with open(done_file) as donefp: with open(done_file) as donefp:
done = [int(line) for line in donefp.read().splitlines()] done = [int(line) for line in donefp.read().splitlines()]


with open(rev_file) as fp:
data = [[int(x) for x in line.split("\t")]
for line in fp.read().splitlines()]
data = [item for item in data if item[0] not in done]

with open(error_file, "a") as errfp: with open(error_file, "a") as errfp:
with open(done_file, "a") as donefp: with open(done_file, "a") as donefp:
self._process_data(data, done, errfp, donefp)
self._process_data(data, errfp, donefp)


def _process_data(self, data, done, errfile, donefile):
def _process_data(self, data, errfile, donefile):
chunksize = 50 chunksize = 50
for chunkidx in range((len(data) + chunksize - 1) / chunksize): for chunkidx in range((len(data) + chunksize - 1) / chunksize):
chunk = data[chunkidx*chunksize:(chunkidx+1)*chunksize] chunk = data[chunkidx*chunksize:(chunkidx+1)*chunksize]
chunk = [[int(x) for x in line.split("\t")] for line in chunk]
if self.shutoff_enabled(): if self.shutoff_enabled():
return return
self._process_chunk(chunk, done, errfile, donefile)
self._process_chunk(chunk, errfile, donefile)


def _process_chunk(self, chunk, done, errfile, donefile):
def _process_chunk(self, chunk, errfile, donefile):
pageids_to_revids = dict(chunk) pageids_to_revids = dict(chunk)
res = self.site.api_query( res = self.site.api_query(
action="query", prop="revisions", rvprop="ids", action="query", prop="revisions", rvprop="ids",
@@ -73,7 +74,6 @@ class BannerUntag(Task):
stage2.append(str(parentid)) stage2.append(str(parentid))
else: else:
self.logger.info(u"Skipping [[%s]], not latest edit" % title) self.logger.info(u"Skipping [[%s]], not latest edit" % title)
done.append(pageid)
donefile.write("%d\n" % pageid) donefile.write("%d\n" % pageid)
errfile.write(u"%s\n" % title) errfile.write(u"%s\n" % title)


@@ -95,7 +95,6 @@ class BannerUntag(Task):
page = self.site.get_page(title) page = self.site.get_page(title)
page.edit(content, self.summary, minor=True, bot=True) page.edit(content, self.summary, minor=True, bot=True)


done.append(pageid)
donefile.write("%d\n" % pageid) donefile.write("%d\n" % pageid)
if self.throttle: if self.throttle:
time.sleep(self.throttle) time.sleep(self.throttle)

Loading…
Cancel
Save