|
@@ -38,26 +38,27 @@ class BannerUntag(Task): |
|
|
done_file = kwargs["done-file"] |
|
|
done_file = kwargs["done-file"] |
|
|
error_file = kwargs["error-file"] |
|
|
error_file = kwargs["error-file"] |
|
|
|
|
|
|
|
|
with open(rev_file) as fp: |
|
|
|
|
|
data = fp.read().splitlines() |
|
|
|
|
|
|
|
|
|
|
|
with open(done_file) as donefp: |
|
|
with open(done_file) as donefp: |
|
|
done = [int(line) for line in donefp.read().splitlines()] |
|
|
done = [int(line) for line in donefp.read().splitlines()] |
|
|
|
|
|
|
|
|
|
|
|
with open(rev_file) as fp: |
|
|
|
|
|
data = [[int(x) for x in line.split("\t")] |
|
|
|
|
|
for line in fp.read().splitlines()] |
|
|
|
|
|
data = [item for item in data if item[0] not in done] |
|
|
|
|
|
|
|
|
with open(error_file, "a") as errfp: |
|
|
with open(error_file, "a") as errfp: |
|
|
with open(done_file, "a") as donefp: |
|
|
with open(done_file, "a") as donefp: |
|
|
self._process_data(data, done, errfp, donefp) |
|
|
|
|
|
|
|
|
self._process_data(data, errfp, donefp) |
|
|
|
|
|
|
|
|
def _process_data(self, data, done, errfile, donefile): |
|
|
|
|
|
|
|
|
def _process_data(self, data, errfile, donefile): |
|
|
chunksize = 50 |
|
|
chunksize = 50 |
|
|
for chunkidx in range((len(data) + chunksize - 1) / chunksize): |
|
|
for chunkidx in range((len(data) + chunksize - 1) / chunksize): |
|
|
chunk = data[chunkidx*chunksize:(chunkidx+1)*chunksize] |
|
|
chunk = data[chunkidx*chunksize:(chunkidx+1)*chunksize] |
|
|
chunk = [[int(x) for x in line.split("\t")] for line in chunk] |
|
|
|
|
|
if self.shutoff_enabled(): |
|
|
if self.shutoff_enabled(): |
|
|
return |
|
|
return |
|
|
self._process_chunk(chunk, done, errfile, donefile) |
|
|
|
|
|
|
|
|
self._process_chunk(chunk, errfile, donefile) |
|
|
|
|
|
|
|
|
def _process_chunk(self, chunk, done, errfile, donefile): |
|
|
|
|
|
|
|
|
def _process_chunk(self, chunk, errfile, donefile): |
|
|
pageids_to_revids = dict(chunk) |
|
|
pageids_to_revids = dict(chunk) |
|
|
res = self.site.api_query( |
|
|
res = self.site.api_query( |
|
|
action="query", prop="revisions", rvprop="ids", |
|
|
action="query", prop="revisions", rvprop="ids", |
|
@@ -73,7 +74,6 @@ class BannerUntag(Task): |
|
|
stage2.append(str(parentid)) |
|
|
stage2.append(str(parentid)) |
|
|
else: |
|
|
else: |
|
|
self.logger.info(u"Skipping [[%s]], not latest edit" % title) |
|
|
self.logger.info(u"Skipping [[%s]], not latest edit" % title) |
|
|
done.append(pageid) |
|
|
|
|
|
donefile.write("%d\n" % pageid) |
|
|
donefile.write("%d\n" % pageid) |
|
|
errfile.write(u"%s\n" % title) |
|
|
errfile.write(u"%s\n" % title) |
|
|
|
|
|
|
|
@@ -95,7 +95,6 @@ class BannerUntag(Task): |
|
|
page = self.site.get_page(title) |
|
|
page = self.site.get_page(title) |
|
|
page.edit(content, self.summary, minor=True, bot=True) |
|
|
page.edit(content, self.summary, minor=True, bot=True) |
|
|
|
|
|
|
|
|
done.append(pageid) |
|
|
|
|
|
donefile.write("%d\n" % pageid) |
|
|
donefile.write("%d\n" % pageid) |
|
|
if self.throttle: |
|
|
if self.throttle: |
|
|
time.sleep(self.throttle) |
|
|
time.sleep(self.throttle) |