Browse Source

Skip categories and pages that were already processed, making the task slightly more efficient when a page is encountered multiple times.

tags/v0.3
Ben Kurtovic 7 years ago
parent
commit
4018e1a82e
1 changed file with 18 additions and 2 deletions
  1. +18
    -2
      earwigbot/tasks/wikiproject_tagger.py

+ 18
- 2
earwigbot/tasks/wikiproject_tagger.py View File

@@ -215,7 +215,13 @@ class WikiProjectTagger(Task):


def process_category(self, page, job, recursive): def process_category(self, page, job, recursive):
"""Try to tag all pages in the given category.""" """Try to tag all pages in the given category."""
if page.title in job.processed_cats:
self.logger.debug(u"Skipping category, already processed: [[%s]]",
page.title)
return
self.logger.info(u"Processing category: [[%s]]", page.title) self.logger.info(u"Processing category: [[%s]]", page.title)
job.processed_cats.add(page.title)

if job.tag_categories: if job.tag_categories:
self.process_page(page, job, is_category=True) self.process_page(page, job, is_category=True)
for member in page.get_members(): for member in page.get_members():
@@ -231,13 +237,20 @@ class WikiProjectTagger(Task):


def process_page(self, page, job, is_category=False): def process_page(self, page, job, is_category=False):
"""Try to tag a specific *page* using the *job* description.""" """Try to tag a specific *page* using the *job* description."""
if not page.is_talkpage:
page = page.toggle_talk()

if page.title in job.processed_pages:
self.logger.debug(u"Skipping page, already processed: [[%s]]",
page.title)
return
job.processed_pages.add(page.title)

if job.counter % 10 == 0: # Do a shutoff check every ten pages if job.counter % 10 == 0: # Do a shutoff check every ten pages
if self.shutoff_enabled(page.site): if self.shutoff_enabled(page.site):
raise _ShutoffEnabled() raise _ShutoffEnabled()
job.counter += 1 job.counter += 1


if not page.is_talkpage:
page = page.toggle_talk()
try: try:
code = page.parse() code = page.parse()
except exceptions.PageNotFoundError: except exceptions.PageNotFoundError:
@@ -438,7 +451,10 @@ class _Job(object):
self.nocreate = kwargs["nocreate"] self.nocreate = kwargs["nocreate"]
self.tag_categories = kwargs["tag_categories"] self.tag_categories = kwargs["tag_categories"]
self.dry_run = kwargs["dry_run"] self.dry_run = kwargs["dry_run"]

self.counter = 0 self.counter = 0
self.processed_cats = set()
self.processed_pages = set()




class _ShutoffEnabled(Exception): class _ShutoffEnabled(Exception):


Loading…
Cancel
Save