Browse Source

Use service delegation for Category.get_members().

tags/v0.1^2
Ben Kurtovic 12 years ago
parent
commit
e01ca0fd31
1 changed files with 35 additions and 34 deletions
  1. +35
    -34
      earwigbot/wiki/category.py

+ 35
- 34
earwigbot/wiki/category.py View File

@@ -51,6 +51,26 @@ class Category(Page):
"""Return a nice string representation of the Category.""" """Return a nice string representation of the Category."""
return '<Category "{0}" of {1}>'.format(self.title, str(self.site)) return '<Category "{0}" of {1}>'.format(self.title, str(self.site))


def _get_members_via_api(self, limit, follow):
    """Iterate over Pages in the category using the API.

    Issues ``list=categorymembers`` queries through ``self.site.api_query()``
    and yields one ``self.site.get_page()`` result per returned member,
    following ``query-continue`` markers until the listing ends.

    *limit* is the maximum number of pages to yield; a false value
    (``None`` or ``0``) means no limit, in which case each query asks for
    ``"max"`` results. *follow* is passed to ``get_page()`` as
    ``follow_redirects``.
    """
    params = {"action": "query", "list": "categorymembers",
              "cmtitle": self.title}
    remaining = limit

    while 1:
        params["cmlimit"] = remaining if remaining else "max"
        result = self.site.api_query(**params)
        members = result["query"]["categorymembers"]
        for member in members:
            yield self.site.get_page(member["title"],
                                     follow_redirects=follow)

        if "query-continue" not in result:
            break
        if remaining:
            remaining -= len(members)
            # Stop once the budget is spent. Otherwise a remaining count of
            # 0 is falsy, the next request would ask for "max", and the
            # caller's limit would be silently ignored.
            if remaining <= 0:
                break
        qcontinue = result["query-continue"]["categorymembers"]
        params["cmcontinue"] = qcontinue["cmcontinue"]

def _get_members_via_sql(self, limit, follow): def _get_members_via_sql(self, limit, follow):
"""Iterate over Pages in the category using SQL.""" """Iterate over Pages in the category using SQL."""
query = """SELECT page_title, page_namespace, page_id FROM page query = """SELECT page_title, page_namespace, page_id FROM page
@@ -75,27 +95,7 @@ class Category(Page):
yield self.site.get_page(title, follow_redirects=follow, yield self.site.get_page(title, follow_redirects=follow,
pageid=row[2]) pageid=row[2])


def _get_members_via_api(self, limit, follow):
    """Iterate over Pages in the category using the API.

    Issues ``list=categorymembers`` queries through ``self.site.api_query()``
    and yields one ``self.site.get_page()`` result per returned member,
    following ``query-continue`` markers until the listing ends.

    *limit* is the maximum number of pages to yield; a false value
    (``None`` or ``0``) means no limit, in which case each query asks for
    ``"max"`` results. *follow* is passed to ``get_page()`` as
    ``follow_redirects``.
    """
    params = {"action": "query", "list": "categorymembers",
              "cmtitle": self.title}
    remaining = limit

    while 1:
        params["cmlimit"] = remaining if remaining else "max"
        result = self.site.api_query(**params)
        members = result["query"]["categorymembers"]
        for member in members:
            yield self.site.get_page(member["title"],
                                     follow_redirects=follow)

        if "query-continue" not in result:
            break
        if remaining:
            remaining -= len(members)
            # Stop once the budget is spent. Otherwise a remaining count of
            # 0 is falsy, the next request would ask for "max", and the
            # caller's limit would be silently ignored.
            if remaining <= 0:
                break
        qcontinue = result["query-continue"]["categorymembers"]
        params["cmcontinue"] = qcontinue["cmcontinue"]

def _get_size_via_sql(self, member_type):
def _get_size_via_api(self, member_type):
query = "SELECT COUNT(*) FROM categorylinks WHERE cl_to = ?" query = "SELECT COUNT(*) FROM categorylinks WHERE cl_to = ?"
title = self.title.replace(" ", "_").split(":", 1)[1] title = self.title.replace(" ", "_").split(":", 1)[1]
if member_type == "size": if member_type == "size":
@@ -134,20 +134,20 @@ class Category(Page):
def subcats(self): def subcats(self):
return self._get_size("subcats") return self._get_size("subcats")


def get_members(self, use_sql=False, limit=None, follow_redirects=None):
def get_members(self, limit=None, follow_redirects=None):
"""Iterate over Pages in the category. """Iterate over Pages in the category.


If *use_sql* is ``True``, we will use a SQL query instead of the API.
Note that pages are retrieved from the API in chunks (by default, in
500-page chunks for normal users and 5000-page chunks for bots and
admins), so queries may be made as we go along. If *limit* is given, we
will provide this many pages, or less if the category is smaller. By
default, *limit* is ``None``, meaning we will keep iterating over
members until the category is exhausted. *follow_redirects* is passed
directly to :py:meth:`site.get_page()
If *limit* is given, we will provide this many pages, or less if the
category is smaller. By default, *limit* is ``None``, meaning we will
keep iterating over members until the category is exhausted.
*follow_redirects* is passed directly to :py:meth:`site.get_page()
<earwigbot.wiki.site.Site.get_page>`; it defaults to ``None``, which <earwigbot.wiki.site.Site.get_page>`; it defaults to ``None``, which
will use the value passed to our :py:meth:`__init__`. will use the value passed to our :py:meth:`__init__`.


This will use either the API or SQL depending on which are enabled and
the amount of lag on each. This is handled by :py:meth:`site.delegate()
<earwigbot.wiki.site.Site.delegate>`.

.. note:: .. note::
Be careful when iterating over very large categories with no limit. Be careful when iterating over very large categories with no limit.
If using the API, at best, you will make one query per 5000 pages, If using the API, at best, you will make one query per 5000 pages,
@@ -160,9 +160,10 @@ class Category(Page):
thousand, in which case the sheer number of titles in memory becomes thousand, in which case the sheer number of titles in memory becomes
problematic. problematic.
""" """
services = {
self.site.SERVICE_API: self._get_members_via_api,
self.site.SERVICE_SQL: self._get_members_via_sql
}
if follow_redirects is None: if follow_redirects is None:
follow_redirects = self._follow_redirects follow_redirects = self._follow_redirects
if use_sql:
return self._get_members_via_sql(limit, follow_redirects)
else:
return self._get_members_via_api(limit, follow_redirects)
return self.site.delegate(services, (follow_redirects,))

Loading…
Cancel
Save