
Docstring updates in Site

tags/v0.1^2
Ben Kurtovic, 12 years ago · parent · commit 3cc62b4482
1 changed file with 104 additions and 84 deletions:

earwigbot/wiki/site.py (+104, -84)

@@ -85,11 +85,11 @@ class Site(object):

This probably isn't necessary to call yourself unless you're building a
Site that's not in your config and you don't want to add it - normally
all you need is tools.get_site(name), which creates the Site for you
all you need is wiki.get_site(name), which creates the Site for you
based on your config file and the sites database. We accept a bunch of
kwargs, but the only ones you really "need" are `base_url` and
`script_path` - this is enough to figure out an API url. `login`, a
tuple of (username, password), is highly recommended. `cookiejar` will
kwargs, but the only ones you really "need" are *base_url* and
*script_path*; this is enough to figure out an API url. *login*, a
tuple of (username, password), is highly recommended. *cookiejar* will
be used to store cookies, and we'll use a normal CookieJar if none is
given.
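For orientation, here is a minimal sketch of the two construction paths the docstring above describes. The keyword names are taken from the docstring; the import paths and the concrete values (site name, URLs, credentials) are assumptions for illustration:

    from earwigbot import wiki
    from earwigbot.wiki.site import Site

    # Preferred: let the toolset build the Site from your config and sites DB.
    site = wiki.get_site("enwiki")

    # Manual construction with the kwargs the docstring calls "needed", plus
    # the recommended login tuple:
    site = Site(base_url="//en.wikipedia.org", script_path="/w",
                login=("ExampleUser", "example-password"))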

@@ -177,7 +177,7 @@ class Site(object):
return res.format(self.name, self.project, self.lang, self.domain)

def _urlencode_utf8(self, params):
"""Implement urllib.urlencode(params) with support for unicode input."""
"""Implement urllib.urlencode() with support for unicode input."""
enc = lambda s: s.encode("utf8") if isinstance(s, unicode) else str(s)
args = []
for key, val in params.iteritems():
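The hunk cuts off inside the loop; given the `return "&".join(args)` that opens the next hunk, the helper plausibly reads roughly as follows. This is a reader's sketch in the same Python 2 idiom, not the committed code, and the use of quote_plus is an assumption based on the urllib.urlencode() behavior the docstring names:

    from urllib import quote_plus

    def _urlencode_utf8(params):
        # Encode unicode keys/values as UTF-8 strings, percent-encode each
        # pair, and join the pairs into a query string.
        enc = lambda s: s.encode("utf8") if isinstance(s, unicode) else str(s)
        args = []
        for key, val in params.iteritems():
            args.append(quote_plus(enc(key)) + "=" + quote_plus(enc(val)))
        return "&".join(args)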
@@ -187,30 +187,10 @@ class Site(object):
return "&".join(args)

def _api_query(self, params, tries=0, wait=5):
"""Do an API query with `params` as a dict of parameters.
"""Do an API query with *params* as a dict of parameters.

This will first attempt to construct an API url from self._base_url and
self._script_path. We need both of these, or else we'll raise
SiteAPIError. If self._base_url is protocol-relative (introduced in
MediaWiki 1.18), we'll choose HTTPS if self._user_https is True,
otherwise HTTP.

We'll encode the given params, adding format=json along the way, as
well as &assert= and &maxlag= based on self._assert_edit and _maxlag.
Additionally, we'll sleep a bit if the last query was made less than
self._wait_between_queries seconds ago. The request is made through
self._opener, which has cookie support (self._cookiejar), a User-Agent
(wiki.constants.USER_AGENT), and Accept-Encoding set to "gzip".

Assuming everything went well, we'll gunzip the data (if compressed),
load it as a JSON object, and return it.

If our request failed for some reason, we'll raise SiteAPIError with
details. If that reason was due to maxlag, we'll sleep for a bit and
then repeat the query until we exceed self._max_retries.

There's helpful MediaWiki API documentation at
<http://www.mediawiki.org/wiki/API>.
See the documentation for :py:meth:`api_query` for full implementation
details.
"""
since_last_query = time() - self._last_query_time # Throttling support
if since_last_query < self._wait_between_queries:
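The hunk ends mid-throttle; the "sleep a bit" behavior the docstring mentions presumably continues along these lines (a sketch under that assumption, not the committed code):

    from time import sleep

    # Wait out the remainder of the per-query interval before continuing.
    wait_time = self._wait_between_queries - since_last_query
    sleep(wait_time)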
@@ -302,8 +282,8 @@ class Site(object):
was not given as a keyword argument. We'll do an API query to get the
missing data, but only if there actually *is* missing data.

Additionally, you can call this with `force=True` to forcibly reload
all attributes.
Additionally, you can call this with *force* set to True to forcibly
reload all attributes.
"""
# All attributes to be loaded, except _namespaces, which is a special
# case because it requires additional params in the API query:
@@ -333,7 +313,7 @@ class Site(object):
def _load_namespaces(self, result):
"""Fill self._namespaces with a dict of namespace IDs and names.

Called by _load_attributes() with API data as `result` when
Called by _load_attributes() with API data as *result* when
self._namespaces was not given as a kwarg to __init__().
"""
self._namespaces = {}
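For a sense of the shape this dict takes, the namespace_id_to_name() examples later in this diff imply a mapping from namespace ID to a list of names, localized name first:

    # Illustrative contents only (values echo the examples further down):
    namespaces = {
        4: [u"Wikipedia", u"Project", u"WP"],   # project namespace on enwiki
        14: [u"Category"],                      # constants.NS_CATEGORY, used by get_page()
    }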
@@ -464,8 +444,8 @@ class Site(object):
Raises LoginError on login errors (duh), like bad passwords and
nonexistent usernames.

`login` is a (username, password) tuple. `token` is the token returned
from our first request, and `attempt` is to prevent getting stuck in a
*login* is a (username, password) tuple. *token* is the token returned
from our first request, and *attempt* is to prevent getting stuck in a
loop if MediaWiki isn't acting right.
"""
name, password = login
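The token/attempt dance described above follows the classic MediaWiki action=login handshake; a rough sketch of that flow, under the assumption that this method drives it (the parameter names are the MediaWiki API's, the control flow is illustrative rather than the committed code):

    params = {"action": "login", "lgname": name, "lgpassword": password}
    if token:
        params["lgtoken"] = token
    result = self._api_query(params)["login"]

    if result["result"] == "NeedToken" and attempt == 0:
        # First request: MediaWiki hands back a token; retry once with it.
        return self._login(login, token=result["token"], attempt=1)
    if result["result"] != "Success":
        raise exceptions.LoginError(result["result"])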
@@ -558,7 +538,34 @@ class Site(object):
def api_query(self, **kwargs):
"""Do an API query with `kwargs` as the parameters.

See _api_query()'s documentation for details.
This will first attempt to construct an API url from
:py:attr:`self._base_url` and :py:attr:`self._script_path`. We need
both of these, or else we'll raise
:py:exc:`~earwigbot.exceptions.SiteAPIError`. If
:py:attr:`self._base_url` is protocol-relative (introduced in MediaWiki
1.18), we'll choose HTTPS only if :py:attr:`self._user_https` is
``True``, otherwise HTTP.

We'll encode the given params, adding ``format=json`` along the way, as
well as ``&assert=`` and ``&maxlag=`` based on
:py:attr:`self._assert_edit` and :py:attr:`_maxlag` respectively.
Additionally, we'll sleep a bit if the last query was made fewer than
:py:attr:`self._wait_between_queries` seconds ago. The request is made
through :py:attr:`self._opener`, which has cookie support
(:py:attr:`self._cookiejar`), a ``User-Agent``
(:py:const:`earwigbot.wiki.constants.USER_AGENT`), and
``Accept-Encoding`` set to ``"gzip"``.

Assuming everything went well, we'll gunzip the data (if compressed),
load it as a JSON object, and return it.

If our request failed for some reason, we'll raise
:py:exc:`~earwigbot.exceptions.SiteAPIError` with details. If that
reason was due to maxlag, we'll sleep for a bit and then repeat the
query until we exceed :py:attr:`self._max_retries`.

There is helpful MediaWiki API documentation at `MediaWiki.org
<http://www.mediawiki.org/wiki/API>`_.
"""
return self._api_query(kwargs)
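By way of illustration, a typical call matching the behavior described above: keyword arguments become API parameters, format=json is appended automatically, and the decoded JSON object is returned (the specific query and its output are examples, not taken from the commit):

    >>> result = site.api_query(action="query", meta="siteinfo", siprop="general")
    >>> result["query"]["general"]["sitename"]
    u'Wikipedia'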

@@ -566,34 +573,33 @@ class Site(object):
cursor_class=None, show_table=False):
"""Do an SQL query and yield its results.

If `plain_query` is True, we will force an unparameterized query.
Specifying both params and plain_query will cause an error.

If `dict_cursor` is True, we will use oursql.DictCursor as our cursor,
otherwise the default oursql.Cursor. If `cursor_class` is given, it
will override this option.

If `show_table` is True, the name of the table will be prepended to the
name of the column. This will mainly affect a DictCursor.

Example:
>>> query = "SELECT user_id, user_registration FROM user WHERE user_name = ?"
>>> params = ("The Earwig",)
>>> result1 = site.sql_query(query, params)
>>> result2 = site.sql_query(query, params, dict_cursor=True)
>>> for row in result1: print row
(7418060L, '20080703215134')
>>> for row in result2: print row
{'user_id': 7418060L, 'user_registration': '20080703215134'}

See _sql_connect() for information on how a connection is acquired.

<http://packages.python.org/oursql> has helpful documentation on the
oursql module.

This may raise SQLError() or one of oursql's exceptions
(oursql.ProgrammingError, oursql.InterfaceError, ...) if there were
problems with the query.
If *plain_query* is ``True``, we will force an unparameterized query.
Specifying both *params* and *plain_query* will cause an error. If
*dict_cursor* is ``True``, we will use :py:class:`oursql.DictCursor` as
our cursor, otherwise the default :py:class:`oursql.Cursor`. If
*cursor_class* is given, it will override this option. If *show_table*
is True, the name of the table will be prepended to the name of the
column. This will mainly affect an :py:class:`~oursql.DictCursor`.

Example usage::

>>> query = "SELECT user_id, user_registration FROM user WHERE user_name = ?"
>>> params = ("The Earwig",)
>>> result1 = site.sql_query(query, params)
>>> result2 = site.sql_query(query, params, dict_cursor=True)
>>> for row in result1: print row
(7418060L, '20080703215134')
>>> for row in result2: print row
{'user_id': 7418060L, 'user_registration': '20080703215134'}

This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of
oursql's exceptions (:py:exc:`oursql.ProgrammingError`,
:py:exc:`oursql.InterfaceError`, ...) if there were problems with the
query.

See :py:meth:`_sql_connect` for information on how a connection is
acquired. Also relevant is `oursql's documentation
<http://packages.python.org/oursql>`_ for details on that package.
"""
if not cursor_class:
if dict_cursor:
@@ -612,11 +618,16 @@ class Site(object):

def get_replag(self):
"""Return the estimated database replication lag in seconds.
Requires SQL access. This function only makes sense on a replicated
database (e.g. the Wikimedia Toolserver) and on a wiki that receives a
large number of edits (ideally, at least one per second), or the result
may be larger than expected.
may be larger than expected, since it works by subtracting the current
time from the timestamp of the latest recent changes event.

This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of
oursql's exceptions (:py:exc:`oursql.ProgrammingError`,
:py:exc:`oursql.InterfaceError`, ...) if there were problems.
"""
query = """SELECT UNIX_TIMESTAMP() - UNIX_TIMESTAMP(rc_timestamp) FROM
recentchanges ORDER BY rc_timestamp DESC LIMIT 1"""
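Usage is simply the following; the return value here is made up, and in practice depends on how far the replicated database is lagging:

    >>> site.get_replag()
    4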
@@ -626,14 +637,16 @@ class Site(object):
def namespace_id_to_name(self, ns_id, all=False):
"""Given a namespace ID, returns associated namespace names.

If all is False (default), we'll return the first name in the list,
which is usually the localized version. Otherwise, we'll return the
entire list, which includes the canonical name.
If *all* is ``False`` (default), we'll return the first name in the
list, which is usually the localized version. Otherwise, we'll return
the entire list, which includes the canonical name.

For example, returns u"Wikipedia" if ns_id=4 and all=False on enwiki;
returns [u"Wikipedia", u"Project", u"WP"] if ns_id=4 and all=True.
For example, this returns ``u"Wikipedia"`` if *ns_id* = ``4`` and
*all* = ``False`` on ``enwiki``; returns ``[u"Wikipedia", u"Project",
u"WP"]`` if *ns_id* = ``4`` and *all* is ``True``.

Raises NamespaceNotFoundError if the ID is not found.
Raises :py:exc:`~earwigbot.exceptions.NamespaceNotFoundError` if the ID
is not found.
"""
try:
if all:
@@ -647,10 +660,11 @@ class Site(object):
def namespace_name_to_id(self, name):
"""Given a namespace name, returns the associated ID.

Like namespace_id_to_name(), but reversed. Case is ignored, because
namespaces are assumed to be case-insensitive.
Like :py:meth:`namespace_id_to_name`, but reversed. Case is ignored,
because namespaces are assumed to be case-insensitive.

Raises NamespaceNotFoundError if the name is not found.
Raises :py:exc:`~earwigbot.exceptions.NamespaceNotFoundError` if the
name is not found.
"""
lname = name.lower()
for ns_id, names in self._namespaces.items():
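A short example of the reverse lookup, reusing the namespace from the namespace_id_to_name() docstring above and showing the case-insensitive matching:

    >>> site.namespace_name_to_id("wp")
    4
    >>> site.namespace_name_to_id("WIKIPEDIA")
    4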
@@ -662,14 +676,18 @@ class Site(object):
raise exceptions.NamespaceNotFoundError(e)

def get_page(self, title, follow_redirects=False):
"""Returns a Page object for the given title (pagename).
"""Return a :py:class:`Page` object for the given title.

Will return a Category object instead if the given title is in the
category namespace. As Category is a subclass of Page, this should not
cause problems.
*follow_redirects* is passed directly to
:py:class:`~earwigbot.wiki.page.Page`'s constructor. Also, this will
return a :py:class:`~earwigbot.wiki.category.Category` object instead
if the given title is in the category namespace. As
:py:class:`~earwigbot.wiki.category.Category` is a subclass of
:py:class:`~earwigbot.wiki.page.Page`, this should not cause problems.

Note that this doesn't do any direct checks for existence or
redirect-following - Page's methods provide that.
redirect-following: :py:class:`~earwigbot.wiki.page.Page`'s methods
provide that.
"""
prefixes = self.namespace_id_to_name(constants.NS_CATEGORY, all=True)
prefix = title.split(":", 1)[0]
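For instance (the titles are illustrative; the second call yields a Category because the title carries the category prefix, as described above):

    >>> page = site.get_page("Main Page")
    >>> cat = site.get_page("Category:Living people")  # returns a Category object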
@@ -679,20 +697,22 @@ class Site(object):
return Page(self, title, follow_redirects)

def get_category(self, catname, follow_redirects=False):
"""Returns a Category object for the given category name.
"""Returns a :py:class:`Category` object for the given category name.

`catname` should be given *without* a namespace prefix. This method is
really just shorthand for get_page("Category:" + catname).
*catname* should be given *without* a namespace prefix. This method is
really just shorthand for :py:meth:`get_page("Category:" + catname)
<get_page>`.
"""
prefix = self.namespace_id_to_name(constants.NS_CATEGORY)
pagename = ':'.join((prefix, catname))
return Category(self, pagename, follow_redirects)

def get_user(self, username=None):
"""Returns a User object for the given username.
"""Returns a :py:class:`User` object for the given username.

If `username` is left as None, then a User object representing the
currently logged-in (or anonymous!) user is returned.
If *username* is left as ``None``, then a
:py:class:`~earwigbot.wiki.user.User` object representing the currently
logged-in (or anonymous!) user is returned.
"""
if not username:
username = self._get_username()
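To round out the trio of getters, usage along the lines the docstrings describe (the category and username are illustrative; "The Earwig" reappears from the sql_query example earlier in this diff):

    >>> cat = site.get_category("Living people")   # "Category:" prefix is added for you
    >>> me = site.get_user()                       # currently logged-in (or anonymous) user
    >>> earwig = site.get_user("The Earwig")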

