
Docstring updates in Site

tags/v0.1^2
Ben Kurtovic, 12 years ago
parent commit 3cc62b4482
1 changed file with 104 additions and 84 deletions

earwigbot/wiki/site.py  (+104, -84)

@@ -85,11 +85,11 @@ class Site(object):


         This probably isn't necessary to call yourself unless you're building a
         Site that's not in your config and you don't want to add it - normally
-        all you need is tools.get_site(name), which creates the Site for you
+        all you need is wiki.get_site(name), which creates the Site for you
         based on your config file and the sites database. We accept a bunch of
-        kwargs, but the only ones you really "need" are `base_url` and
-        `script_path` - this is enough to figure out an API url. `login`, a
-        tuple of (username, password), is highly recommended. `cookiejar` will
+        kwargs, but the only ones you really "need" are *base_url* and
+        *script_path*; this is enough to figure out an API url. *login*, a
+        tuple of (username, password), is highly recommended. *cookiejar will
         be used to store cookies, and we'll use a normal CookieJar if none is
         given.
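
Editor's note: a rough sketch of the usage the revised docstring describes. That wiki.get_site() and the Site constructor are importable like this, and the example keyword values, are assumptions taken from the docstring rather than verified against the rest of the package.

    # Hedged sketch only -- import path and values are assumptions.
    from earwigbot import wiki

    site = wiki.get_site("enwiki")  # normal route: built from config + sites database

    # Building a Site by hand needs at least base_url and script_path;
    # login (a (username, password) tuple) is recommended, cookiejar optional.
    site = wiki.Site(base_url="//en.wikipedia.org", script_path="/w",
                     login=("ExampleUser", "example-password"))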


@@ -177,7 +177,7 @@ class Site(object):
         return res.format(self.name, self.project, self.lang, self.domain)

     def _urlencode_utf8(self, params):
-        """Implement urllib.urlencode(params) with support for unicode input."""
+        """Implement urllib.urlencode() with support for unicode input."""
         enc = lambda s: s.encode("utf8") if isinstance(s, unicode) else str(s)
         args = []
         for key, val in params.iteritems():
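
Editor's note (background, not part of the diff): the helper exists because Python 2's urllib.urlencode calls str() on its values and so raises UnicodeEncodeError for non-ASCII unicode, which is what the enc lambda above works around. A minimal standalone illustration:

    # Python 2 sketch; the parameter value is made up.
    import urllib

    params = {"titles": u"Ta\u0142k"}
    # urllib.urlencode(params) would raise UnicodeEncodeError here, so encode
    # unicode values to UTF-8 bytes first, as the helper above does:
    safe = dict((k, v.encode("utf8") if isinstance(v, unicode) else str(v))
                for k, v in params.iteritems())
    print urllib.urlencode(safe)  # prints: titles=Ta%C5%82k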
@@ -187,30 +187,10 @@ class Site(object):
return "&".join(args) return "&".join(args)


def _api_query(self, params, tries=0, wait=5): def _api_query(self, params, tries=0, wait=5):
"""Do an API query with `params` as a dict of parameters.
"""Do an API query with *params* as a dict of parameters.


This will first attempt to construct an API url from self._base_url and
self._script_path. We need both of these, or else we'll raise
SiteAPIError. If self._base_url is protocol-relative (introduced in
MediaWiki 1.18), we'll choose HTTPS if self._user_https is True,
otherwise HTTP.

We'll encode the given params, adding format=json along the way, as
well as &assert= and &maxlag= based on self._assert_edit and _maxlag.
Additionally, we'll sleep a bit if the last query was made less than
self._wait_between_queries seconds ago. The request is made through
self._opener, which has cookie support (self._cookiejar), a User-Agent
(wiki.constants.USER_AGENT), and Accept-Encoding set to "gzip".

Assuming everything went well, we'll gunzip the data (if compressed),
load it as a JSON object, and return it.

If our request failed for some reason, we'll raise SiteAPIError with
details. If that reason was due to maxlag, we'll sleep for a bit and
then repeat the query until we exceed self._max_retries.

There's helpful MediaWiki API documentation at
<http://www.mediawiki.org/wiki/API>.
See the documentation for :py:meth:`api_query` for full implementation
details.
""" """
since_last_query = time() - self._last_query_time # Throttling support since_last_query = time() - self._last_query_time # Throttling support
if since_last_query < self._wait_between_queries: if since_last_query < self._wait_between_queries:
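
Editor's note: the removed paragraphs above (which reappear in api_query's docstring later in this diff) describe URL construction roughly like the following standalone sketch; the /api.php suffix is the standard MediaWiki endpoint and is an assumption here, since the construction code itself is not shown.

    # Sketch of the protocol-relative URL handling the docstring describes.
    base_url = "//en.wikipedia.org"   # protocol-relative (MediaWiki 1.18+)
    script_path = "/w"
    user_https = True

    if base_url.startswith("//"):
        base_url = ("https:" if user_https else "http:") + base_url
    api_url = base_url + script_path + "/api.php"
    # -> "https://en.wikipedia.org/w/api.php"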
@@ -302,8 +282,8 @@ class Site(object):
         was not given as a keyword argument. We'll do an API query to get the
         missing data, but only if there actually *is* missing data.

-        Additionally, you can call this with `force=True` to forcibly reload
-        all attributes.
+        Additionally, you can call this with *force* set to True to forcibly
+        reload all attributes.
         """
         # All attributes to be loaded, except _namespaces, which is a special
         # case because it requires additional params in the API query:
@@ -333,7 +313,7 @@ class Site(object):
     def _load_namespaces(self, result):
         """Fill self._namespaces with a dict of namespace IDs and names.

-        Called by _load_attributes() with API data as `result` when
+        Called by _load_attributes() with API data as *result* when
         self._namespaces was not given as an kwarg to __init__().
         """
         self._namespaces = {}
@@ -464,8 +444,8 @@ class Site(object):
         Raises LoginError on login errors (duh), like bad passwords and
         nonexistent usernames.

-        `login` is a (username, password) tuple. `token` is the token returned
-        from our first request, and `attempt` is to prevent getting stuck in a
+        *login* is a (username, password) tuple. *token* is the token returned
+        from our first request, and *attempt* is to prevent getting stuck in a
         loop if MediaWiki isn't acting right.
         """
         name, password = login
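
Editor's note: the token/attempt wording reflects MediaWiki's old two-step action=login handshake (the first request returns NeedToken plus a token, and the request is then repeated with lgtoken). A hedged sketch of that flow using the public api_query() method, not the bot's actual _login() code:

    # 'site' is a Site instance, e.g. from wiki.get_site("enwiki").
    name, password = ("ExampleUser", "example-password")

    res = site.api_query(action="login", lgname=name, lgpassword=password)
    if res["login"]["result"] == "NeedToken":
        token = res["login"]["token"]
        res = site.api_query(action="login", lgname=name,
                             lgpassword=password, lgtoken=token)
    # res["login"]["result"] should now be "Success"; anything else is the
    # kind of condition _login() turns into a LoginError.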
@@ -558,7 +538,34 @@ class Site(object):
     def api_query(self, **kwargs):
         """Do an API query with `kwargs` as the parameters.

-        See _api_query()'s documentation for details.
+        This will first attempt to construct an API url from
+        :py:attr:`self._base_url` and :py:attr:`self._script_path`. We need
+        both of these, or else we'll raise
+        :py:exc:`~earwigbot.exceptions.SiteAPIError`. If
+        :py:attr:`self._base_url` is protocol-relative (introduced in MediaWiki
+        1.18), we'll choose HTTPS only if :py:attr:`self._user_https` is
+        ``True``, otherwise HTTP.
+
+        We'll encode the given params, adding ``format=json`` along the way, as
+        well as ``&assert=`` and ``&maxlag=`` based on
+        :py:attr:`self._assert_edit` and :py:attr:`_maxlag` respectively.
+        Additionally, we'll sleep a bit if the last query was made fewer than
+        :py:attr:`self._wait_between_queries` seconds ago. The request is made
+        through :py:attr:`self._opener`, which has cookie support
+        (:py:attr:`self._cookiejar`), a ``User-Agent``
+        (:py:const:`earwigbot.wiki.constants.USER_AGENT`), and
+        ``Accept-Encoding`` set to ``"gzip"``.
+
+        Assuming everything went well, we'll gunzip the data (if compressed),
+        load it as a JSON object, and return it.
+
+        If our request failed for some reason, we'll raise
+        :py:exc:`~earwigbot.exceptions.SiteAPIError` with details. If that
+        reason was due to maxlag, we'll sleep for a bit and then repeat the
+        query until we exceed :py:attr:`self._max_retries`.
+
+        There is helpful MediaWiki API documentation at `MediaWiki.org
+        <http://www.mediawiki.org/wiki/API>`_.
         """
         return self._api_query(kwargs)
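
Editor's note: a short usage sketch of the method documented above. The parameters are ordinary MediaWiki API parameters; format=json, assert=, and maxlag= are appended automatically per the docstring, and the parsed JSON object is returned.

    # 'site' is a Site instance, e.g. from wiki.get_site("enwiki").
    result = site.api_query(action="query", meta="siteinfo",
                            siprop="general|namespaces")
    print result["query"]["general"]["sitename"]   # e.g. u"Wikipedia"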


@@ -566,34 +573,33 @@ class Site(object):
                   cursor_class=None, show_table=False):
         """Do an SQL query and yield its results.

-        If `plain_query` is True, we will force an unparameterized query.
-        Specifying both params and plain_query will cause an error.
-
-        If `dict_cursor` is True, we will use oursql.DictCursor as our cursor,
-        otherwise the default oursql.Cursor. If `cursor_class` is given, it
-        will override this option.
-
-        If `show_table` is True, the name of the table will be prepended to the
-        name of the column. This will mainly affect a DictCursor.
-
-        Example:
-        >>> query = "SELECT user_id, user_registration FROM user WHERE user_name = ?"
-        >>> params = ("The Earwig",)
-        >>> result1 = site.sql_query(query, params)
-        >>> result2 = site.sql_query(query, params, dict_cursor=True)
-        >>> for row in result1: print row
-        (7418060L, '20080703215134')
-        >>> for row in result2: print row
-        {'user_id': 7418060L, 'user_registration': '20080703215134'}
-
-        See _sql_connect() for information on how a connection is acquired.
-
-        <http://packages.python.org/oursql> has helpful documentation on the
-        oursql module.
-
-        This may raise SQLError() or one of oursql's exceptions
-        (oursql.ProgrammingError, oursql.InterfaceError, ...) if there were
-        problems with the query.
+        If *plain_query* is ``True``, we will force an unparameterized query.
+        Specifying both *params* and *plain_query* will cause an error. If
+        *dict_cursor* is ``True``, we will use :py:class:`oursql.DictCursor` as
+        our cursor, otherwise the default :py:class:`oursql.Cursor`. If
+        *cursor_class* is given, it will override this option. If *show_table*
+        is True, the name of the table will be prepended to the name of the
+        column. This will mainly affect an :py:class:`~oursql.DictCursor`.
+
+        Example usage::
+
+            >>> query = "SELECT user_id, user_registration FROM user WHERE user_name = ?"
+            >>> params = ("The Earwig",)
+            >>> result1 = site.sql_query(query, params)
+            >>> result2 = site.sql_query(query, params, dict_cursor=True)
+            >>> for row in result1: print row
+            (7418060L, '20080703215134')
+            >>> for row in result2: print row
+            {'user_id': 7418060L, 'user_registration': '20080703215134'}
+
+        This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of
+        oursql's exceptions (:py:exc:`oursql.ProgrammingError`,
+        :py:exc:`oursql.InterfaceError`, ...) if there were problems with the
+        query.
+
+        See :py:meth:`_sql_connect` for information on how a connection is
+        acquired. Also relevant is `oursql's documentation
+        <http://packages.python.org/oursql>`_ for details on that package.
         """
         if not cursor_class:
             if dict_cursor:
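
Editor's note: the docstring's example covers parameterized queries; here is a hedged doctest-style sketch of the plain_query option it also mentions (the result value is made up):

    >>> rows = site.sql_query("SELECT COUNT(*) FROM page", plain_query=True)
    >>> for row in rows: print row
    (12345L,)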
@@ -612,11 +618,16 @@ class Site(object):


     def get_replag(self):
         """Return the estimated database replication lag in seconds.
         Requires SQL access. This function only makes sense on a replicated
         database (e.g. the Wikimedia Toolserver) and on a wiki that receives a
         large number of edits (ideally, at least one per second), or the result
-        may be larger than expected.
+        may be larger than expected, since it works by subtracting the current
+        time from the timestamp of the latest recent changes event.
+
+        This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of
+        oursql's exceptions (:py:exc:`oursql.ProgrammingError`,
+        :py:exc:`oursql.InterfaceError`, ...) if there were problems.
         """
         query = """SELECT UNIX_TIMESTAMP() - UNIX_TIMESTAMP(rc_timestamp) FROM
                    recentchanges ORDER BY rc_timestamp DESC LIMIT 1"""
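
Editor's note: a small usage sketch of get_replag(); the printed figure is illustrative and, per the docstring, only meaningful on a replicated, busy wiki database.

    # Requires SQL access; 'site' is a Site instance as before.
    lag = site.get_replag()
    print "replication lag: %d seconds" % lag   # e.g. replication lag: 3 seconds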
@@ -626,14 +637,16 @@ class Site(object):
     def namespace_id_to_name(self, ns_id, all=False):
         """Given a namespace ID, returns associated namespace names.

-        If all is False (default), we'll return the first name in the list,
-        which is usually the localized version. Otherwise, we'll return the
-        entire list, which includes the canonical name.
+        If *all* is ``False`` (default), we'll return the first name in the
+        list, which is usually the localized version. Otherwise, we'll return
+        the entire list, which includes the canonical name.

-        For example, returns u"Wikipedia" if ns_id=4 and all=False on enwiki;
-        returns [u"Wikipedia", u"Project", u"WP"] if ns_id=4 and all=True.
+        For example, this returns ``u"Wikipedia"`` if *ns_id* = ``4`` and
+        *all* = ``False`` on ``enwiki``; returns ``[u"Wikipedia", u"Project",
+        u"WP"]`` if *ns_id* = ``4`` and *all* is ``True``.

-        Raises NamespaceNotFoundError if the ID is not found.
+        Raises :py:exc:`~earwigbot.exceptions.NamespaceNotFoundError` if the ID
+        is not found.
         """
         try:
             if all:
@@ -647,10 +660,11 @@ class Site(object):
     def namespace_name_to_id(self, name):
         """Given a namespace name, returns the associated ID.

-        Like namespace_id_to_name(), but reversed. Case is ignored, because
-        namespaces are assumed to be case-insensitive.
+        Like :py:meth:`namespace_id_to_name`, but reversed. Case is ignored,
+        because namespaces are assumed to be case-insensitive.

-        Raises NamespaceNotFoundError if the name is not found.
+        Raises :py:exc:`~earwigbot.exceptions.NamespaceNotFoundError` if the
+        name is not found.
         """
         lname = name.lower()
         for ns_id, names in self._namespaces.items():
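
Editor's note: a brief doctest-style illustration of the two namespace helpers above, reusing the enwiki values that namespace_id_to_name's own docstring gives as its example:

    >>> site.namespace_id_to_name(4)
    u'Wikipedia'
    >>> site.namespace_id_to_name(4, all=True)
    [u'Wikipedia', u'Project', u'WP']
    >>> site.namespace_name_to_id("wp")   # lookup is case-insensitive
    4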
@@ -662,14 +676,18 @@ class Site(object):
         raise exceptions.NamespaceNotFoundError(e)

     def get_page(self, title, follow_redirects=False):
-        """Returns a Page object for the given title (pagename).
+        """Return a :py:class:`Page` object for the given title.

-        Will return a Category object instead if the given title is in the
-        category namespace. As Category is a subclass of Page, this should not
-        cause problems.
+        *follow_redirects* is passed directly to
+        :py:class:`~earwigbot.wiki.page.Page`'s constructor. Also, this will
+        return a :py:class:`~earwigbot.wiki.category.Category` object instead
+        if the given title is in the category namespace. As
+        :py:class:`~earwigbot.wiki.category.Category` is a subclass of
+        :py:class:`~earwigbot.wiki.page.Page`, this should not cause problems.

         Note that this doesn't do any direct checks for existence or
-        redirect-following - Page's methods provide that.
+        redirect-following: :py:class:`~earwigbot.wiki.page.Page`'s methods
+        provide that.
         """
         prefixes = self.namespace_id_to_name(constants.NS_CATEGORY, all=True)
         prefix = title.split(":", 1)[0]
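
Editor's note: a quick sketch of the category-aware behavior described above; the titles are arbitrary examples.

    # Titles in the category namespace come back as Category objects, which
    # subclass Page, so both can be treated uniformly.
    page = site.get_page("Main Page")
    cat = site.get_page("Category:Living people", follow_redirects=True)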
@@ -679,20 +697,22 @@ class Site(object):
         return Page(self, title, follow_redirects)

     def get_category(self, catname, follow_redirects=False):
-        """Returns a Category object for the given category name.
+        """Returns a :py:class:`Category` object for the given category name.

-        `catname` should be given *without* a namespace prefix. This method is
-        really just shorthand for get_page("Category:" + catname).
+        *catname* should be given *without* a namespace prefix. This method is
+        really just shorthand for :py:meth:`get_page("Category:" + catname)
+        <get_page>`.
         """
         prefix = self.namespace_id_to_name(constants.NS_CATEGORY)
         pagename = ':'.join((prefix, catname))
         return Category(self, pagename, follow_redirects)

     def get_user(self, username=None):
-        """Returns a User object for the given username.
+        """Returns a :py:class:`User` object for the given username.

-        If `username` is left as None, then a User object representing the
-        currently logged-in (or anonymous!) user is returned.
+        If *username* is left as ``None``, then a
+        :py:class:`~earwigbot.wiki.user.User` object representing the currently
+        logged-in (or anonymous!) user is returned.
         """
         if not username:
             username = self._get_username()
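
Editor's note: a final hedged sketch of the two convenience methods above; the category and username are placeholders.

    # get_category() is shorthand for get_page("Category:" + catname):
    cat = site.get_category("Living people")

    # With no username, get_user() represents whoever we are logged in as
    # (or the anonymous user):
    me = site.get_user()
    someone = site.get_user("The Earwig")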

