@@ -97,11 +97,11 @@ and the following methods: | |||||
- :py:meth:`namespace_name_to_id(name) | - :py:meth:`namespace_name_to_id(name) | ||||
<earwigbot.wiki.site.Site.namespace_name_to_id>`: given a namespace name, | <earwigbot.wiki.site.Site.namespace_name_to_id>`: given a namespace name, | ||||
returns the associated namespace ID | returns the associated namespace ID | ||||
- :py:meth:`get_page(title, follow_redirects=False) | |||||
- :py:meth:`get_page(title, follow_redirects=False, ...) | |||||
<earwigbot.wiki.site.Site.get_page>`: returns a ``Page`` object for the given | <earwigbot.wiki.site.Site.get_page>`: returns a ``Page`` object for the given | ||||
title (or a :py:class:`~earwigbot.wiki.category.Category` object if the | title (or a :py:class:`~earwigbot.wiki.category.Category` object if the | ||||
page's namespace is "``Category:``") | page's namespace is "``Category:``") | ||||
- :py:meth:`get_category(catname, follow_redirects=False) | |||||
- :py:meth:`get_category(catname, follow_redirects=False, ...) | |||||
<earwigbot.wiki.site.Site.get_category>`: returns a ``Category`` object for | <earwigbot.wiki.site.Site.get_category>`: returns a ``Category`` object for | ||||
the given title (sans namespace) | the given title (sans namespace) | ||||
- :py:meth:`get_user(username) <earwigbot.wiki.site.Site.get_user>`: returns a | - :py:meth:`get_user(username) <earwigbot.wiki.site.Site.get_user>`: returns a | ||||
@@ -120,7 +120,7 @@ provide the following attributes: | |||||
- :py:attr:`~earwigbot.wiki.page.Page.site`: the page's corresponding | - :py:attr:`~earwigbot.wiki.page.Page.site`: the page's corresponding | ||||
:py:class:`~earwigbot.wiki.site.Site` object | :py:class:`~earwigbot.wiki.site.Site` object | ||||
- :py:attr:`~earwigbot.wiki.page.Page.title`: the page's title, or pagename | - :py:attr:`~earwigbot.wiki.page.Page.title`: the page's title, or pagename | ||||
- :py:attr:`~earwigbot.wiki.page.Page.exists`: whether the page exists | |||||
- :py:attr:`~earwigbot.wiki.page.Page.exists`: whether or not the page exists | |||||
- :py:attr:`~earwigbot.wiki.page.Page.pageid`: an integer ID representing the | - :py:attr:`~earwigbot.wiki.page.Page.pageid`: an integer ID representing the | ||||
page | page | ||||
- :py:attr:`~earwigbot.wiki.page.Page.url`: the page's URL | - :py:attr:`~earwigbot.wiki.page.Page.url`: the page's URL | ||||
@@ -166,9 +166,10 @@ or :py:meth:`site.get_page(title) <earwigbot.wiki.site.Site.get_page>` where | |||||
``title`` is in the ``Category:`` namespace) provide the following additional | ``title`` is in the ``Category:`` namespace) provide the following additional | ||||
method: | method: | ||||
- :py:meth:`get_members(use_sql=False, limit=None) | |||||
<earwigbot.wiki.category.Category.get_members>`: returns a list of page | |||||
titles in the category (limit is ``50`` by default if using the API) | |||||
- :py:meth:`get_members(use_sql=False, limit=None, ...) | |||||
<earwigbot.wiki.category.Category.get_members>`: iterates over | |||||
:py:class:`~earwigbot.wiki.page.Page`\ s in the category, until either the | |||||
category is exhausted or (if given) ``limit`` is reached | |||||
Users | Users | ||||
~~~~~ | ~~~~~ | ||||
@@ -178,6 +179,8 @@ Create :py:class:`earwigbot.wiki.User <earwigbot.wiki.user.User>` objects with | |||||
:py:meth:`page.get_creator() <earwigbot.wiki.page.Page.get_creator>`. They | :py:meth:`page.get_creator() <earwigbot.wiki.page.Page.get_creator>`. They | ||||
provide the following attributes: | provide the following attributes: | ||||
- :py:attr:`~earwigbot.wiki.user.User.site`: the user's corresponding | |||||
:py:class:`~earwigbot.wiki.site.Site` object | |||||
- :py:attr:`~earwigbot.wiki.user.User.name`: the user's username | - :py:attr:`~earwigbot.wiki.user.User.name`: the user's username | ||||
- :py:attr:`~earwigbot.wiki.user.User.exists`: ``True`` if the user exists, or | - :py:attr:`~earwigbot.wiki.user.User.exists`: ``True`` if the user exists, or | ||||
``False`` if they do not | ``False`` if they do not | ||||
@@ -23,7 +23,7 @@ | |||||
from earwigbot.commands import BaseCommand | from earwigbot.commands import BaseCommand | ||||
class Command(BaseCommand): | class Command(BaseCommand): | ||||
"""Links the user to the pending AFC submissions page and category.""" | |||||
"""Link the user to the pending AFC submissions page and category.""" | |||||
name = "pending" | name = "pending" | ||||
def check(self, data): | def check(self, data): | ||||
@@ -70,7 +70,7 @@ class Command(BaseCommand): | |||||
def get_page(self, title):
    """Return the page called *title* if it exists, otherwise ``None``.

    The page is fetched through ``self.site.get_page()`` with
    ``follow_redirects=False``. ``page.exists`` is compared against the
    page's ``PAGE_EXISTS`` constant; it is a plain state value, so it must
    not be subscripted like the old ``(bool, str)`` tuple.
    """
    page = self.site.get_page(title, follow_redirects=False)
    if page.exists == page.PAGE_EXISTS:
        return page
    # Any other state is reported to the caller as "no page".
    return None
def report(self, page): | def report(self, page): | ||||
@@ -39,7 +39,7 @@ class Category(Page): | |||||
*Public methods:* | *Public methods:* | ||||
- :py:meth:`get_members`: returns a list of page titles in the category | |||||
- :py:meth:`get_members`: iterates over Pages in the category | |||||
""" | """ | ||||
def __repr__(self): | def __repr__(self): | ||||
@@ -51,8 +51,8 @@ class Category(Page): | |||||
"""Return a nice string representation of the Category.""" | """Return a nice string representation of the Category.""" | ||||
return '<Category "{0}" of {1}>'.format(self.title, str(self._site)) | return '<Category "{0}" of {1}>'.format(self.title, str(self._site)) | ||||
def _get_members_via_sql(self, limit): | |||||
"""Return a list of tuples of (title, pageid) in the category.""" | |||||
def _get_members_via_sql(self, limit, follow): | |||||
"""Iterate over Pages in the category using SQL.""" | |||||
query = """SELECT page_title, page_namespace, page_id FROM page | query = """SELECT page_title, page_namespace, page_id FROM page | ||||
JOIN categorylinks ON page_id = cl_from | JOIN categorylinks ON page_id = cl_from | ||||
WHERE cl_to = ?""" | WHERE cl_to = ?""" | ||||
@@ -64,42 +64,66 @@ class Category(Page): | |||||
else: | else: | ||||
result = self._site.sql_query(query, (title,)) | result = self._site.sql_query(query, (title,)) | ||||
members = [] | |||||
for row in result: | |||||
members = list(result) | |||||
for row in members: | |||||
base = row[0].replace("_", " ").decode("utf8") | base = row[0].replace("_", " ").decode("utf8") | ||||
namespace = self._site.namespace_id_to_name(row[1]) | namespace = self._site.namespace_id_to_name(row[1]) | ||||
if namespace: | if namespace: | ||||
title = u":".join((namespace, base)) | title = u":".join((namespace, base)) | ||||
else: # Avoid doing a silly (albeit valid) ":Pagename" thing | else: # Avoid doing a silly (albeit valid) ":Pagename" thing | ||||
title = base | title = base | ||||
members.append((title, row[2])) | |||||
return members | |||||
yield self._site.get_page(title, follow_redirects=follow, | |||||
pageid=row[2]) | |||||
def _get_members_via_api(self, limit): | |||||
"""Return a list of page titles in the category using the API.""" | |||||
def _get_members_via_api(self, limit, follow): | |||||
"""Iterate over Pages in the category using the API.""" | |||||
params = {"action": "query", "list": "categorymembers", | params = {"action": "query", "list": "categorymembers", | ||||
"cmlimit": limit, "cmtitle": self._title} | |||||
if not limit: | |||||
params["cmlimit"] = 50 # Default value | |||||
result = self._site.api_query(**params) | |||||
members = result['query']['categorymembers'] | |||||
return [member["title"] for member in members] | |||||
def get_members(self, use_sql=False, limit=None): | |||||
"""Return a list of page titles in the category. | |||||
"cmtitle": self._title} | |||||
while 1: | |||||
params["cmlimit"] = limit if limit else "max" | |||||
result = self._site.api_query(**params) | |||||
for member in result["query"]["categorymembers"]: | |||||
title = member["title"] | |||||
yield self._site.get_page(title, follow_redirects=follow) | |||||
if "query-continue" in result: | |||||
qcontinue = result["query-continue"]["categorymembers"] | |||||
params["cmcontinue"] = qcontinue["cmcontinue"] | |||||
if limit: | |||||
limit -= len(result["query"]["categorymembers"]) | |||||
else: | |||||
break | |||||
def get_members(self, use_sql=False, limit=None, follow_redirects=None):
    """Iterate over Pages in the category.

    If *use_sql* is ``True``, we will use a SQL query instead of the API.
    Note that pages are retrieved from the API in chunks (by default, in
    500-page chunks for normal users and 5000-page chunks for bots and
    admins), so queries may be made as we go along. If *limit* is given, we
    will provide this many pages, or fewer if the category is smaller. By
    default, *limit* is ``None``, meaning we will keep iterating over
    members until the category is exhausted. *follow_redirects* is passed
    directly to :py:meth:`site.get_page()
    <earwigbot.wiki.site.Site.get_page>`; it defaults to ``None``, which
    will use the value passed to our :py:meth:`__init__`.

    .. note::
       Be careful when iterating over very large categories with no limit.
       If using the API, at best, you will make one query per 5000 pages,
       which can add up significantly for categories with hundreds of
       thousands of members. As for SQL, note that *all page titles are
       stored internally* as soon as the query is made, so the site-wide
       SQL lock can be freed and unrelated queries can be made without
       requiring a separate connection to be opened. This is generally not
       an issue unless your category's size approaches several hundred
       thousand, in which case the sheer number of titles in memory becomes
       problematic.
    """
    if follow_redirects is None:
        # Fall back to the redirect policy this object was created with.
        follow_redirects = self._follow_redirects
    if use_sql:
        return self._get_members_via_sql(limit, follow_redirects)
    return self._get_members_via_api(limit, follow_redirects)
@@ -43,7 +43,7 @@ class Page(CopyrightMixin): | |||||
- :py:attr:`site`: the page's corresponding Site object | - :py:attr:`site`: the page's corresponding Site object | ||||
- :py:attr:`title`: the page's title, or pagename | - :py:attr:`title`: the page's title, or pagename | ||||
- :py:attr:`exists`: whether the page exists | |||||
- :py:attr:`exists`: whether or not the page exists | |||||
- :py:attr:`pageid`: an integer ID representing the page | - :py:attr:`pageid`: an integer ID representing the page | ||||
- :py:attr:`url`: the page's URL | - :py:attr:`url`: the page's URL | ||||
- :py:attr:`namespace`: the page's namespace as an integer | - :py:attr:`namespace`: the page's namespace as an integer | ||||
@@ -70,17 +70,20 @@ class Page(CopyrightMixin): | |||||
URL | URL | ||||
""" | """ | ||||
re_redirect = "^\s*\#\s*redirect\s*\[\[(.*?)\]\]" | |||||
PAGE_UNKNOWN = 0 | |||||
PAGE_INVALID = 1 | |||||
PAGE_MISSING = 2 | |||||
PAGE_EXISTS = 3 | |||||
def __init__(self, site, title, follow_redirects=False): | |||||
def __init__(self, site, title, follow_redirects=False, pageid=None): | |||||
"""Constructor for new Page instances. | """Constructor for new Page instances. | ||||
Takes three arguments: a Site object, the Page's title (or pagename), | |||||
and whether or not to follow redirects (optional, defaults to False). | |||||
Takes four arguments: a Site object, the Page's title (or pagename), | |||||
whether or not to follow redirects (optional, defaults to False), and | |||||
a page ID to supplement the title (optional, defaults to None - i.e., | |||||
we will have to query the API to get it). | |||||
As with User, site.get_page() is preferred. Site's method has support | |||||
for a default *follow_redirects* value in our config, while __init__() | |||||
always defaults to False. | |||||
As with User, site.get_page() is preferred. | |||||
__init__() will not do any API queries, but it will use basic namespace | __init__() will not do any API queries, but it will use basic namespace | ||||
logic to determine our namespace ID and if we are a talkpage. | logic to determine our namespace ID and if we are a talkpage. | ||||
@@ -89,9 +92,9 @@ class Page(CopyrightMixin): | |||||
self._site = site | self._site = site | ||||
self._title = title.strip() | self._title = title.strip() | ||||
self._follow_redirects = self._keep_following = follow_redirects | self._follow_redirects = self._keep_following = follow_redirects | ||||
self._pageid = pageid | |||||
self._exists = 0 | |||||
self._pageid = None | |||||
self._exists = self.PAGE_UNKNOWN | |||||
self._is_redirect = None | self._is_redirect = None | ||||
self._lastrevid = None | self._lastrevid = None | ||||
self._protection = None | self._protection = None | ||||
@@ -140,7 +143,7 @@ class Page(CopyrightMixin): | |||||
Note that validity != existence. If a page's title is invalid (e.g, it | Note that validity != existence. If a page's title is invalid (e.g, it | ||||
contains "[") it will always be invalid, and cannot be edited. | contains "[") it will always be invalid, and cannot be edited. | ||||
""" | """ | ||||
if self._exists == 1: | |||||
if self._exists == self.PAGE_INVALID: | |||||
e = "Page '{0}' is invalid.".format(self._title) | e = "Page '{0}' is invalid.".format(self._title) | ||||
raise exceptions.InvalidPageError(e) | raise exceptions.InvalidPageError(e) | ||||
@@ -152,7 +155,7 @@ class Page(CopyrightMixin): | |||||
It will also call _assert_validity() beforehand. | It will also call _assert_validity() beforehand. | ||||
""" | """ | ||||
self._assert_validity() | self._assert_validity() | ||||
if self._exists == 2: | |||||
if self._exists == self.PAGE_MISSING: | |||||
e = "Page '{0}' does not exist.".format(self._title) | e = "Page '{0}' does not exist.".format(self._title) | ||||
raise exceptions.PageNotFoundError(e) | raise exceptions.PageNotFoundError(e) | ||||
@@ -213,14 +216,14 @@ class Page(CopyrightMixin): | |||||
if "missing" in res: | if "missing" in res: | ||||
# If it has a negative ID and it's missing; we can still get | # If it has a negative ID and it's missing; we can still get | ||||
# data like the namespace, protection, and URL: | # data like the namespace, protection, and URL: | ||||
self._exists = 2 | |||||
self._exists = self.PAGE_MISSING | |||||
else: | else: | ||||
# If it has a negative ID and it's invalid, then break here, | # If it has a negative ID and it's invalid, then break here, | ||||
# because there's no other data for us to get: | # because there's no other data for us to get: | ||||
self._exists = 1 | |||||
self._exists = self.PAGE_INVALID | |||||
return | return | ||||
else: | else: | ||||
self._exists = 3 | |||||
self._exists = self.PAGE_EXISTS | |||||
self._fullurl = res["fullurl"] | self._fullurl = res["fullurl"] | ||||
self._protection = res["protection"] | self._protection = res["protection"] | ||||
@@ -312,7 +315,7 @@ class Page(CopyrightMixin): | |||||
if result["edit"]["result"] == "Success": | if result["edit"]["result"] == "Success": | ||||
self._content = None | self._content = None | ||||
self._basetimestamp = None | self._basetimestamp = None | ||||
self._exists = 0 | |||||
self._exists = self.PAGE_UNKNOWN | |||||
return | return | ||||
# If we're here, then the edit failed. If it's because of AssertEdit, | # If we're here, then the edit failed. If it's because of AssertEdit, | ||||
@@ -346,7 +349,7 @@ class Page(CopyrightMixin): | |||||
params["starttimestamp"] = self._starttimestamp | params["starttimestamp"] = self._starttimestamp | ||||
if self._basetimestamp: | if self._basetimestamp: | ||||
params["basetimestamp"] = self._basetimestamp | params["basetimestamp"] = self._basetimestamp | ||||
if self._exists == 2: | |||||
if self._exists == self.PAGE_MISSING: | |||||
# Page does not exist; don't edit if it already exists: | # Page does not exist; don't edit if it already exists: | ||||
params["createonly"] = "true" | params["createonly"] = "true" | ||||
else: | else: | ||||
@@ -384,7 +387,7 @@ class Page(CopyrightMixin): | |||||
# These attributes are now invalidated: | # These attributes are now invalidated: | ||||
self._content = None | self._content = None | ||||
self._basetimestamp = None | self._basetimestamp = None | ||||
self._exists = 0 | |||||
self._exists = self.PAGE_UNKNOWN | |||||
raise exceptions.EditConflictError(error.info) | raise exceptions.EditConflictError(error.info) | ||||
elif error.code in ["emptypage", "emptynewsection"]: | elif error.code in ["emptypage", "emptynewsection"]: | ||||
@@ -432,12 +435,12 @@ class Page(CopyrightMixin): | |||||
@property | @property | ||||
def site(self): | def site(self): | ||||
"""The Page's corresponding Site object.""" | |||||
"""The page's corresponding Site object.""" | |||||
return self._site | return self._site | ||||
@property | @property | ||||
def title(self): | def title(self): | ||||
"""The Page's title, or "pagename". | |||||
"""The page's title, or "pagename". | |||||
This won't do any API queries on its own. Any other attributes or | This won't do any API queries on its own. Any other attributes or | ||||
methods that do API queries will reload the title, however, like | methods that do API queries will reload the title, however, like | ||||
@@ -448,37 +451,36 @@ class Page(CopyrightMixin): | |||||
@property
def exists(self):
    """Whether or not the page exists.

    This will be a number; its value does not matter, but it will equal
    one of :py:attr:`self.PAGE_INVALID <PAGE_INVALID>`,
    :py:attr:`self.PAGE_MISSING <PAGE_MISSING>`, or
    :py:attr:`self.PAGE_EXISTS <PAGE_EXISTS>`. Compare against those
    constants rather than treating the result as a boolean or a tuple.

    Makes an API query only if we haven't already made one.
    """
    if self._exists == self.PAGE_UNKNOWN:
        # State not determined yet; _load() is expected to fill it in.
        self._load()
    return self._exists
@property
def pageid(self):
    """An integer ID representing the page.

    Makes an API query only if we haven't already made one and the *pageid*
    parameter to :py:meth:`__init__` was left as ``None``, which should be
    true for all cases except when pages are returned by an SQL generator
    (like :py:meth:`category.get_members(use_sql=True)
    <earwigbot.wiki.category.Category.get_members>`).

    Raises :py:exc:`~earwigbot.exceptions.InvalidPageError` or
    :py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is
    invalid or the page does not exist, respectively.
    """
    if self._pageid:
        # An ID is already known, so skip the query and the existence
        # check entirely.
        return self._pageid
    if self._exists == self.PAGE_UNKNOWN:
        self._load()
    self._assert_existence()  # Missing pages do not have IDs
    return self._pageid
@@ -518,7 +520,7 @@ class Page(CopyrightMixin): | |||||
name is invalid. Won't raise an error if the page is missing because | name is invalid. Won't raise an error if the page is missing because | ||||
those can still be create-protected. | those can still be create-protected. | ||||
""" | """ | ||||
if self._exists == 0: | |||||
if self._exists == self.PAGE_UNKNOWN: | |||||
self._load() | self._load() | ||||
self._assert_validity() # Invalid pages cannot be protected | self._assert_validity() # Invalid pages cannot be protected | ||||
return self._protection | return self._protection | ||||
@@ -541,7 +543,7 @@ class Page(CopyrightMixin): | |||||
We will return ``False`` even if the page does not exist or is invalid. | We will return ``False`` even if the page does not exist or is invalid. | ||||
""" | """ | ||||
if self._exists == 0: | |||||
if self._exists == self.PAGE_UNKNOWN: | |||||
self._load() | self._load() | ||||
return self._is_redirect | return self._is_redirect | ||||
@@ -606,7 +608,7 @@ class Page(CopyrightMixin): | |||||
Raises InvalidPageError or PageNotFoundError if the page name is | Raises InvalidPageError or PageNotFoundError if the page name is | ||||
invalid or the page does not exist, respectively. | invalid or the page does not exist, respectively. | ||||
""" | """ | ||||
if self._exists == 0: | |||||
if self._exists == self.PAGE_UNKNOWN: | |||||
# Kill two birds with one stone by doing an API query for both our | # Kill two birds with one stone by doing an API query for both our | ||||
# attributes and our page content: | # attributes and our page content: | ||||
query = self._site.api_query | query = self._site.api_query | ||||
@@ -621,7 +623,7 @@ class Page(CopyrightMixin): | |||||
if self._keep_following and self._is_redirect: | if self._keep_following and self._is_redirect: | ||||
self._title = self.get_redirect_target() | self._title = self.get_redirect_target() | ||||
self._keep_following = False # Don't follow double redirects | self._keep_following = False # Don't follow double redirects | ||||
self._exists = 0 # Force another API query | |||||
self._exists = self.PAGE_UNKNOWN # Force another API query | |||||
self.get() | self.get() | ||||
return self._content | return self._content | ||||
@@ -645,9 +647,10 @@ class Page(CopyrightMixin): | |||||
:py:exc:`~earwigbot.exceptions.RedirectError` if the page is not a | :py:exc:`~earwigbot.exceptions.RedirectError` if the page is not a | ||||
redirect. | redirect. | ||||
""" | """ | ||||
re_redirect = "^\s*\#\s*redirect\s*\[\[(.*?)\]\]" | |||||
content = self.get() | content = self.get() | ||||
try: | try: | ||||
return re.findall(self.re_redirect, content, flags=re.I)[0] | |||||
return re.findall(re_redirect, content, flags=re.I)[0] | |||||
except IndexError: | except IndexError: | ||||
e = "The page does not appear to have a redirect target." | e = "The page does not appear to have a redirect target." | ||||
raise exceptions.RedirectError(e) | raise exceptions.RedirectError(e) | ||||
@@ -666,7 +669,7 @@ class Page(CopyrightMixin): | |||||
:py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is | :py:exc:`~earwigbot.exceptions.PageNotFoundError` if the page name is | ||||
invalid or the page does not exist, respectively. | invalid or the page does not exist, respectively. | ||||
""" | """ | ||||
if self._exists == 0: | |||||
if self._exists == self.PAGE_UNKNOWN: | |||||
self._load() | self._load() | ||||
self._assert_existence() | self._assert_existence() | ||||
if not self._creator: | if not self._creator: | ||||
@@ -184,6 +184,12 @@ class Site(object): | |||||
res = "<Site {0} ({1}:{2}) at {3}>" | res = "<Site {0} ({1}:{2}) at {3}>" | ||||
return res.format(self.name, self.project, self.lang, self.domain) | return res.format(self.name, self.project, self.lang, self.domain) | ||||
def _unicodeify(self, value, encoding="utf8"): | |||||
"""Return input as unicode if it's not unicode to begin with.""" | |||||
if isinstance(value, unicode): | |||||
return value | |||||
return unicode(value, encoding) | |||||
def _urlencode_utf8(self, params): | def _urlencode_utf8(self, params): | ||||
"""Implement urllib.urlencode() with support for unicode input.""" | """Implement urllib.urlencode() with support for unicode input.""" | ||||
enc = lambda s: s.encode("utf8") if isinstance(s, unicode) else str(s) | enc = lambda s: s.encode("utf8") if isinstance(s, unicode) else str(s) | ||||
@@ -682,7 +688,7 @@ class Site(object): | |||||
e = "There is no namespace with name '{0}'.".format(name) | e = "There is no namespace with name '{0}'.".format(name) | ||||
raise exceptions.NamespaceNotFoundError(e) | raise exceptions.NamespaceNotFoundError(e) | ||||
def get_page(self, title, follow_redirects=False): | |||||
def get_page(self, title, follow_redirects=False, pageid=None): | |||||
"""Return a :py:class:`Page` object for the given title. | """Return a :py:class:`Page` object for the given title. | ||||
*follow_redirects* is passed directly to | *follow_redirects* is passed directly to | ||||
@@ -696,23 +702,26 @@ class Site(object): | |||||
redirect-following: :py:class:`~earwigbot.wiki.page.Page`'s methods | redirect-following: :py:class:`~earwigbot.wiki.page.Page`'s methods | ||||
provide that. | provide that. | ||||
""" | """ | ||||
title = self._unicodeify(title) | |||||
prefixes = self.namespace_id_to_name(constants.NS_CATEGORY, all=True) | prefixes = self.namespace_id_to_name(constants.NS_CATEGORY, all=True) | ||||
prefix = title.split(":", 1)[0] | prefix = title.split(":", 1)[0] | ||||
if prefix != title: # Avoid a page that is simply "Category" | if prefix != title: # Avoid a page that is simply "Category" | ||||
if prefix in prefixes: | if prefix in prefixes: | ||||
return Category(self, title, follow_redirects) | |||||
return Page(self, title, follow_redirects) | |||||
return Category(self, title, follow_redirects, pageid) | |||||
return Page(self, title, follow_redirects, pageid) | |||||
def get_category(self, catname, follow_redirects=False, pageid=None):
    """Return a :py:class:`Category` object for the given category name.

    *catname* should be given *without* a namespace prefix. This method is
    really just shorthand for :py:meth:`get_page("Category:" + catname)
    <get_page>`.

    *follow_redirects* and *pageid* are passed on unchanged to the
    ``Category`` constructor.
    """
    catname = self._unicodeify(catname)
    # The prefix is looked up rather than hard-coded as "Category".
    prefix = self.namespace_id_to_name(constants.NS_CATEGORY)
    pagename = u":".join((prefix, catname))
    return Category(self, pagename, follow_redirects, pageid)
def get_user(self, username=None): | def get_user(self, username=None): | ||||
"""Return a :py:class:`User` object for the given username. | """Return a :py:class:`User` object for the given username. | ||||
@@ -721,6 +730,7 @@ class Site(object): | |||||
:py:class:`~earwigbot.wiki.user.User` object representing the currently | :py:class:`~earwigbot.wiki.user.User` object representing the currently | ||||
logged-in (or anonymous!) user is returned. | logged-in (or anonymous!) user is returned. | ||||
""" | """ | ||||
username = self._unicodeify(username) | |||||
if not username: | if not username: | ||||
username = self._get_username() | username = self._get_username() | ||||
return User(self, username) | return User(self, username) |
@@ -39,6 +39,7 @@ class User(object): | |||||
*Attributes:* | *Attributes:* | ||||
- :py:attr:`site`: the user's corresponding Site object | |||||
- :py:attr:`name`: the user's username | - :py:attr:`name`: the user's username | ||||
- :py:attr:`exists`: ``True`` if the user exists, else ``False`` | - :py:attr:`exists`: ``True`` if the user exists, else ``False`` | ||||
- :py:attr:`userid`: an integer ID representing the user | - :py:attr:`userid`: an integer ID representing the user | ||||
@@ -155,6 +156,11 @@ class User(object): | |||||
self._gender = res["gender"] | self._gender = res["gender"] | ||||
@property
def site(self):
    """The user's corresponding Site object.

    Read-only; returns the value stored in ``self._site`` without making
    any queries.
    """
    return self._site
@property | |||||
def name(self): | def name(self): | ||||
"""The user's username. | """The user's username. | ||||