@@ -4,6 +4,9 @@ | |||||
# Ignore bot-specific config file: | # Ignore bot-specific config file: | ||||
config.json | config.json | ||||
# Ignore cookies file: | |||||
.cookies | |||||
# Ignore OS X's crud: | # Ignore OS X's crud: | ||||
*.DS_Store | *.DS_Store | ||||
@@ -3,12 +3,11 @@ | |||||
"""Report the status of AFC submissions, either as an automatic message on join | """Report the status of AFC submissions, either as an automatic message on join | ||||
or a request via !status.""" | or a request via !status.""" | ||||
import json | |||||
import re | import re | ||||
import urllib | |||||
from core import config | from core import config | ||||
from irc.classes import BaseCommand | from irc.classes import BaseCommand | ||||
from wiki import tools | |||||
class AFCStatus(BaseCommand): | class AFCStatus(BaseCommand): | ||||
def get_hooks(self): | def get_hooks(self): | ||||
@@ -29,6 +28,8 @@ class AFCStatus(BaseCommand): | |||||
return False | return False | ||||
def process(self, data): | def process(self, data): | ||||
self.site = tools.get_site() | |||||
if data.line[1] == "JOIN": | if data.line[1] == "JOIN": | ||||
notice = self.get_join_notice() | notice = self.get_join_notice() | ||||
self.connection.notice(data.nick, notice) | self.connection.notice(data.nick, notice) | ||||
@@ -85,19 +86,15 @@ class AFCStatus(BaseCommand): | |||||
def count_submissions(self): | def count_submissions(self): | ||||
"""Returns the number of open AFC submissions (count of CAT:PEND).""" | """Returns the number of open AFC submissions (count of CAT:PEND).""" | ||||
params = {'action': 'query', 'list': 'categorymembers', 'cmlimit':'500', 'format': 'json'} | |||||
params['cmtitle'] = "Category:Pending_AfC_submissions" | |||||
data = urllib.urlencode(params) | |||||
raw = urllib.urlopen("http://en.wikipedia.org/w/api.php", data).read() | |||||
res = json.loads(raw) | |||||
subs = len(res['query']['categorymembers']) | |||||
cat = self.site.get_category("Pending AfC submissions") | |||||
subs = cat.members(limit=500) | |||||
subs -= 2 # remove [[Wikipedia:Articles for creation/Redirects]] and [[Wikipedia:Files for upload]], which aren't real submissions | subs -= 2 # remove [[Wikipedia:Articles for creation/Redirects]] and [[Wikipedia:Files for upload]], which aren't real submissions | ||||
return subs | return subs | ||||
def count_redirects(self): | def count_redirects(self): | ||||
"""Returns the number of open redirect submissions. Calculated as the | """Returns the number of open redirect submissions. Calculated as the | ||||
total number of submissions minus the closed ones.""" | total number of submissions minus the closed ones.""" | ||||
content = self.get_page("Wikipedia:Articles_for_creation/Redirects") | |||||
content = self.site.get_page("Wikipedia:Articles for creation/Redirects").get() | |||||
total = len(re.findall("^\s*==(.*?)==\s*$", content, re.MULTILINE)) | total = len(re.findall("^\s*==(.*?)==\s*$", content, re.MULTILINE)) | ||||
closed = content.lower().count("{{afc-c|b}}") | closed = content.lower().count("{{afc-c|b}}") | ||||
redirs = total - closed | redirs = total - closed | ||||
@@ -106,24 +103,12 @@ class AFCStatus(BaseCommand): | |||||
def count_files(self): | def count_files(self): | ||||
"""Returns the number of open WP:FFU (Files For Upload) requests. | """Returns the number of open WP:FFU (Files For Upload) requests. | ||||
Calculated as the total number of requests minus the closed ones.""" | Calculated as the total number of requests minus the closed ones.""" | ||||
content = self.get_page("Wikipedia:Files_for_upload") | |||||
content = self.site.get_page("Wikipedia:Files for upload").get() | |||||
total = len(re.findall("^\s*==(.*?)==\s*$", content, re.MULTILINE)) | total = len(re.findall("^\s*==(.*?)==\s*$", content, re.MULTILINE)) | ||||
closed = content.lower().count("{{ifu-c|b}}") | closed = content.lower().count("{{ifu-c|b}}") | ||||
files = total - closed | files = total - closed | ||||
return files | return files | ||||
def get_page(self, pagename): | |||||
"""Simple method to return the content of the page 'pagename'. Will be | |||||
a part of wiki/tools/ when I finish that.""" | |||||
params = {'action': 'query', 'prop': 'revisions', 'rvprop':'content', 'rvlimit':'1', 'format': 'json'} | |||||
params['titles'] = pagename | |||||
data = urllib.urlencode(params) | |||||
raw = urllib.urlopen("http://en.wikipedia.org/w/api.php", data).read() | |||||
res = json.loads(raw) | |||||
pageid = res['query']['pages'].keys()[0] | |||||
content = res['query']['pages'][pageid]['revisions'][0]['*'] | |||||
return content | |||||
def get_aggregate(self, num): | def get_aggregate(self, num): | ||||
"""Returns a human-readable AFC status based on the number of pending | """Returns a human-readable AFC status based on the number of pending | ||||
AFC submissions, open redirect requests, and open FFU requests. This | AFC submissions, open redirect requests, and open FFU requests. This | ||||
@@ -4,10 +4,8 @@ | |||||
Retrieve a list of user rights for a given username via the API. | Retrieve a list of user rights for a given username via the API. | ||||
""" | """ | ||||
import json | |||||
import urllib | |||||
from irc.classes import BaseCommand | from irc.classes import BaseCommand | ||||
from wiki import tools | |||||
class Rights(BaseCommand): | class Rights(BaseCommand): | ||||
def get_hooks(self): | def get_hooks(self): | ||||
@@ -27,24 +25,14 @@ class Rights(BaseCommand): | |||||
return | return | ||||
username = ' '.join(data.args) | username = ' '.join(data.args) | ||||
rights = self.get_rights(username) | |||||
site = tools.get_site() | |||||
user = site.get_user(username) | |||||
rights = user.groups() | |||||
if rights: | if rights: | ||||
try: | |||||
rights.remove("*") # remove the implicit '*' group given to everyone | |||||
except ValueError: | |||||
pass | |||||
self.connection.reply(data, "the rights for \x0302{0}\x0301 are {1}.".format(username, ', '.join(rights))) | self.connection.reply(data, "the rights for \x0302{0}\x0301 are {1}.".format(username, ', '.join(rights))) | ||||
else: | else: | ||||
self.connection.reply(data, "the user \x0302{0}\x0301 has no rights, or does not exist.".format(username)) | self.connection.reply(data, "the user \x0302{0}\x0301 has no rights, or does not exist.".format(username)) | ||||
def get_rights(self, username): | |||||
params = {'action': 'query', 'format': 'json', 'list': 'users', 'usprop': 'groups'} | |||||
params['ususers'] = username | |||||
data = urllib.urlencode(params) | |||||
raw = urllib.urlopen("http://en.wikipedia.org/w/api.php", data).read() | |||||
res = json.loads(raw) | |||||
try: | |||||
rights = res['query']['users'][0]['groups'] | |||||
except KeyError: # 'groups' not found, meaning the user does not exist | |||||
return None | |||||
try: | |||||
rights.remove("*") # remove the implicit '*' group given to everyone | |||||
except ValueError: # I don't expect this to happen, but if it does, be prepared | |||||
pass | |||||
return rights |
@@ -0,0 +1,20 @@ | |||||
# -*- coding: utf-8 -*- | |||||
""" | |||||
EarwigBot's Wiki Toolset | |||||
This is a collection of classes and functions to read from and write to | |||||
Wikipedia and other wiki sites. No connection whatsoever to python-wikitools | |||||
written by Mr.Z-man, other than a similar purpose. We share no code. | |||||
Import the toolset with `from wiki import tools`. | |||||
""" | |||||
from wiki.tools.constants import * | |||||
from wiki.tools.exceptions import * | |||||
from wiki.tools.functions import * | |||||
from wiki.tools.category import Category | |||||
from wiki.tools.page import Page | |||||
from wiki.tools.site import Site | |||||
from wiki.tools.user import User |
@@ -0,0 +1,30 @@ | |||||
# -*- coding: utf-8 -*- | |||||
from wiki.tools.page import Page | |||||
class Category(Page):
    """
    EarwigBot's Wiki Toolset: Category Class

    Represents a Category on a given Site, a subclass of Page. Provides
    additional methods, but Page's own methods should work fine on Category
    objects. Site.get_page() will return a Category instead of a Page if the
    given title is in the category namespace; get_category() is shorthand,
    because it accepts category names without the namespace prefix.

    Public methods:
    members -- returns a list of titles in the category
    """

    def members(self, limit=50):
        """Returns a list of titles in the category.

        If `limit` is provided, we will provide this many titles, or less if
        the category is too small. `limit` defaults to 50; normal users can go
        up to 500, and bots can go up to 5,000 on a single API query.
        """
        # NOTE: self.title is a *method* inherited from Page, so it must be
        # called; passing the bound method itself would send its repr (e.g.
        # "<bound method ...>") as the cmtitle parameter and break the query.
        params = {"action": "query", "list": "categorymembers",
                  "cmlimit": limit, "cmtitle": self.title()}
        result = self._site._api_query(params)
        members = result['query']['categorymembers']
        return [member["title"] for member in members]
@@ -0,0 +1,35 @@ | |||||
# -*- coding: utf-8 -*- | |||||
""" | |||||
EarwigBot's Wiki Toolset: Constants | |||||
This module defines some useful constants, such as default namespace IDs for | |||||
easy lookup and our user agent. | |||||
Import with `from wiki.tools.constants import *`. | |||||
""" | |||||
import platform

# User agent when making API queries; includes the interpreter version so
# server operators can identify (and contact) us.
USER_AGENT = "EarwigBot/0.1-dev (Python/{0}; https://github.com/earwig/earwigbot)".format(platform.python_version())

# Default namespace IDs, as used by MediaWiki. Content namespaces are even;
# each one's talkpage namespace is the next (odd) ID.
NS_MAIN = 0
NS_TALK = 1
NS_USER = 2
NS_USER_TALK = 3
NS_PROJECT = 4
NS_PROJECT_TALK = 5
NS_FILE = 6
NS_FILE_TALK = 7
NS_MEDIAWIKI = 8
NS_MEDIAWIKI_TALK = 9
NS_TEMPLATE = 10
NS_TEMPLATE_TALK = 11
NS_HELP = 12
NS_HELP_TALK = 13
NS_CATEGORY = 14
NS_CATEGORY_TALK = 15

# Virtual namespaces; these are negative and have no talkpages.
NS_SPECIAL = -1
NS_MEDIA = -2
@@ -0,0 +1,47 @@ | |||||
# -*- coding: utf-8 -*- | |||||
""" | |||||
EarwigBot's Wiki Toolset: Exceptions | |||||
This module contains all exceptions used by the wiki.tools package. | |||||
""" | |||||
class WikiToolsetError(Exception):
    """Base exception class for errors in the Wiki Toolset."""

class SiteNotFoundError(WikiToolsetError):
    """A site matching the args given to get_site() could not be found in the
    config file."""

class SiteAPIError(WikiToolsetError):
    """We couldn't connect to a site's API, perhaps because the server doesn't
    exist, our URL is wrong or incomplete, or they're having temporary
    problems."""

class LoginError(WikiToolsetError):
    """An error occurred while trying to login. Perhaps the username/password
    is incorrect."""

class PermissionsError(WikiToolsetError):
    """We tried to do something we don't have permission to, like a non-admin
    trying to delete a page, or trying to edit a page when no login information
    was provided."""

class NamespaceNotFoundError(WikiToolsetError):
    """A requested namespace name or namespace ID does not exist."""

class PageNotFoundError(WikiToolsetError):
    """Attempting to get certain information about a page that does not
    exist."""

class InvalidPageError(WikiToolsetError):
    """Attempting to get certain information about a page whose title is
    invalid."""

class RedirectError(WikiToolsetError):
    """Page's get_redirect_target() method failed because the page is either
    not a redirect, or it is malformed."""

class UserNotFoundError(WikiToolsetError):
    """Attempting to get certain information about a user that does not
    exist."""
@@ -0,0 +1,181 @@ | |||||
# -*- coding: utf-8 -*- | |||||
""" | |||||
EarwigBot's Wiki Toolset: Misc Functions | |||||
This module, a component of the wiki.tools package, contains miscellaneous | |||||
functions that are not methods of any class, like get_site(). | |||||
There's no need to import this module explicitly. All functions here are | |||||
automatically available from wiki.tools. | |||||
""" | |||||
from cookielib import LWPCookieJar, LoadError | |||||
import errno | |||||
from getpass import getpass | |||||
from os import chmod, path | |||||
import stat | |||||
from core import config | |||||
from wiki.tools.exceptions import SiteNotFoundError | |||||
from wiki.tools.site import Site | |||||
__all__ = ["get_site"] | |||||
_cookiejar = None | |||||
def _load_config():
    """Load the bot's config file on behalf of a config-requiring function.

    Called by functions such as get_site() when config has not yet been
    parsed. This normally only happens when toolset code is run straight from
    the Python interpreter, since earwigbot.py or core/main.py handle config
    loading when the bot runs normally. If the config's passwords are
    encrypted, we interactively prompt for the decryption key.
    """
    needs_key = config.verify_config()
    # Passwords in the config file are encrypted; ask for the key. Otherwise
    # parse with no key at all.
    key = getpass("Enter key to unencrypt bot passwords: ") if needs_key else None
    config.parse_config(key)
def _get_cookiejar():
    """Returns a LWPCookieJar object loaded from our .cookies file. The same
    one is returned every time.

    The .cookies file is located in the project root, same directory as
    config.json and earwigbot.py. If it doesn't exist, we will create the file
    and set it to be readable and writeable only by us. If it exists but the
    information inside is bogus, we will ignore it.

    This is normally called by _get_site_object_from_dict() (in turn called by
    get_site()), and the cookiejar is passed to our Site's constructor, used
    when it makes API queries. This way, we can easily preserve cookies between
    sites (e.g., for CentralAuth), making logins easier.
    """
    global _cookiejar
    if _cookiejar is not None:  # cached from an earlier call
        return _cookiejar

    cookie_file = path.join(config.root_dir, ".cookies")
    _cookiejar = LWPCookieJar(cookie_file)

    try:
        _cookiejar.load()
    except LoadError:
        # File contains bad data, so ignore it completely:
        pass
    except IOError as e:
        if e.errno == errno.ENOENT:  # "No such file or directory"
            # Create the file with owner-only permissions *atomically* so
            # other users never get a window to peek at our cookies. (The old
            # open()-then-chmod() approach briefly left the file readable by
            # everyone under a typical umask.)
            import os
            fd = os.open(cookie_file, os.O_WRONLY | os.O_CREAT,
                         stat.S_IRUSR | stat.S_IWUSR)
            os.close(fd)
        else:
            raise
    return _cookiejar
def _get_site_object_from_dict(name, d):
    """Build a Site object from a config dict and a separate site name.

    `d` is typically one entry of config.wiki["sites"]; missing keys simply
    become None (or a tuple of Nones for the SQL/login pairs).
    """
    return Site(name=name,
                project=d.get("project"),
                lang=d.get("lang"),
                base_url=d.get("baseURL"),
                article_path=d.get("articlePath"),
                script_path=d.get("scriptPath"),
                sql=(d.get("sqlServer"), d.get("sqlDB")),
                namespaces=d.get("namespaces"),
                # Shared login info and cookiejar come from the global config,
                # not the per-site dict:
                login=(config.wiki.get("username"), config.wiki.get("password")),
                cookiejar=_get_cookiejar())
def get_site(name=None, project=None, lang=None):
    """Returns a Site instance based on information from our config file.

    With no arguments, returns the default site as specified by our config
    file. This is default = config.wiki["defaultSite"];
    config.wiki["sites"][default].

    With `name` specified, returns the site specified by
    config.wiki["sites"][name].

    With `project` and `lang` specified, returns the site specified by the
    member of config.wiki["sites"], `s`, for which s["project"] == project and
    s["lang"] == lang.

    We will attempt to login to the site automatically
    using config.wiki["username"] and config.wiki["password"] if both are
    defined.

    Specifying a project without a lang or a lang without a project will raise
    TypeError. If all three args are specified, `name` will be first tried,
    then `project` and `lang`. If, with any number of args, a site cannot be
    found in the config, SiteNotFoundError is raised.
    """
    # Check if config has been loaded, and load it if it hasn't:
    if not config.is_config_loaded():
        _load_config()

    # Someone specified a project without a lang (or a lang without a
    # project)! The ^ (xor) is true when exactly one of the two is None:
    if (project is None) ^ (lang is None):
        e = "Keyword arguments 'lang' and 'project' must be specified together."
        raise TypeError(e)

    # No args given, so return our default site (project is None implies lang
    # is None, so we don't need to add that in):
    if name is None and project is None:
        try:
            default = config.wiki["defaultSite"]
        except KeyError:
            raise SiteNotFoundError("Default site is not specified in config.")
        try:
            site = config.wiki["sites"][default]
        except KeyError:
            e = "Default site specified by config is not in the config's sites list."
            raise SiteNotFoundError(e)
        return _get_site_object_from_dict(default, site)

    # Name arg given, but don't look at others unless `name` isn't found:
    if name is not None:
        try:
            site = config.wiki["sites"][name]
        except KeyError:
            if project is None:  # implies lang is None, so only name was given
                e = "Site '{0}' not found in config.".format(name)
                raise SiteNotFoundError(e)
            for sitename, site in config.wiki["sites"].items():
                if site["project"] == project and site["lang"] == lang:
                    return _get_site_object_from_dict(sitename, site)
            # BUG FIX: str.format() returns a new string; the old code threw
            # the formatted result away and raised the raw template with
            # unfilled {0}/{1}/{2} placeholders.
            e = "Neither site '{0}' nor site '{1}:{2}' found in config."
            raise SiteNotFoundError(e.format(name, project, lang))
        else:
            return _get_site_object_from_dict(name, site)

    # If we end up here, then project and lang are both not None:
    for sitename, site in config.wiki["sites"].items():
        if site["project"] == project and site["lang"] == lang:
            return _get_site_object_from_dict(sitename, site)
    e = "Site '{0}:{1}' not found in config.".format(project, lang)
    raise SiteNotFoundError(e)
def add_site():
    """STUB: config editing is required first.

    Returns True if the site was added successfully or False if the site was
    already in our config. Raises ConfigError if saving the updated file failed
    for some reason."""
    pass

def del_site(name):
    """STUB: config editing is required first.

    Returns True if the site was removed successfully or False if the site was
    not in our config originally. Raises ConfigError if saving the updated file
    failed for some reason."""
    pass
@@ -0,0 +1,414 @@ | |||||
# -*- coding: utf-8 -*- | |||||
import re | |||||
from urllib import quote | |||||
from wiki.tools.exceptions import * | |||||
class Page(object): | |||||
""" | |||||
EarwigBot's Wiki Toolset: Page Class | |||||
Represents a Page on a given Site. Has methods for getting information | |||||
about the page, getting page content, and so on. Category is a subclass of | |||||
Page with additional methods. | |||||
Public methods: | |||||
title -- returns the page's title, or pagename | |||||
exists -- returns whether the page exists | |||||
pageid -- returns an integer ID representing the page | |||||
url -- returns the page's URL | |||||
namespace -- returns the page's namespace as an integer | |||||
protection -- returns the page's current protection status | |||||
is_talkpage -- returns True if the page is a talkpage, else False | |||||
is_redirect -- returns True if the page is a redirect, else False | |||||
toggle_talk -- returns a content page's talk page, or vice versa | |||||
get -- returns page content | |||||
get_redirect_target -- if the page is a redirect, returns its destination | |||||
""" | |||||
def __init__(self, site, title, follow_redirects=False): | |||||
"""Constructor for new Page instances. | |||||
Takes three arguments: a Site object, the Page's title (or pagename), | |||||
and whether or not to follow redirects (optional, defaults to False). | |||||
As with User, site.get_page() is preferred. Site's method has support | |||||
for a default `follow_redirects` value in our config, while __init__ | |||||
always defaults to False. | |||||
__init__ will not do any API queries, but it will use basic namespace | |||||
logic to determine our namespace ID and if we are a talkpage. | |||||
""" | |||||
self._site = site | |||||
self._title = title.strip() | |||||
self._follow_redirects = self._keep_following = follow_redirects | |||||
self._exists = 0 | |||||
self._pageid = None | |||||
self._is_redirect = None | |||||
self._lastrevid = None | |||||
self._protection = None | |||||
self._fullurl = None | |||||
self._content = None | |||||
# Try to determine the page's namespace using our site's namespace | |||||
# converter: | |||||
prefix = self._title.split(":", 1)[0] | |||||
if prefix != title: # ignore a page that's titled "Category" or "User" | |||||
try: | |||||
self._namespace = self._site.namespace_name_to_id(prefix) | |||||
except NamespaceNotFoundError: | |||||
self._namespace = 0 | |||||
else: | |||||
self._namespace = 0 | |||||
# Is this a talkpage? Talkpages have odd IDs, while content pages have | |||||
# even IDs, excluding the "special" namespaces: | |||||
if self._namespace < 0: | |||||
self._is_talkpage = False | |||||
else: | |||||
self._is_talkpage = self._namespace % 2 == 1 | |||||
def _force_validity(self): | |||||
"""Used to ensure that our page's title is valid. | |||||
If this method is called when our page is not valid (and after | |||||
_load_attributes() has been called), InvalidPageError will be raised. | |||||
Note that validity != existence. If a page's title is invalid (e.g, it | |||||
contains "[") it will always be invalid, and cannot be edited. | |||||
""" | |||||
if self._exists == 1: | |||||
e = "Page '{0}' is invalid.".format(self._title) | |||||
raise InvalidPageError(e) | |||||
def _force_existence(self): | |||||
"""Used to ensure that our page exists. | |||||
If this method is called when our page doesn't exist (and after | |||||
_load_attributes() has been called), PageNotFoundError will be raised. | |||||
It will also call _force_validity() beforehand. | |||||
""" | |||||
self._force_validity() | |||||
if self._exists == 2: | |||||
e = "Page '{0}' does not exist.".format(self._title) | |||||
raise PageNotFoundError(e) | |||||
def _load_wrapper(self): | |||||
"""Calls _load_attributes() and follows redirects if we're supposed to. | |||||
This method will only follow redirects if follow_redirects=True was | |||||
passed to __init__() (perhaps indirectly passed by site.get_page()). | |||||
It avoids the API's &redirects param in favor of manual following, | |||||
so we can act more realistically (we don't follow double redirects, and | |||||
circular redirects don't break us). | |||||
This will raise RedirectError if we have a problem following, but that | |||||
is a bug and should NOT happen. | |||||
If we're following a redirect, this will make a grand total of three | |||||
API queries. It's a lot, but each one is quite small. | |||||
""" | |||||
self._load_attributes() | |||||
if self._keep_following and self._is_redirect: | |||||
self._title = self.get_redirect_target() | |||||
self._keep_following = False # don't follow double redirects | |||||
self._content = None # reset the content we just loaded | |||||
self._load_attributes() | |||||
    def _load_attributes(self, result=None):
        """Loads various data from the API in a single query.

        Loads self._title, ._exists, ._is_redirect, ._pageid, ._fullurl,
        ._protection, ._namespace, ._is_talkpage, and ._lastrevid using the
        API. It will do a query of its own unless `result` is provided, in
        which case we'll pretend `result` is what the query returned.

        Assuming the API is sound, this should not raise any exceptions.
        """
        if result is None:
            params = {"action": "query", "prop": "info", "titles": self._title,
                      "inprop": "protection|url"}
            result = self._site._api_query(params)

        # The API keys the single returned page by its pageid; take that one
        # entry whatever its ID is. (Python 2: dict.values() is a list.)
        res = result["query"]["pages"].values()[0]

        # Normalize our pagename/title thing:
        self._title = res["title"]

        # The API includes a "redirect" key (with an empty value) iff the
        # page is a redirect:
        try:
            res["redirect"]
        except KeyError:
            self._is_redirect = False
        else:
            self._is_redirect = True

        self._pageid = result["query"]["pages"].keys()[0]
        # Negative pageids mean the page is missing or its title is invalid;
        # _exists codes: 1 = invalid, 2 = missing, 3 = exists (see exists()).
        if int(self._pageid) < 0:
            try:
                res["missing"]
            except KeyError:
                # If it has a negative ID and it's invalid, then break here,
                # because there's no other data for us to get:
                self._exists = 1
                return
            else:
                # If it has a negative ID and it's missing; we can still get
                # data like the namespace, protection, and URL:
                self._exists = 2
        else:
            self._exists = 3

        self._fullurl = res["fullurl"]
        self._protection = res["protection"]

        # We've determined the namespace and talkpage status in __init__()
        # based on the title, but now we can be sure:
        self._namespace = res["ns"]
        self._is_talkpage = self._namespace % 2 == 1  # talkpages have odd IDs

        # This last field will only be specified if the page exists:
        try:
            self._lastrevid = res["lastrevid"]
        except KeyError:
            pass
def _load_content(self, result=None): | |||||
"""Loads current page content from the API. | |||||
If `result` is provided, we'll pretend that is the result of an API | |||||
query and try to get content from that. Otherwise, we'll do an API | |||||
query on our own. | |||||
Don't call this directly, ever - use .get(force=True) if you want to | |||||
force content reloading. | |||||
""" | |||||
if result is None: | |||||
params = {"action": "query", "prop": "revisions", "rvlimit": 1, | |||||
"rvprop": "content", "titles": self._title} | |||||
result = self._site._api_query(params) | |||||
res = result["query"]["pages"].values()[0] | |||||
try: | |||||
content = res["revisions"][0]["*"] | |||||
self._content = content | |||||
except KeyError: | |||||
# This can only happen if the page was deleted since we last called | |||||
# self._load_attributes(). In that case, some of our attributes are | |||||
# outdated, so force another self._load_attributes(): | |||||
self._load_attributes() | |||||
self._force_existence() | |||||
def title(self, force=False): | |||||
"""Returns the Page's title, or pagename. | |||||
This won't do any API queries on its own unless force is True, in which | |||||
case the title will be forcibly reloaded from the API (normalizing it, | |||||
and following redirects if follow_redirects=True was passed to | |||||
__init__()). Any other methods that do API queries will reload title on | |||||
their own, however, like exists() and get(). | |||||
""" | |||||
if force: | |||||
self._load_wrapper() | |||||
return self._title | |||||
def exists(self, force=False): | |||||
"""Returns information about whether the Page exists or not. | |||||
The returned "information" is a tuple with two items. The first is a | |||||
bool, either True if the page exists or False if it does not. The | |||||
second is a string giving more information, either "invalid", (title | |||||
is invalid, e.g. it contains "["), "missing", or "exists". | |||||
Makes an API query if force is True or if we haven't already made one. | |||||
""" | |||||
cases = { | |||||
0: (None, "unknown"), | |||||
1: (False, "invalid"), | |||||
2: (False, "missing"), | |||||
3: (True, "exists"), | |||||
} | |||||
if self._exists == 0 or force: | |||||
self._load_wrapper() | |||||
return cases[self._exists] | |||||
def pageid(self, force=False): | |||||
"""Returns an integer ID representing the Page. | |||||
Makes an API query if force is True or if we haven't already made one. | |||||
Raises InvalidPageError or PageNotFoundError if the page name is | |||||
invalid or the page does not exist, respectively. | |||||
""" | |||||
if self._exists == 0 or force: | |||||
self._load_wrapper() | |||||
self._force_existence() # missing pages do not have IDs | |||||
return self._pageid | |||||
def url(self, force=False): | |||||
"""Returns the page's URL. | |||||
Like title(), this won't do any API queries on its own unless force is | |||||
True. If the API was never queried for this page, we will attempt to | |||||
determine the URL ourselves based on the title. | |||||
""" | |||||
if force: | |||||
self._load_wrapper() | |||||
if self._fullurl is not None: | |||||
return self._fullurl | |||||
else: | |||||
slug = quote(self._title.replace(" ", "_"), safe="/:") | |||||
path = self._site._article_path.replace("$1", slug) | |||||
return ''.join((self._site._base_url, path)) | |||||
def namespace(self, force=False): | |||||
"""Returns the page's namespace ID (an integer). | |||||
Like title(), this won't do any API queries on its own unless force is | |||||
True. If the API was never queried for this page, we will attempt to | |||||
determine the namespace ourselves based on the title. | |||||
""" | |||||
if force: | |||||
self._load_wrapper() | |||||
return self._namespace | |||||
def protection(self, force=False): | |||||
"""Returns the page's current protection status. | |||||
Makes an API query if force is True or if we haven't already made one. | |||||
Raises InvalidPageError if the page name is invalid. Will not raise an | |||||
error if the page is missing because those can still be protected. | |||||
""" | |||||
if self._exists == 0 or force: | |||||
self._load_wrapper() | |||||
self._force_validity() # invalid pages cannot be protected | |||||
return self._protection | |||||
def is_talkpage(self, force=False): | |||||
"""Returns True if the page is a talkpage, else False. | |||||
Like title(), this won't do any API queries on its own unless force is | |||||
True. If the API was never queried for this page, we will attempt to | |||||
determine the talkpage status ourselves based on its namespace ID. | |||||
""" | |||||
if force: | |||||
self._load_wrapper() | |||||
return self._is_talkpage | |||||
def is_redirect(self, force=False): | |||||
"""Returns True if the page is a redirect, else False. | |||||
Makes an API query if force is True or if we haven't already made one. | |||||
We will return False even if the page does not exist or is invalid. | |||||
""" | |||||
if self._exists == 0 or force: | |||||
self._load_wrapper() | |||||
return self._is_redirect | |||||
def toggle_talk(self, force=False, follow_redirects=None): | |||||
"""Returns a content page's talk page, or vice versa. | |||||
The title of the new page is determined by namespace logic, not API | |||||
queries. We won't make any API queries on our own unless force is True, | |||||
and the only reason then would be to forcibly update the title or | |||||
follow redirects if we haven't already made an API query. | |||||
If `follow_redirects` is anything other than None (the default), it | |||||
will be passed to the new Page's __init__(). Otherwise, we'll use the | |||||
value passed to our own __init__(). | |||||
Will raise InvalidPageError if we try to get the talk page of a special | |||||
page (in the Special: or Media: namespaces), but we won't raise an | |||||
exception if our page is otherwise missing or invalid. | |||||
""" | |||||
if force: | |||||
self._load_wrapper() | |||||
if self._namespace < 0: | |||||
ns = self._site.namespace_id_to_name(self._namespace) | |||||
e = "Pages in the {0} namespace can't have talk pages.".format(ns) | |||||
raise InvalidPageError(e) | |||||
if self._is_talkpage: | |||||
new_ns = self._namespace - 1 | |||||
else: | |||||
new_ns = self._namespace + 1 | |||||
try: | |||||
body = self._title.split(":", 1)[1] | |||||
except IndexError: | |||||
body = self._title | |||||
new_prefix = self._site.namespace_id_to_name(new_ns) | |||||
# If the new page is in namespace 0, don't do ":Title" (it's correct, | |||||
# but unnecessary), just do "Title": | |||||
if new_prefix: | |||||
new_title = ':'.join((new_prefix, body)) | |||||
else: | |||||
new_title = body | |||||
if follow_redirects is None: | |||||
follow_redirects = self._follow_redirects | |||||
return Page(self._site, new_title, follow_redirects) | |||||
    def get(self, force=False):
        """Returns page content, which is cached if you try to call get again.

        Use `force` to forcibly reload page content even if we've already
        loaded some. This is good if you want to edit a page multiple times,
        and you want to get updated content before you make your second edit.

        Raises InvalidPageError or PageNotFoundError if the page name is
        invalid or the page does not exist, respectively.
        """
        if force or self._exists == 0:
            # Kill two birds with one stone by doing an API query for both our
            # attributes and our page content:
            params = {"action": "query", "rvprop": "content", "rvlimit": 1,
                      "prop": "info|revisions", "inprop": "protection|url",
                      "titles": self._title}
            result = self._site._api_query(params)
            self._load_attributes(result=result)
            self._force_existence()  # raises if the page is missing/invalid
            self._load_content(result=result)
            # Follow redirects if we're told to:
            if self._keep_following and self._is_redirect:
                self._title = self.get_redirect_target()
                self._keep_following = False  # don't follow double redirects
                self._content = None  # reset the content we just loaded
                self.get(force=True)  # recurse once to load the target's text
            return self._content
        # Make sure we're dealing with a real page here. This may be outdated
        # if the page was deleted since we last called self._load_attributes(),
        # but self._load_content() can handle that:
        self._force_existence()
        if self._content is None:
            self._load_content()  # first get() call with cached attributes
        return self._content
def get_redirect_target(self, force=False): | |||||
"""If the page is a redirect, returns its destination. | |||||
Use `force` to forcibly reload content even if we've already loaded | |||||
some before. Note that this method calls get() for page content. | |||||
Raises InvalidPageError or PageNotFoundError if the page name is | |||||
invalid or the page does not exist, respectively. Raises RedirectError | |||||
if the page is not a redirect. | |||||
""" | |||||
content = self.get(force) | |||||
regexp = "^\s*\#\s*redirect\s*\[\[(.*?)\]\]" | |||||
try: | |||||
return re.findall(regexp, content, flags=re.IGNORECASE)[0] | |||||
except IndexError: | |||||
e = "The page does not appear to have a redirect target." | |||||
raise RedirectError(e) |
@@ -0,0 +1,446 @@ | |||||
# -*- coding: utf-8 -*- | |||||
from cookielib import CookieJar | |||||
from gzip import GzipFile | |||||
from json import loads | |||||
from re import escape as re_escape, match as re_match | |||||
from StringIO import StringIO | |||||
from urllib import unquote_plus, urlencode | |||||
from urllib2 import build_opener, HTTPCookieProcessor, URLError | |||||
from urlparse import urlparse | |||||
from wiki.tools.category import Category | |||||
from wiki.tools.constants import * | |||||
from wiki.tools.exceptions import * | |||||
from wiki.tools.page import Page | |||||
from wiki.tools.user import User | |||||
class Site(object):
    """
    EarwigBot's Wiki Toolset: Site Class

    Represents a Site, with support for API queries and returning Pages,
    Users, and Categories. The constructor takes a bunch of arguments and you
    probably won't need to call it directly, rather tools.get_site() for
    returning Site instances, tools.add_site() for adding new ones to config,
    and tools.del_site() for removing old ones from config, should suffice.

    Public methods:
    name                 -- returns our name (or "wikiid"), like "enwiki"
    project              -- returns our project name, like "wikipedia"
    lang                 -- returns our language code, like "en"
    domain               -- returns our web domain, like "en.wikipedia.org"
    api_query            -- does an API query with the given kwargs as params
    namespace_id_to_name -- given a namespace ID, returns associated name(s)
    namespace_name_to_id -- given a namespace name, returns associated id
    get_page             -- returns a Page object for the given title
    get_category         -- returns a Category object for the given title
    get_user             -- returns a User object for the given username
    """

    def __init__(self, name=None, project=None, lang=None, base_url=None,
                 article_path=None, script_path=None, sql=(None, None),
                 namespaces=None, login=(None, None), cookiejar=None):
        """Constructor for new Site instances.

        This probably isn't necessary to call yourself unless you're building
        a Site that's not in your config and you don't want to add it -
        normally all you need is tools.get_site(name), which creates the Site
        for you based on your config file. We accept a bunch of kwargs, but
        the only ones you really "need" are `base_url` and `script_path` -
        this is enough to figure out an API url. `login`, a tuple of
        (username, password), is highly recommended. `cookiejar` will be used
        to store cookies, and we'll use a normal CookieJar if none is given.

        First, we'll store the given arguments as attributes, then set up our
        URL opener. We'll load any of the attributes that weren't given from
        the API, and then log in if a username/pass was given and we aren't
        already logged in.
        """
        # Attributes referring to site information, filled in by an API query
        # if they are missing (and an API url can be determined):
        self._name = name
        self._project = project
        self._lang = lang
        self._base_url = base_url
        self._article_path = article_path
        self._script_path = script_path
        self._sql = sql
        self._namespaces = namespaces

        # Set up cookiejar and URL opener for making API queries:
        if cookiejar is not None:
            self._cookiejar = cookiejar
        else:
            self._cookiejar = CookieJar()
        self._opener = build_opener(HTTPCookieProcessor(self._cookiejar))
        self._opener.addheaders = [("User-Agent", USER_AGENT),
                                   ("Accept-Encoding", "gzip")]

        # Get all of the above attributes that were not specified as args:
        self._load_attributes()

        # If we have a name/pass and the API says we're not logged in, log
        # in. Use fresh local names here so we don't rebind the `name`
        # parameter, which referred to the site's name above:
        self._login_info = lgname, lgpassword = login
        if lgname is not None and lgpassword is not None:
            logged_in_as = self._get_username_from_cookies()
            if logged_in_as is None or lgname != logged_in_as:
                self._login(login)

    def _api_query(self, params):
        """Do an API query with `params` as a dict of parameters.

        This will first attempt to construct an API url from self._base_url
        and self._script_path. We need both of these, or else we'll raise
        SiteAPIError.

        We'll encode the given params, adding format=json along the way, and
        make the request through self._opener, which has built-in cookie
        support via self._cookiejar, a User-Agent
        (wiki.tools.constants.USER_AGENT), and Accept-Encoding set to "gzip".

        Assuming everything went well, we'll gunzip the data (if compressed),
        load it as a JSON object, and return it.

        If our request failed, we'll raise SiteAPIError with details.

        There's helpful MediaWiki API documentation at
        <http://www.mediawiki.org/wiki/API>.
        """
        if self._base_url is None or self._script_path is None:
            e = "Tried to do an API query, but no API URL is known."
            raise SiteAPIError(e)

        url = ''.join((self._base_url, self._script_path, "/api.php"))
        params["format"] = "json"  # this is the only format we understand
        data = urlencode(params)
        try:
            response = self._opener.open(url, data)
        except URLError as error:
            # Note: a plain URLError carries .reason; an HTTPError carries
            # .code. Report the url we requested in either case.
            if hasattr(error, "reason"):
                e = "API query at {0} failed because {1}."
                e = e.format(url, error.reason)
            elif hasattr(error, "code"):
                e = "API query at {0} failed; got an error code of {1}."
                e = e.format(url, error.code)
            else:
                e = "API query failed."
            raise SiteAPIError(e)
        else:
            result = response.read()
            if response.headers.get("Content-Encoding") == "gzip":
                # We asked for gzip via Accept-Encoding, so decompress:
                stream = StringIO(result)
                gzipper = GzipFile(fileobj=stream)
                result = gzipper.read()
            return loads(result)  # parse as a JSON object

    def _load_attributes(self, force=False):
        """Load data about our Site from the API.

        This function is called by __init__() when one of the site attributes
        was not given as a keyword argument. We'll do an API query to get the
        missing data, but only if there actually *is* missing data.

        Additionally, you can call this with `force=True` to forcibly reload
        all attributes.
        """
        # All attributes to be loaded, except _namespaces, which is a special
        # case because it requires additional params in the API query:
        attrs = [self._name, self._project, self._lang, self._base_url,
                 self._article_path, self._script_path]

        params = {"action": "query", "meta": "siteinfo"}

        if self._namespaces is None or force:
            params["siprop"] = "general|namespaces|namespacealiases"
            result = self._api_query(params)
            self._load_namespaces(result)
        elif all(attrs):  # everything is already specified and we're not told
            return        # to force a reload, so do nothing
        else:  # we're only loading attributes other than _namespaces
            params["siprop"] = "general"
            result = self._api_query(params)

        res = result["query"]["general"]
        self._name = res["wikiid"]
        self._project = res["sitename"].lower()
        self._lang = res["lang"]
        self._base_url = res["server"]
        self._article_path = res["articlepath"]
        self._script_path = res["scriptpath"]

    def _load_namespaces(self, result):
        """Fill self._namespaces with a dict of namespace IDs and names.

        Called by _load_attributes() with API data as `result` when
        self._namespaces was not given as an kwarg to __init__().
        """
        self._namespaces = {}

        for namespace in result["query"]["namespaces"].values():
            ns_id = namespace["id"]
            name = namespace["*"]
            try:
                canonical = namespace["canonical"]
            except KeyError:
                # Some namespaces (e.g. the main one) have no canonical name:
                self._namespaces[ns_id] = [name]
            else:
                if name != canonical:
                    self._namespaces[ns_id] = [name, canonical]
                else:
                    self._namespaces[ns_id] = [name]

        for namespace in result["query"]["namespacealiases"]:
            ns_id = namespace["id"]
            alias = namespace["*"]
            self._namespaces[ns_id].append(alias)

    def _get_cookie(self, name, domain):
        """Return the named cookie unless it is expired or doesn't exist."""
        for cookie in self._cookiejar:
            if cookie.name == name and cookie.domain == domain:
                if cookie.is_expired():
                    break  # a matching-but-expired cookie is as good as none
                return cookie
        return None

    def _get_username_from_cookies(self):
        """Try to return our username based solely on cookies.

        First, we'll look for a cookie named self._name + "Token", like
        "enwikiToken". If it exists and isn't expired, we'll assume it's
        valid and try to return the value of the cookie self._name +
        "UserName" (like "enwikiUserName"). This should work fine on wikis
        without single-user login.

        If `enwikiToken` doesn't exist, we'll try to find a cookie named
        `centralauth_Token`. If this exists and is not expired, we'll try to
        return the value of `centralauth_User`.

        If we didn't get any matches, we'll return None. Our goal here isn't
        to return the most likely username, or what we *want* our username to
        be (for that, we'd do self._login_info[0]), but rather to get our
        current username without an unnecessary ?action=query&meta=userinfo
        API query.
        """
        domain = self.domain()
        name = ''.join((self._name, "Token"))
        cookie = self._get_cookie(name, domain)

        if cookie is not None:
            name = ''.join((self._name, "UserName"))
            user_name = self._get_cookie(name, domain)
            if user_name is not None:
                return user_name.value

        name = "centralauth_Token"
        for cookie in self._cookiejar:
            if cookie.domain_initial_dot is False or cookie.is_expired():
                continue
            if cookie.name != name:
                continue
            # Build a regex that will match domains this cookie affects:
            search = ''.join(("(.*?)", re_escape(cookie.domain)))
            if re_match(search, domain):  # test it against our site
                user_name = self._get_cookie("centralauth_User", cookie.domain)
                if user_name is not None:
                    return user_name.value

    def _get_username_from_api(self):
        """Do a simple API query to get our username and return it.

        This is a reliable way to make sure we are actually logged in,
        because it doesn't deal with annoying cookie logic, but it results in
        an API query that is unnecessary in some cases.

        Called by _get_username() (in turn called by get_user() with no
        username argument) when cookie lookup fails, probably indicating that
        we are logged out.
        """
        params = {"action": "query", "meta": "userinfo"}
        result = self._api_query(params)
        return result["query"]["userinfo"]["name"]

    def _get_username(self):
        """Return the name of the current user, whether logged in or not.

        First, we'll try to deduce it solely from cookies, to avoid an
        unnecessary API query. For the cookie-detection method, see
        _get_username_from_cookies()'s docs.

        If our username isn't in cookies, then we're probably not logged in,
        or something fishy is going on (like forced logout). In this case, do
        a single API query for our username (or IP address) and return that.
        """
        name = self._get_username_from_cookies()
        if name is not None:
            return name
        return self._get_username_from_api()

    def _save_cookiejar(self):
        """Try to save our cookiejar after doing a (normal) login or logout.

        Calls the standard .save() method with no filename. Don't fret if our
        cookiejar doesn't support saving (CookieJar raises AttributeError,
        FileCookieJar raises NotImplementedError) or no default filename was
        given (LWPCookieJar and MozillaCookieJar raise ValueError).
        """
        try:
            self._cookiejar.save()
        except (AttributeError, NotImplementedError, ValueError):
            pass

    def _login(self, login, token=None, attempt=0):
        """Safely login through the API.

        Normally, this is called by __init__() if a username and password
        have been provided and no valid login cookies were found. The only
        other time it needs to be called is when those cookies expire, which
        is done automatically by api_query() if a query fails.

        Recent versions of MediaWiki's API have fixed a CSRF vulnerability,
        requiring login to be done in two separate requests. If the response
        from our initial request is "NeedToken", we'll do another one with
        the token. If login is successful, we'll try to save our cookiejar.

        Raises LoginError on login errors (duh), like bad passwords and
        nonexistent usernames.

        `login` is a (username, password) tuple. `token` is the token
        returned from our first request, and `attempt` is to prevent getting
        stuck in a loop if MediaWiki isn't acting right.
        """
        name, password = login
        params = {"action": "login", "lgname": name, "lgpassword": password}
        if token is not None:
            params["lgtoken"] = token
        result = self._api_query(params)
        res = result["login"]["result"]

        if res == "Success":
            self._save_cookiejar()
        elif res == "NeedToken" and attempt == 0:
            token = result["login"]["token"]
            return self._login(login, token, attempt=1)
        else:
            if res == "Illegal":
                e = "The provided username is illegal."
            elif res == "NotExists":
                e = "The provided username does not exist."
            elif res == "EmptyPass":
                e = "No password was given."
            elif res == "WrongPass" or res == "WrongPluginPass":
                e = "The given password is incorrect."
            else:
                e = "Couldn't login; server says '{0}'.".format(res)
            raise LoginError(e)

    def _logout(self):
        """Safely logout through the API.

        We'll do a simple API request (api.php?action=logout), clear our
        cookiejar (which probably contains now-invalidated cookies) and try
        to save it, if it supports that sort of thing.
        """
        params = {"action": "logout"}
        self._api_query(params)
        self._cookiejar.clear()
        self._save_cookiejar()

    def api_query(self, **kwargs):
        """Do an API query with `kwargs` as the parameters.

        See _api_query()'s documentation for details.
        """
        return self._api_query(kwargs)

    def name(self):
        """Returns the Site's name (or "wikiid" in the API), like "enwiki"."""
        return self._name

    def project(self):
        """Returns the Site's project name in lowercase, like "wikipedia"."""
        return self._project

    def lang(self):
        """Returns the Site's language code, like "en" or "es"."""
        return self._lang

    def domain(self):
        """Returns the Site's web domain, like "en.wikipedia.org"."""
        return urlparse(self._base_url).netloc

    def namespace_id_to_name(self, ns_id, all=False):
        """Given a namespace ID, returns associated namespace names.

        If all is False (default), we'll return the first name in the list,
        which is usually the localized version. Otherwise, we'll return the
        entire list, which includes the canonical name.

        For example, returns u"Wikipedia" if ns_id=4 and all=False on enwiki;
        returns [u"Wikipedia", u"Project"] if ns_id=4 and all=True.

        Raises NamespaceNotFoundError if the ID is not found.
        """
        try:
            if all:
                return self._namespaces[ns_id]
            else:
                return self._namespaces[ns_id][0]
        except KeyError:
            e = "There is no namespace with id {0}.".format(ns_id)
            raise NamespaceNotFoundError(e)

    def namespace_name_to_id(self, name):
        """Given a namespace name, returns the associated ID.

        Like namespace_id_to_name(), but reversed. Case is ignored, because
        namespaces are assumed to be case-insensitive.

        Raises NamespaceNotFoundError if the name is not found.
        """
        lname = name.lower()
        for ns_id, names in self._namespaces.items():
            lnames = [n.lower() for n in names]  # be case-insensitive
            if lname in lnames:
                return ns_id

        e = "There is no namespace with name '{0}'.".format(name)
        raise NamespaceNotFoundError(e)

    def get_page(self, title, follow_redirects=False):
        """Returns a Page object for the given title (pagename).

        Will return a Category object instead if the given title is in the
        category namespace. As Category is a subclass of Page, this should
        not cause problems.

        Note that this doesn't do any direct checks for existence or
        redirect-following - Page's methods provide that.
        """
        prefixes = self.namespace_id_to_name(NS_CATEGORY, all=True)
        prefix = title.split(":", 1)[0]
        if prefix != title:  # avoid a page that is simply "Category"
            if prefix in prefixes:
                return Category(self, title, follow_redirects)
        return Page(self, title, follow_redirects)

    def get_category(self, catname, follow_redirects=False):
        """Returns a Category object for the given category name.

        `catname` should be given *without* a namespace prefix. This method
        is really just shorthand for get_page("Category:" + catname).
        """
        prefix = self.namespace_id_to_name(NS_CATEGORY)
        pagename = ':'.join((prefix, catname))
        return Category(self, pagename, follow_redirects)

    def get_user(self, username=None):
        """Returns a User object for the given username.

        If `username` is left as None, then a User object representing the
        currently logged-in (or anonymous!) user is returned.
        """
        if username is None:
            username = self._get_username()
        return User(self, username)
@@ -0,0 +1,226 @@ | |||||
# -*- coding: utf-8 -*- | |||||
from time import strptime | |||||
from wiki.tools.constants import * | |||||
from wiki.tools.exceptions import UserNotFoundError | |||||
from wiki.tools.page import Page | |||||
class User(object):
    """
    EarwigBot's Wiki Toolset: User Class

    Represents a User on a given Site. Has methods for getting a bunch of
    information about the user, such as editcount and user rights, methods
    for returning the user's userpage and talkpage, etc.

    Public methods:
    name         -- returns the user's username
    exists       -- returns True if the user exists, False if they do not
    userid       -- returns an integer ID representing the user
    blockinfo    -- returns information about a current block on the user
    groups       -- returns a list of the user's groups
    rights       -- returns a list of the user's rights
    editcount    -- returns the number of edits made by the user
    registration -- returns the time the user registered as a
                    time.struct_time
    emailable    -- returns True if you can email the user, or False if you
                    cannot
    gender       -- returns the user's gender ("male", "female", or
                    "unknown")
    get_userpage -- returns a Page object representing the user's userpage
    get_talkpage -- returns a Page object representing the user's talkpage
    """

    def __init__(self, site, name):
        """Constructor for new User instances.

        Takes two arguments, a Site object (necessary for doing API
        queries), and the name of the user, preferably without "User:" in
        front, although this prefix will be automatically removed by the API
        if given.

        You can also use site.get_user() instead, which returns a User
        object, and is preferred.

        We won't do any API queries yet for basic information about the user
        - save that for when the information is requested.
        """
        self._site = site
        self._name = name

    def _get_attribute(self, attr, force):
        """Internally used to get an attribute by name.

        We'll call _load_attributes() to get this (and all other attributes)
        from the API if it is not already defined. If `force` is True, we'll
        re-load them even if they've already been loaded.

        Raises UserNotFoundError if a nonexistant user prevents us from
        returning a certain attribute.
        """
        if not hasattr(self, attr) or force:
            self._load_attributes()
        if self._exists is False:
            e = "User '{0}' does not exist.".format(self._name)
            raise UserNotFoundError(e)
        return getattr(self, attr)

    def _load_attributes(self):
        """Internally used to load all attributes from the API.

        Normally, this is called by _get_attribute() when a requested
        attribute is not defined. This defines it.
        """
        params = {"action": "query", "list": "users", "ususers": self._name,
                  "usprop": "blockinfo|groups|rights|editcount|registration|emailable|gender"}
        result = self._site._api_query(params)
        res = result["query"]["users"][0]

        # Normalize our username in case it was entered oddly:
        self._name = res["name"]

        try:
            self._userid = res["userid"]
        except KeyError:  # userid is missing, so user does not exist
            self._exists = False
            return
        self._exists = True

        try:
            self._blockinfo = {
                "by": res["blockedby"],
                "reason": res["blockreason"],
                "expiry": res["blockexpiry"]
            }
        except KeyError:  # not currently blocked
            self._blockinfo = False

        self._groups = res["groups"]
        # NOTE(review): "rights" is treated as a mapping here and only its
        # values are kept - confirm against the API version in use:
        self._rights = res["rights"].values()
        self._editcount = res["editcount"]

        reg = res["registration"]
        try:
            self._registration = strptime(reg, "%Y-%m-%dT%H:%M:%SZ")
        except TypeError:
            # Very old accounts may have a null registration time; report it
            # as None rather than crashing.
            self._registration = None

        # The API includes an empty "emailable" key only when the user can
        # be emailed:
        self._emailable = "emailable" in res

        self._gender = res["gender"]

    def name(self, force=False):
        """Returns the user's name.

        If `force` is True, we will load the name from the API and return
        that. This could potentially return a "normalized" version of the
        name - for example, without a "User:" prefix or without underscores.
        Unlike other attribute getters, this will never make an API query
        without `force`.

        Note that if another attribute getter, like exists(), has already
        been called, then the username has already been normalized.
        """
        if force:
            self._load_attributes()
        return self._name

    def exists(self, force=False):
        """Returns True if the user exists, or False if they do not.

        Makes an API query if `force` is True or if we haven't made one
        already.
        """
        if not hasattr(self, "_exists") or force:
            self._load_attributes()
        return self._exists

    def userid(self, force=False):
        """Returns an integer ID used by MediaWiki to represent the user.

        Raises UserNotFoundError if the user does not exist. Makes an API
        query if `force` is True or if we haven't made one already.
        """
        return self._get_attribute("_userid", force)

    def blockinfo(self, force=False):
        """Returns information about a current block on the user.

        If the user is not blocked, returns False. If they are, returns a
        dict with three keys: "by" is the blocker's username, "reason" is
        the reason why they were blocked, and "expiry" is when the block
        expires.

        Raises UserNotFoundError if the user does not exist. Makes an API
        query if `force` is True or if we haven't made one already.
        """
        return self._get_attribute("_blockinfo", force)

    def groups(self, force=False):
        """Returns a list of groups this user is in, including "*".

        Raises UserNotFoundError if the user does not exist. Makes an API
        query if `force` is True or if we haven't made one already.
        """
        return self._get_attribute("_groups", force)

    def rights(self, force=False):
        """Returns a list of this user's rights.

        Raises UserNotFoundError if the user does not exist. Makes an API
        query if `force` is True or if we haven't made one already.
        """
        return self._get_attribute("_rights", force)

    def editcount(self, force=False):
        """Returns the number of edits made by the user.

        Raises UserNotFoundError if the user does not exist. Makes an API
        query if `force` is True or if we haven't made one already.
        """
        return self._get_attribute("_editcount", force)

    def registration(self, force=False):
        """Returns the time the user registered as a time.struct_time object.

        May return None for very old accounts whose registration time is not
        recorded. Raises UserNotFoundError if the user does not exist. Makes
        an API query if `force` is True or if we haven't made one already.
        """
        return self._get_attribute("_registration", force)

    def emailable(self, force=False):
        """Returns True if the user can be emailed, or False if they cannot.

        Raises UserNotFoundError if the user does not exist. Makes an API
        query if `force` is True or if we haven't made one already.
        """
        return self._get_attribute("_emailable", force)

    def gender(self, force=False):
        """Returns the user's gender.

        Can return either "male", "female", or "unknown", if they did not
        specify it.

        Raises UserNotFoundError if the user does not exist. Makes an API
        query if `force` is True or if we haven't made one already.
        """
        return self._get_attribute("_gender", force)

    def get_userpage(self):
        """Returns a Page object representing the user's userpage.

        No checks are made to see if it exists or not. Proper site namespace
        conventions are followed.
        """
        prefix = self._site.namespace_id_to_name(NS_USER)
        pagename = ':'.join((prefix, self._name))
        return Page(self._site, pagename)

    def get_talkpage(self):
        """Returns a Page object representing the user's talkpage.

        No checks are made to see if it exists or not. Proper site namespace
        conventions are followed.
        """
        prefix = self._site.namespace_id_to_name(NS_USER_TALK)
        pagename = ':'.join((prefix, self._name))
        return Page(self._site, pagename)