From edfa1d2d9e040c3a9c78cd176792c1ffa56c64f7 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 24 Jul 2011 10:19:07 -0400 Subject: [PATCH 01/19] beginning wikitools core development with a few skeleton classes and one (nearly) working function, tools.get_site() (doesn't return a Site object yet) --- wiki/tools/__init__.py | 19 ++++++++++++ wiki/tools/category.py | 9 ++++++ wiki/tools/exceptions.py | 18 ++++++++++++ wiki/tools/functions.py | 75 ++++++++++++++++++++++++++++++++++++++++++++++++ wiki/tools/page.py | 9 ++++++ wiki/tools/site.py | 9 ++++++ wiki/tools/user.py | 9 ++++++ 7 files changed, 148 insertions(+) create mode 100644 wiki/tools/category.py create mode 100644 wiki/tools/exceptions.py create mode 100644 wiki/tools/functions.py create mode 100644 wiki/tools/page.py create mode 100644 wiki/tools/site.py create mode 100644 wiki/tools/user.py diff --git a/wiki/tools/__init__.py b/wiki/tools/__init__.py index e69de29..2e64c45 100644 --- a/wiki/tools/__init__.py +++ b/wiki/tools/__init__.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- + +""" +EarwigBot's Wiki Toolset + +This is a collection of classes and functions to read from and write to +Wikipedia and other wiki sites. No connection whatsoever to python-wikitools +written by Mr.Z-man, other than a similar purpose. We share no code. + +Import the toolset with `from wiki import tools`. 
+""" + +from wiki.tools.exceptions import * +from wiki.tools.functions import * + +from wiki.tools.category import Category +from wiki.tools.page import Page +from wiki.tools.site import Site +from wiki.tools.user import User diff --git a/wiki/tools/category.py b/wiki/tools/category.py new file mode 100644 index 0000000..3df8477 --- /dev/null +++ b/wiki/tools/category.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- + +class Category(object): + """ + EarwigBot's Wiki Toolset: Category Class + """ + + def __init__(self): + pass diff --git a/wiki/tools/exceptions.py b/wiki/tools/exceptions.py new file mode 100644 index 0000000..b515c45 --- /dev/null +++ b/wiki/tools/exceptions.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- + +""" +EarwigBot's Wiki Toolset: Exceptions + +This module contains all exceptions used by the wiki.tools package. +""" + +class WikiToolsetError(Exception): + """Base exception class for errors in the Wiki Toolset.""" + +class ConfigError(WikiToolsetError): + """An error occured when trying to do something involving our config + file. Maybe it hasn't been loaded?""" + +class SiteNotFoundError(WikiToolsetError): + """A site matching the args given to get_site() could not be found in the + config file.""" diff --git a/wiki/tools/functions.py b/wiki/tools/functions.py new file mode 100644 index 0000000..220e4f3 --- /dev/null +++ b/wiki/tools/functions.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- + +""" +EarwigBot's Wiki Toolset: Misc Functions + +This module, a component of the wiki.tools package, contains miscellaneous +functions that are not methods of any class, like get_site(). + +There's no need to import this module explicitly. All functions here are +automatically available from wiki.tools. 
+""" + +from core import config +from wiki.tools.exceptions import ConfigError, SiteNotFoundError +from wiki.tools.site import Site + +__all__ = ["get_site"] + +def get_site(name=None, project=None, lang=None): + """Returns a Site instance based on information from our config file. + + With no arguments, returns the default site as specified by our config + file. This is default = config.wiki["defaultSite"]; + config.wiki["sites"][default]. + + With `name` specified, returns the site specified by + config.wiki["sites"][name]. + + With `project` and `lang` specified, returns the site specified by the + member of config.wiki["sites"], `s`, for which s["project"] == project and + s["lang"] == lang. + + Specifying a project without a lang or a lang without a project will raise + TypeError. If all three args are specified, `name` will be first tried, + then `project` and `lang`. If, with any number of args, a site cannot be + found in the config, SiteNotFoundError is raised. + """ + if config._config is None: + e = "Config file has not been loaded: use config.verify_config() and then config.parse_config() to do so." + raise ConfigError(e) + + if (project is None and lang is not None) or (project is not None and lang is None): + e = "Keyword arguments 'lang' and 'project' must be specified together." + raise TypeError(e) + + if name is None and project is None: # no args given (project is None implies lang is None) + try: # ...so use the default site + default = config.wiki["defaultSite"] + except KeyError: + e = "Default site is not specified in config." + raise SiteNotFoundError(e) + try: + return config.wiki["sites"][default] + except KeyError: + e = "Default site specified by config is not in the config's sites list." 
+ raise SiteNotFoundError(e) + + if name is not None: # name arg given, but don't look at others yet + try: + return config.wiki["sites"][name] + except KeyError: + if project is None: # implies lang is None, i.e., only name was given + e = "Site '{0}' not found in config.".format(name) + raise SiteNotFoundError(e) + for site in config.wiki["sites"].values(): + if site["project"] == project and site["lang"] == lang: + return site + e = "Neither site '{0}' nor site '{1}:{2}' found in config.".format(name, project, lang) + raise SiteNotFoundError(e) + + for site in config.wiki["sites"].values(): # implied lang and proj are not None + if site["project"] == project and site["lang"] == lang: + return site + e = "Site '{0}:{1}' not found in config.".format(project, lang) + raise SiteNotFoundError(e) diff --git a/wiki/tools/page.py b/wiki/tools/page.py new file mode 100644 index 0000000..3a30a70 --- /dev/null +++ b/wiki/tools/page.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- + +class Page(object): + """ + EarwigBot's Wiki Toolset: Page Class + """ + + def __init__(self): + pass diff --git a/wiki/tools/site.py b/wiki/tools/site.py new file mode 100644 index 0000000..caba1e4 --- /dev/null +++ b/wiki/tools/site.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- + +class Site(object): + """ + EarwigBot's Wiki Toolset: Site Class + """ + + def __init__(self): + pass diff --git a/wiki/tools/user.py b/wiki/tools/user.py new file mode 100644 index 0000000..5044e50 --- /dev/null +++ b/wiki/tools/user.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- + +class User(object): + """ + EarwigBot's Wiki Toolset: User Class + """ + + def __init__(self): + pass From f4219ffad031592344e2a8508bf3daa8e17de268 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 24 Jul 2011 12:03:20 -0400 Subject: [PATCH 02/19] More work on the early stages of wikitools: tools.get_site() returns an actual Site object, thanks to tools.functions._get_site_object_from_dict(). 
Site objects now have a working (but primitive) .api_query(), .get_page(), .get_category(), and .get_user(). Page objects now have a working .get(), for getting page content from the API. Category is now a subclass of Page, and has its own .get_members(), which returns a list of titles. Still need to implement proper namespace logic in pages. --- wiki/tools/category.py | 15 ++++++++++++--- wiki/tools/functions.py | 34 ++++++++++++++++++++++++++++------ wiki/tools/page.py | 19 +++++++++++++++++-- wiki/tools/site.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 103 insertions(+), 13 deletions(-) diff --git a/wiki/tools/category.py b/wiki/tools/category.py index 3df8477..1dfdba3 100644 --- a/wiki/tools/category.py +++ b/wiki/tools/category.py @@ -1,9 +1,18 @@ # -*- coding: utf-8 -*- -class Category(object): +from wiki.tools.page import Page + +class Category(Page): """ EarwigBot's Wiki Toolset: Category Class """ - def __init__(self): - pass + def get_members(limit=50): + """ + Docstring needed + """ + params = {"action": "query", "list": "categorymembers", "cmlimit": limit} + params["cmtitle"] = self.title + result = self.site.api_query(params) + members = result['query']['categorymembers'] + return [member["title"] for member in members] diff --git a/wiki/tools/functions.py b/wiki/tools/functions.py index 220e4f3..195400c 100644 --- a/wiki/tools/functions.py +++ b/wiki/tools/functions.py @@ -16,6 +16,25 @@ from wiki.tools.site import Site __all__ = ["get_site"] +def _get_site_object_from_dict(name, d): + """Return a Site object based on the contents of a dict, probably acquired + through our config file, and a separate name.""" + project = d["project"] + lang = d["lang"] + try: + api = d["apiURL"] + except KeyError: + api = None + try: + sql_server = d["sqlServer"] + except KeyError: + sql_server = None + try: + sql_db = d["sqlDB"] + except KeyError: + sql_db = None + return Site(name, project, lang, api, (sql_server, sql_db)) + def 
get_site(name=None, project=None, lang=None): """Returns a Site instance based on information from our config file. @@ -50,26 +69,29 @@ def get_site(name=None, project=None, lang=None): e = "Default site is not specified in config." raise SiteNotFoundError(e) try: - return config.wiki["sites"][default] + site = config.wiki["sites"][default] except KeyError: e = "Default site specified by config is not in the config's sites list." raise SiteNotFoundError(e) + return _get_site_object_from_dict(default, site) if name is not None: # name arg given, but don't look at others yet try: - return config.wiki["sites"][name] + site = config.wiki["sites"][name] except KeyError: if project is None: # implies lang is None, i.e., only name was given e = "Site '{0}' not found in config.".format(name) raise SiteNotFoundError(e) - for site in config.wiki["sites"].values(): + for sitename, site in config.wiki["sites"].items(): if site["project"] == project and site["lang"] == lang: - return site + return _get_site_object_from_dict(sitename, site) e = "Neither site '{0}' nor site '{1}:{2}' found in config.".format(name, project, lang) raise SiteNotFoundError(e) + else: + return _get_site_object_from_dict(name, site) - for site in config.wiki["sites"].values(): # implied lang and proj are not None + for sitename, site in config.wiki["sites"].items(): # implied lang and proj are not None if site["project"] == project and site["lang"] == lang: - return site + return _get_site_object_from_dict(sitename, site) e = "Site '{0}:{1}' not found in config.".format(project, lang) raise SiteNotFoundError(e) diff --git a/wiki/tools/page.py b/wiki/tools/page.py index 3a30a70..49a89ee 100644 --- a/wiki/tools/page.py +++ b/wiki/tools/page.py @@ -5,5 +5,20 @@ class Page(object): EarwigBot's Wiki Toolset: Page Class """ - def __init__(self): - pass + def __init__(self, site, title): + """ + Docstring needed + """ + self.site = site + self.title = title + + def get(self): + """ + Docstring needed + """ + 
params = {'action': 'query', 'prop': 'revisions', 'rvprop':'content', 'rvlimit':'1'} + params["titles"] = self.title + result = self.site.api_query(params) + pageid = result['query']['pages'].keys()[0] + content = result['query']['pages'][pageid]['revisions'][0]['*'] + return content diff --git a/wiki/tools/site.py b/wiki/tools/site.py index caba1e4..34df709 100644 --- a/wiki/tools/site.py +++ b/wiki/tools/site.py @@ -1,9 +1,53 @@ # -*- coding: utf-8 -*- +from json import loads +from urllib import urlencode +from urllib2 import urlopen + +from wiki.tools.category import Category +from wiki.tools.page import Page +from wiki.tools.user import User + class Site(object): """ EarwigBot's Wiki Toolset: Site Class """ - def __init__(self): - pass + def __init__(self, name, project, lang, api=None, sql=(None, None)): + """ + Docstring needed + """ + self.name = name + self.project = project + self.lang = lang + self.__api = api + self.__sql = sql + + def api_query(self, params): + """ + Docstring needed + """ + params["format"] = "json" + data = urlencode(params) + result = urlopen(self.__api, data).read() + return loads(result) + + def get_page(self, pagename): + """ + Docstring needed + """ + if pagename.startswith("Category:"): # proper namespace checking! + return get_category(pagename[9:]) + return Page(self, pagename) + + def get_category(self, catname): + """ + Docstring needed + """ + return Category(self, "Category:" + catname) # namespace checking! + + def get_user(self, username): + """ + Docstring needed + """ + return User(self, username) From 76113a3f6ac2f649607757a2b14bcc5846c25da2 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 24 Jul 2011 18:01:06 -0400 Subject: [PATCH 03/19] Cleaning up slightly and adding some more methods: User: added .get_rights() (working) and .exists() (skeleton). Page: added .exists() (skeleton); store text as ._content; get() has a force_reload argument. Category: fixed missing self in .get_members(). 
Site: self.__api -> self._api; self.__sql -> self._sql --- wiki/tools/category.py | 6 +++--- wiki/tools/page.py | 23 ++++++++++++++++------- wiki/tools/site.py | 6 +++--- wiki/tools/user.py | 25 ++++++++++++++++++++++++- 4 files changed, 46 insertions(+), 14 deletions(-) diff --git a/wiki/tools/category.py b/wiki/tools/category.py index 1dfdba3..01a3179 100644 --- a/wiki/tools/category.py +++ b/wiki/tools/category.py @@ -7,12 +7,12 @@ class Category(Page): EarwigBot's Wiki Toolset: Category Class """ - def get_members(limit=50): + def get_members(self, limit=50): """ Docstring needed """ - params = {"action": "query", "list": "categorymembers", "cmlimit": limit} - params["cmtitle"] = self.title + params = {"action": "query", "list": "categorymembers", + "cmlimit": limit, "cmtitle": self.title} result = self.site.api_query(params) members = result['query']['categorymembers'] return [member["title"] for member in members] diff --git a/wiki/tools/page.py b/wiki/tools/page.py index 49a89ee..9dbb7ab 100644 --- a/wiki/tools/page.py +++ b/wiki/tools/page.py @@ -11,14 +11,23 @@ class Page(object): """ self.site = site self.title = title + self._content = None - def get(self): + def exists(self): """ Docstring needed """ - params = {'action': 'query', 'prop': 'revisions', 'rvprop':'content', 'rvlimit':'1'} - params["titles"] = self.title - result = self.site.api_query(params) - pageid = result['query']['pages'].keys()[0] - content = result['query']['pages'][pageid]['revisions'][0]['*'] - return content + pass + + def get(self, force_reload=False): + """ + Docstring needed + """ + if content is None or force_reload: + params = {"action": "query", "prop": "revisions", + "rvprop": "content", "rvlimit": 1, "titles": self.title} + result = self.site.api_query(params) + content = result["query"]["pages"].values()[0]["revisions"][0]["*"] + self._content = content + return content + return self._content diff --git a/wiki/tools/site.py b/wiki/tools/site.py index 34df709..ea62c77 
100644 --- a/wiki/tools/site.py +++ b/wiki/tools/site.py @@ -20,8 +20,8 @@ class Site(object): self.name = name self.project = project self.lang = lang - self.__api = api - self.__sql = sql + self._api = api + self._sql = sql def api_query(self, params): """ @@ -29,7 +29,7 @@ class Site(object): """ params["format"] = "json" data = urlencode(params) - result = urlopen(self.__api, data).read() + result = urlopen(self._api, data).read() return loads(result) def get_page(self, pagename): diff --git a/wiki/tools/user.py b/wiki/tools/user.py index 5044e50..4fb69b7 100644 --- a/wiki/tools/user.py +++ b/wiki/tools/user.py @@ -5,5 +5,28 @@ class User(object): EarwigBot's Wiki Toolset: User Class """ - def __init__(self): + def __init__(self, site, username): + """ + Docstring needed + """ + self.site = site + self.username = username + + def exists(self): + """ + Docstring needed + """ pass + + def get_rights(self): + """ + Docstring needed + """ + params = {"action": "query", "list": "users", "usprop": "groups", + "ususers": self.username} + result = self.site.api_query(params) + try: + rights = res['query']['users'][0]['groups'] + except KeyError: # 'groups' not found, meaning the user does not exist + return None + return rights From 28cbbd7221789ec2b49fa13d284214bfe7c7ec3c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 24 Jul 2011 18:08:35 -0400 Subject: [PATCH 04/19] afcAFCStatus and Rights IRC commands now use wikitools --- irc/commands/afc_status.py | 29 +++++++---------------------- irc/commands/rights.py | 28 ++++++++-------------------- 2 files changed, 15 insertions(+), 42 deletions(-) diff --git a/irc/commands/afc_status.py b/irc/commands/afc_status.py index 9273a53..2b8b880 100644 --- a/irc/commands/afc_status.py +++ b/irc/commands/afc_status.py @@ -3,12 +3,11 @@ """Report the status of AFC submissions, either as an automatic message on join or a request via !status.""" -import json import re -import urllib from core import config from irc.classes 
import BaseCommand +from wiki import tools class AFCStatus(BaseCommand): def get_hooks(self): @@ -29,6 +28,8 @@ class AFCStatus(BaseCommand): return False def process(self, data): + self.site = tools.get_site() + if data.line[1] == "JOIN": notice = self.get_join_notice() self.connection.notice(data.nick, notice) @@ -85,19 +86,15 @@ class AFCStatus(BaseCommand): def count_submissions(self): """Returns the number of open AFC submissions (count of CAT:PEND).""" - params = {'action': 'query', 'list': 'categorymembers', 'cmlimit':'500', 'format': 'json'} - params['cmtitle'] = "Category:Pending_AfC_submissions" - data = urllib.urlencode(params) - raw = urllib.urlopen("http://en.wikipedia.org/w/api.php", data).read() - res = json.loads(raw) - subs = len(res['query']['categorymembers']) + cat = self.site.get_category("Pending AfC submissions") + subs = cat.get_members(limit=500) subs -= 2 # remove [[Wikipedia:Articles for creation/Redirects]] and [[Wikipedia:Files for upload]], which aren't real submissions return subs def count_redirects(self): """Returns the number of open redirect submissions. Calculated as the total number of submissions minus the closed ones.""" - content = self.get_page("Wikipedia:Articles_for_creation/Redirects") + content = self.site.get_page("Wikipedia:Articles for creation/Redirects").get() total = len(re.findall("^\s*==(.*?)==\s*$", content, re.MULTILINE)) closed = content.lower().count("{{afc-c|b}}") redirs = total - closed @@ -106,24 +103,12 @@ class AFCStatus(BaseCommand): def count_files(self): """Returns the number of open WP:FFU (Files For Upload) requests. 
Calculated as the total number of requests minus the closed ones.""" - content = self.get_page("Wikipedia:Files_for_upload") + content = self.site.get_page("Wikipedia:Files for upload").get() total = len(re.findall("^\s*==(.*?)==\s*$", content, re.MULTILINE)) closed = content.lower().count("{{ifu-c|b}}") files = total - closed return files - def get_page(self, pagename): - """Simple method to return the content of the page 'pagename'. Will be - a part of wiki/tools/ when I finish that.""" - params = {'action': 'query', 'prop': 'revisions', 'rvprop':'content', 'rvlimit':'1', 'format': 'json'} - params['titles'] = pagename - data = urllib.urlencode(params) - raw = urllib.urlopen("http://en.wikipedia.org/w/api.php", data).read() - res = json.loads(raw) - pageid = res['query']['pages'].keys()[0] - content = res['query']['pages'][pageid]['revisions'][0]['*'] - return content - def get_aggregate(self, num): """Returns a human-readable AFC status based on the number of pending AFC submissions, open redirect requests, and open FFU requests. This diff --git a/irc/commands/rights.py b/irc/commands/rights.py index 31d9437..2715d60 100644 --- a/irc/commands/rights.py +++ b/irc/commands/rights.py @@ -4,10 +4,8 @@ Retrieve a list of user rights for a given username via the API. 
""" -import json -import urllib - from irc.classes import BaseCommand +from wiki import tools class Rights(BaseCommand): def get_hooks(self): @@ -27,24 +25,14 @@ class Rights(BaseCommand): return username = ' '.join(data.args) - rights = self.get_rights(username) + site = tools.get_site() + user = site.get_user(username) + rights = user.get_rights() if rights: + try: + rights.remove("*") # remove the implicit '*' group given to everyone + except ValueError: + pass self.connection.reply(data, "the rights for \x0302{0}\x0301 are {1}.".format(username, ', '.join(rights))) else: self.connection.reply(data, "the user \x0302{0}\x0301 has no rights, or does not exist.".format(username)) - - def get_rights(self, username): - params = {'action': 'query', 'format': 'json', 'list': 'users', 'usprop': 'groups'} - params['ususers'] = username - data = urllib.urlencode(params) - raw = urllib.urlopen("http://en.wikipedia.org/w/api.php", data).read() - res = json.loads(raw) - try: - rights = res['query']['users'][0]['groups'] - except KeyError: # 'groups' not found, meaning the user does not exist - return None - try: - rights.remove("*") # remove the implicit '*' group given to everyone - except ValueError: # I don't expect this to happen, but if it does, be prepared - pass - return rights From b290582dbf5a8e420f88cf9660fa45937fdd3b8c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 24 Jul 2011 22:34:54 -0400 Subject: [PATCH 05/19] added a bunch of new methods to User in wikitools; added one user-related exception to wikitools; moved .get_rights() call in IRC command !rights to .get_groups(), because get_rights() now returns actual rights (thanks to the API) --- irc/commands/rights.py | 2 +- wiki/tools/exceptions.py | 7 +++ wiki/tools/user.py | 135 ++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 131 insertions(+), 13 deletions(-) diff --git a/irc/commands/rights.py b/irc/commands/rights.py index 2715d60..6c44227 100644 --- a/irc/commands/rights.py +++ 
b/irc/commands/rights.py @@ -27,7 +27,7 @@ class Rights(BaseCommand): username = ' '.join(data.args) site = tools.get_site() user = site.get_user(username) - rights = user.get_rights() + rights = user.get_groups() if rights: try: rights.remove("*") # remove the implicit '*' group given to everyone diff --git a/wiki/tools/exceptions.py b/wiki/tools/exceptions.py index b515c45..3dc463d 100644 --- a/wiki/tools/exceptions.py +++ b/wiki/tools/exceptions.py @@ -16,3 +16,10 @@ class ConfigError(WikiToolsetError): class SiteNotFoundError(WikiToolsetError): """A site matching the args given to get_site() could not be found in the config file.""" + +class UserNotFoundError(WikiToolsetError): + """Attempting to get information about a user that does not exist.""" + def __init__(self, name): + self.name = name + def __str__(self): + return "User '{0}' does not exist.".format(self.name) diff --git a/wiki/tools/user.py b/wiki/tools/user.py index 4fb69b7..c12e234 100644 --- a/wiki/tools/user.py +++ b/wiki/tools/user.py @@ -1,32 +1,143 @@ # -*- coding: utf-8 -*- +from wiki.tools.exceptions import UserNotFoundError +from wiki.tools.page import Page + class User(object): """ EarwigBot's Wiki Toolset: User Class """ - def __init__(self, site, username): + def __init__(self, site, name): """ Docstring needed """ - self.site = site - self.username = username + # Public attributes + self.site = site # Site instance, for doing API queries, etc + self.name = name # our username + + # Attributes filled in by an API query + self._exists = None + self._userid = None + self._blockinfo = None + self._groups = None + self._rights = None + self._editcount = None + self._registration = None + self._emailable = None + self._gender = None - def exists(self): + def _get_attribute_from_api(self, attr, force): """ Docstring needed """ - pass + if self._exists is None or force: + self._load_attributes_from_api() + if self._exists is False: + raise UserNotFoundError(self.name) + return getattr(self, 
attr) - def get_rights(self): + def _load_attributes_from_api(self): """ Docstring needed """ - params = {"action": "query", "list": "users", "usprop": "groups", - "ususers": self.username} + params = {"action": "query", "list": "users", "ususers": self.name, + "usprop": "blockinfo|groups|rights|editcount|registration|emailable|gender"} result = self.site.api_query(params) + + # normalize our username in case it was entered oddly + self.name = result["query"]["users"][0]["name"] + + try: + self._userid = result["query"]["users"][0]["userid"] + except KeyError: # userid is missing, so user does not exist + self._exists = False + return + + self._exists = True + res = result['query']['users'][0] + + self._groups = res["groups"] + self._rights = res["rights"] + self._editcount = res["editcount"] + self._registration = res["registration"] + self._gender = res["gender"] + + try: + res["emailable"] + except KeyError: + self._emailable = False + else: + self._emailable = True + try: - rights = res['query']['users'][0]['groups'] - except KeyError: # 'groups' not found, meaning the user does not exist - return None - return rights + self._blockinfo = {"by": res["blockedby"], + "reason": res["blockreason"], "expiry": res["blockexpiry"]} + except KeyError: + self._blockinfo = False + + def exists(self, force=False): + """ + Docstring needed + """ + return self._get_attribute_from_api("_exists", force) + + def get_userid(self, force=False): + """ + Docstring needed + """ + return self._get_attribute_from_api("_userid", force) + + def get_blockinfo(self, force=False): + """ + Docstring needed + """ + return self._get_attribute_from_api("_blockinfo", force) + + def get_groups(self, force=False): + """ + Docstring needed + """ + return self._get_attribute_from_api("_groups", force) + + def get_rights(self, force=False): + """ + Docstring needed + """ + return self._get_attribute_from_api("_rights", force) + + def get_editcount(self, force=False): + """ + Docstring needed + """ + 
return self._get_attribute_from_api("_editcount", force) + + def get_registration(self, force=False): + """ + Docstring needed + """ + return self._get_attribute_from_api("_registration", force) + + def get_emailable(self, force=False): + """ + Docstring needed + """ + return self._get_attribute_from_api("_emailable", force) + + def get_gender(self, force=False): + """ + Docstring needed + """ + return self._get_attribute_from_api("_gender", force) + + def get_userpage(self): + """ + Docstring needed + """ + return Page(self.site, "User:" + self.name) # Namespace checking! + + def get_talkpage(self): + """ + Docstring needed + """ + return Page(self.site, "User talk:" + self.name) # Namespace checking! From 6aa2370900785c606464fac3da3aa600b2ef588e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Jul 2011 02:22:18 -0400 Subject: [PATCH 06/19] Exception and function cleanup in wikitools: * Got rid of ConfigError from exceptions.py. * Try to load config ourselves if it isn't already, via the new _load_config() method of Site. It uses getpass if passwords are encrypted, as done by earwigbot.py. * Cleaned up UserNotFoundError in user.py and exceptions.py. --- wiki/tools/exceptions.py | 8 -------- wiki/tools/functions.py | 37 +++++++++++++++++++++++++++++-------- wiki/tools/user.py | 2 +- 3 files changed, 30 insertions(+), 17 deletions(-) diff --git a/wiki/tools/exceptions.py b/wiki/tools/exceptions.py index 3dc463d..d628d0d 100644 --- a/wiki/tools/exceptions.py +++ b/wiki/tools/exceptions.py @@ -9,17 +9,9 @@ This module contains all exceptions used by the wiki.tools package. class WikiToolsetError(Exception): """Base exception class for errors in the Wiki Toolset.""" -class ConfigError(WikiToolsetError): - """An error occured when trying to do something involving our config - file. 
Maybe it hasn't been loaded?""" - class SiteNotFoundError(WikiToolsetError): """A site matching the args given to get_site() could not be found in the config file.""" class UserNotFoundError(WikiToolsetError): """Attempting to get information about a user that does not exist.""" - def __init__(self, name): - self.name = name - def __str__(self): - return "User '{0}' does not exist.".format(self.name) diff --git a/wiki/tools/functions.py b/wiki/tools/functions.py index 195400c..d1cc020 100644 --- a/wiki/tools/functions.py +++ b/wiki/tools/functions.py @@ -10,15 +10,31 @@ There's no need to import this module explicitly. All functions here are automatically available from wiki.tools. """ +from getpass import getpass + from core import config -from wiki.tools.exceptions import ConfigError, SiteNotFoundError +from wiki.tools.exceptions import SiteNotFoundError from wiki.tools.site import Site __all__ = ["get_site"] +def _load_config(): + """Called by a config-requiring function, such as get_site(), when config + has not been loaded. This will usually happen only if we're running code + directly from Python's interpreter and not the bot itself, because + earwigbot.py or core/main.py will already call these functions. + """ + is_encrypted = config.verify_config() + if is_encrypted: # passwords in the config file are encrypted + key = getpass("Enter key to unencrypt bot passwords: ") + config.parse_config(key) + else: + config.parse_config(None) + def _get_site_object_from_dict(name, d): """Return a Site object based on the contents of a dict, probably acquired - through our config file, and a separate name.""" + through our config file, and a separate name. + """ project = d["project"] lang = d["lang"] try: @@ -54,15 +70,18 @@ def get_site(name=None, project=None, lang=None): then `project` and `lang`. If, with any number of args, a site cannot be found in the config, SiteNotFoundError is raised. 
""" - if config._config is None: - e = "Config file has not been loaded: use config.verify_config() and then config.parse_config() to do so." - raise ConfigError(e) + # check if config has been loaded, and load it if it hasn't + if not config.is_config_loaded(): + _load_config() + # someone specified a project without a lang (or a lang without a project)! if (project is None and lang is not None) or (project is not None and lang is None): e = "Keyword arguments 'lang' and 'project' must be specified together." raise TypeError(e) - if name is None and project is None: # no args given (project is None implies lang is None) + # no args given, so return our default site (project is None implies lang + # is None, so we don't need to add that in) + if name is None and project is None: try: # ...so use the default site default = config.wiki["defaultSite"] except KeyError: @@ -75,7 +94,8 @@ def get_site(name=None, project=None, lang=None): raise SiteNotFoundError(e) return _get_site_object_from_dict(default, site) - if name is not None: # name arg given, but don't look at others yet + # name arg given, but don't look at others unless `name` isn't found + if name is not None: try: site = config.wiki["sites"][name] except KeyError: @@ -90,7 +110,8 @@ def get_site(name=None, project=None, lang=None): else: return _get_site_object_from_dict(name, site) - for sitename, site in config.wiki["sites"].items(): # implied lang and proj are not None + # if we end up here, then project and lang are both not None + for sitename, site in config.wiki["sites"].items(): if site["project"] == project and site["lang"] == lang: return _get_site_object_from_dict(sitename, site) e = "Site '{0}:{1}' not found in config.".format(project, lang) diff --git a/wiki/tools/user.py b/wiki/tools/user.py index c12e234..ae19bfe 100644 --- a/wiki/tools/user.py +++ b/wiki/tools/user.py @@ -34,7 +34,7 @@ class User(object): if self._exists is None or force: self._load_attributes_from_api() if self._exists is 
False: - raise UserNotFoundError(self.name) + raise UserNotFoundError("User '{0}' does not exist.".format(self.name)) return getattr(self, attr) def _load_attributes_from_api(self): From ffc63c38f6943ac5a81179043ec6709f8dac2687 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Jul 2011 19:51:25 -0400 Subject: [PATCH 07/19] New additions and changes to wikitools, mostly namespaces. Site: Store namespace information in self._namespaces, a dict where key is a namespace ID and value is a list of matching names and aliases; added _get_namespaces_from_api(), namespaces(), namespace_id_to_name() and namespace_name_to_id(); get_page() and get_category() are smarter; Constants: new module, with 18 variables starting with "NS_" that hold IDs of common namespaces, e.g NS_USER = 2, NS_PROJECT = 4; Exceptions: added NamespaceNotFoundError, raised by Site when bad input is given to namespace_id_to_name() or namespace_name_to_id(); User: self.name -> self._name; new name() method returns name from API; dropping "get" from methods that return just a variable; Category: get_members() -> members(). --- wiki/tools/__init__.py | 1 + wiki/tools/category.py | 2 +- wiki/tools/constants.py | 27 +++++++++++++++ wiki/tools/exceptions.py | 3 ++ wiki/tools/page.py | 2 +- wiki/tools/site.py | 85 ++++++++++++++++++++++++++++++++++++++++++++++-- wiki/tools/user.py | 50 ++++++++++++++++++---------- 7 files changed, 147 insertions(+), 23 deletions(-) create mode 100644 wiki/tools/constants.py diff --git a/wiki/tools/__init__.py b/wiki/tools/__init__.py index 2e64c45..7fb431e 100644 --- a/wiki/tools/__init__.py +++ b/wiki/tools/__init__.py @@ -10,6 +10,7 @@ written by Mr.Z-man, other than a similar purpose. We share no code. Import the toolset with `from wiki import tools`. 
""" +from wiki.tools.constants import * from wiki.tools.exceptions import * from wiki.tools.functions import * diff --git a/wiki/tools/category.py b/wiki/tools/category.py index 01a3179..f6e301f 100644 --- a/wiki/tools/category.py +++ b/wiki/tools/category.py @@ -7,7 +7,7 @@ class Category(Page): EarwigBot's Wiki Toolset: Category Class """ - def get_members(self, limit=50): + def members(self, limit=50): """ Docstring needed """ diff --git a/wiki/tools/constants.py b/wiki/tools/constants.py new file mode 100644 index 0000000..76a327d --- /dev/null +++ b/wiki/tools/constants.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- + +""" +EarwigBot's Wiki Toolset: Constants + +This module defines some useful constants. +""" + +# Default namespace IDs +NS_MAIN = 0 +NS_TALK = 1 +NS_USER = 2 +NS_USER_TALK = 3 +NS_PROJECT = 4 +NS_PROJECT_TALK = 5 +NS_FILE = 6 +NS_FILE_TALK = 7 +NS_MEDIAWIKI = 8 +NS_MEDIAWIKI_TALK = 9 +NS_TEMPLATE = 10 +NS_TEMPLATE_TALK = 11 +NS_HELP = 12 +NS_HELP_TALK = 13 +NS_CATEGORY = 14 +NS_CATEGORY_TALK = 15 +NS_SPECIAL = -1 +NS_MEDIA = -2 diff --git a/wiki/tools/exceptions.py b/wiki/tools/exceptions.py index d628d0d..3e5eaf2 100644 --- a/wiki/tools/exceptions.py +++ b/wiki/tools/exceptions.py @@ -13,5 +13,8 @@ class SiteNotFoundError(WikiToolsetError): """A site matching the args given to get_site() could not be found in the config file.""" +class NamespaceNotFoundError(WikiToolsetError): + """A requested namespace name or namespace ID does not exist.""" + class UserNotFoundError(WikiToolsetError): """Attempting to get information about a user that does not exist.""" diff --git a/wiki/tools/page.py b/wiki/tools/page.py index 9dbb7ab..d267674 100644 --- a/wiki/tools/page.py +++ b/wiki/tools/page.py @@ -23,7 +23,7 @@ class Page(object): """ Docstring needed """ - if content is None or force_reload: + if self._content is None or force_reload: params = {"action": "query", "prop": "revisions", "rvprop": "content", "rvlimit": 1, "titles": self.title} result = 
self.site.api_query(params) diff --git a/wiki/tools/site.py b/wiki/tools/site.py index ea62c77..83a24c9 100644 --- a/wiki/tools/site.py +++ b/wiki/tools/site.py @@ -5,6 +5,8 @@ from urllib import urlencode from urllib2 import urlopen from wiki.tools.category import Category +from wiki.tools.constants import * +from wiki.tools.exceptions import NamespaceNotFoundError from wiki.tools.page import Page from wiki.tools.user import User @@ -22,6 +24,37 @@ class Site(object): self.lang = lang self._api = api self._sql = sql + + self._namespaces = None + + def _get_namespaces_from_api(self): + """ + Docstring needed + """ + params = {"action": "query", "meta": "siteinfo", + "siprop": "namespaces|namespacealiases"} + result = self.api_query(params) + + if self._namespaces is None: + self._namespaces = {} + + for namespace in result["query"]["namespaces"].values(): + ns_id = namespace["id"] + name = namespace["*"] + try: + canonical = namespace["canonical"] + except KeyError: + self._namespaces[ns_id] = [name] + else: + if name != canonical: + self._namespaces[ns_id] = [name, canonical] + else: + self._namespaces[ns_id] = [name] + + for namespace in result["query"]["namespacealiases"]: + ns_id = namespace["id"] + alias = namespace["*"] + self._namespaces[ns_id].append(alias) def api_query(self, params): """ @@ -32,19 +65,65 @@ class Site(object): result = urlopen(self._api, data).read() return loads(result) + def namespaces(self): + """ + Docstring needed + """ + if self._namespaces is None: + self._get_namespaces_from_api() + + return self._namespaces + + def namespace_id_to_name(self, ns_id, all=False): + """ + Docstring needed + """ + if self._namespaces is None: + self._get_namespaces_from_api() + + try: + if all: + return self._namespaces[ns_id] + else: + return self._namespaces[ns_id][0] + except KeyError: + e = "There is no namespace with id {0}.".format(ns_id) + raise NamespaceNotFoundError(e) + + def namespace_name_to_id(self, name): + """ + Docstring needed + """ + 
if self._namespaces is None: + self._get_namespaces_from_api() + + lname = name.lower() + for ns_id, names in self._namespaces.items(): + lnames = [n.lower() for n in names] # be case-insensitive + if lname in lnames: + return ns_id + + e = "There is no namespace with name '{0}'.".format(name) + raise NamespaceNotFoundError(e) + def get_page(self, pagename): """ Docstring needed """ - if pagename.startswith("Category:"): # proper namespace checking! - return get_category(pagename[9:]) + prefixes = self.namespace_id_to_name(NS_CATEGORY, all=True) + prefix = pagename.split(":", 1)[0] + if prefix != pagename: # avoid a page that is simply "Category" + if prefix in prefixes: + return Category(self, pagename) return Page(self, pagename) def get_category(self, catname): """ Docstring needed """ - return Category(self, "Category:" + catname) # namespace checking! + prefix = self.namespace_id_to_name(NS_CATEGORY) + pagename = "{0}:{1}".format(prefix, catname) + return Category(self, pagename) def get_user(self, username): """ diff --git a/wiki/tools/user.py b/wiki/tools/user.py index ae19bfe..8f6e96f 100644 --- a/wiki/tools/user.py +++ b/wiki/tools/user.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- +from wiki.tools.constants import * from wiki.tools.exceptions import UserNotFoundError from wiki.tools.page import Page @@ -12,9 +13,11 @@ class User(object): """ Docstring needed """ - # Public attributes - self.site = site # Site instance, for doing API queries, etc - self.name = name # our username + # Site instance, for doing API queries, etc + self.site = site + + # Username + self._name = name # Attributes filled in by an API query self._exists = None @@ -34,19 +37,20 @@ class User(object): if self._exists is None or force: self._load_attributes_from_api() if self._exists is False: - raise UserNotFoundError("User '{0}' does not exist.".format(self.name)) + e = "User '{0}' does not exist.".format(self._name) + raise UserNotFoundError(e) return getattr(self, attr) def 
_load_attributes_from_api(self): """ Docstring needed """ - params = {"action": "query", "list": "users", "ususers": self.name, + params = {"action": "query", "list": "users", "ususers": self._name, "usprop": "blockinfo|groups|rights|editcount|registration|emailable|gender"} result = self.site.api_query(params) # normalize our username in case it was entered oddly - self.name = result["query"]["users"][0]["name"] + self._name = result["query"]["users"][0]["name"] try: self._userid = result["query"]["users"][0]["userid"] @@ -76,68 +80,78 @@ class User(object): except KeyError: self._blockinfo = False + def name(self, force=False): + """ + Docstring needed + """ + return self._get_attribute_from_api("_name", force) + def exists(self, force=False): """ Docstring needed """ return self._get_attribute_from_api("_exists", force) - def get_userid(self, force=False): + def userid(self, force=False): """ Docstring needed """ return self._get_attribute_from_api("_userid", force) - def get_blockinfo(self, force=False): + def blockinfo(self, force=False): """ Docstring needed """ return self._get_attribute_from_api("_blockinfo", force) - def get_groups(self, force=False): + def groups(self, force=False): """ Docstring needed """ return self._get_attribute_from_api("_groups", force) - def get_rights(self, force=False): + def rights(self, force=False): """ Docstring needed """ return self._get_attribute_from_api("_rights", force) - def get_editcount(self, force=False): + def editcount(self, force=False): """ Docstring needed """ return self._get_attribute_from_api("_editcount", force) - def get_registration(self, force=False): + def registration(self, force=False): """ Docstring needed """ return self._get_attribute_from_api("_registration", force) - def get_emailable(self, force=False): + def is_emailable(self, force=False): """ Docstring needed """ return self._get_attribute_from_api("_emailable", force) - def get_gender(self, force=False): + def gender(self, force=False): """ 
Docstring needed """ return self._get_attribute_from_api("_gender", force) - def get_userpage(self): + def userpage(self): """ Docstring needed """ - return Page(self.site, "User:" + self.name) # Namespace checking! + prefix = self.site.namespace_id_to_name(NS_USER) + pagename = "{0}:{1}".format(prefix, self._name) + return Page(self.site, pagename) - def get_talkpage(self): + def talkpage(self): """ Docstring needed """ - return Page(self.site, "User talk:" + self.name) # Namespace checking! + prefix = self.site.namespace_id_to_name(NS_USER_TALK) + pagename = "{0}:{1}".format(prefix, self._name) + return Page(self.site, pagename) From a2ceb7a85596fb49f8315aa4149969534ae5be18 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Jul 2011 23:34:16 -0400 Subject: [PATCH 08/19] Update AFCStatus and Rights IRC commands per wikitools updates. --- irc/commands/afc_status.py | 2 +- irc/commands/rights.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/irc/commands/afc_status.py b/irc/commands/afc_status.py index 2b8b880..0f5722e 100644 --- a/irc/commands/afc_status.py +++ b/irc/commands/afc_status.py @@ -87,7 +87,7 @@ class AFCStatus(BaseCommand): def count_submissions(self): """Returns the number of open AFC submissions (count of CAT:PEND).""" cat = self.site.get_category("Pending AfC submissions") - subs = cat.get_members(limit=500) + subs = cat.members(limit=500) subs -= 2 # remove [[Wikipedia:Articles for creation/Redirects]] and [[Wikipedia:Files for upload]], which aren't real submissions return subs diff --git a/irc/commands/rights.py b/irc/commands/rights.py index 6c44227..4289002 100644 --- a/irc/commands/rights.py +++ b/irc/commands/rights.py @@ -27,7 +27,7 @@ class Rights(BaseCommand): username = ' '.join(data.args) site = tools.get_site() user = site.get_user(username) - rights = user.get_groups() + rights = user.groups() if rights: try: rights.remove("*") # remove the implicit '*' group given to everyone From 
cafa9deeddb24bc7cada7e9ef9531b4df616a7bf Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 28 Jul 2011 16:11:10 -0400 Subject: [PATCH 09/19] More additions to wikitools, mostly in Site. * Site's __init__() takes more args, all optional. As long as enough are provided to do an API query, the missing ones will be filled in automatically by _load_attributes(), which is called in __init__(). * User: _get_attribute_from_api() -> _get_attribute(); _load_attributes_from_api() -> _load_attributes. * Sites in config.json are stored with different keys/values. --- wiki/tools/functions.py | 31 +++++++++++-- wiki/tools/site.py | 118 ++++++++++++++++++++++++++++++++++++++---------- wiki/tools/user.py | 26 +++++------ 3 files changed, 133 insertions(+), 42 deletions(-) diff --git a/wiki/tools/functions.py b/wiki/tools/functions.py index d1cc020..178a8e2 100644 --- a/wiki/tools/functions.py +++ b/wiki/tools/functions.py @@ -35,12 +35,26 @@ def _get_site_object_from_dict(name, d): """Return a Site object based on the contents of a dict, probably acquired through our config file, and a separate name. 
""" - project = d["project"] - lang = d["lang"] try: - api = d["apiURL"] + project = d["project"] except KeyError: - api = None + project = None + try: + lang = d["lang"] + except KeyError: + lang = None + try: + base_url = d["baseURL"] + except KeyError: + base_url = None + try: + article_path = d["articlePath"] + except KeyError: + article_path = None + try: + script_path = d["scriptPath"] + except KeyError: + script_path = None try: sql_server = d["sqlServer"] except KeyError: @@ -49,7 +63,14 @@ def _get_site_object_from_dict(name, d): sql_db = d["sqlDB"] except KeyError: sql_db = None - return Site(name, project, lang, api, (sql_server, sql_db)) + try: + namespaces = d["namespaces"] + except KeyError: + namespaces = None + + return Site(name=name, project=project, lang=lang, base_url=base_url, + article_path=article_path, script_path=script_path, + sql=(sql_server, sql_db), namespaces=namespaces) def get_site(name=None, project=None, lang=None): """Returns a Site instance based on information from our config file. 
diff --git a/wiki/tools/site.py b/wiki/tools/site.py index 83a24c9..62a2ecc 100644 --- a/wiki/tools/site.py +++ b/wiki/tools/site.py @@ -15,29 +15,71 @@ class Site(object): EarwigBot's Wiki Toolset: Site Class """ - def __init__(self, name, project, lang, api=None, sql=(None, None)): + def __init__(self, name=None, project=None, lang=None, base_url=None, + article_path=None, script_path=None, sql=(None, None), + namespaces=None): """ Docstring needed """ - self.name = name - self.project = project - self.lang = lang - self._api = api + self._name = name + self._project = project + self._lang = lang + self._base_url = base_url + self._article_path = article_path + self._script_path = script_path self._sql = sql - - self._namespaces = None + self._namespaces = namespaces + + # get all of the above attributes that were not specified by the user + self._load_attributes() - def _get_namespaces_from_api(self): + def _load_attributes(self, force=False): """ Docstring needed """ - params = {"action": "query", "meta": "siteinfo", - "siprop": "namespaces|namespacealiases"} - result = self.api_query(params) - - if self._namespaces is None: - self._namespaces = {} + # all attributes to be loaded, except _namespaces, which is a special + # case because it requires additional params in the API query + attrs = [self._name, self._project, self._lang, self._base_url, + self._article_path, self._script_path] + + params = {"action": "query", "meta": "siteinfo"} + if self._namespaces is None or force: + params["siprop"] = "general|namespaces|namespacealiases" + result = self.api_query(params) + self._load_namespaces(result) + elif all(attrs): # everything is already specified and we're not told + return # to force a reload, so do nothing + else: # we're only loading attributes other than _namespaces + params["siprop"] = "general" + result = self.api_query(params) + + res = result["query"]["general"] + + if self._name is None or force: + self._name = res["wikiid"] + + if self._project 
is None or force: + self._project = res["sitename"].lower() + + if self._lang is None or force: + self._lang = res["lang"] + + if self._base_url is None or force: + self._base_url = res["server"] + + if self._article_path is None or force: + self._article_path = res["articlepath"] + + if self._script_path is None or force: + self._script_path = res["scriptpath"] + + def _load_namespaces(self, result): + """ + Docstring needed + """ + self._namespaces = {} + for namespace in result["query"]["namespaces"].values(): ns_id = namespace["id"] name = namespace["*"] @@ -60,27 +102,58 @@ class Site(object): """ Docstring needed """ + url = ''.join((self._base_url, self._script_path, "/api.php")) params["format"] = "json" data = urlencode(params) - result = urlopen(self._api, data).read() + result = urlopen(url, data).read() return loads(result) + def name(self): + """ + Docstring needed + """ + return self._name + + def project(self): + """ + Docstring needed + """ + return self._project + + def lang(self): + """ + Docstring needed + """ + return self._lang + + def base_url(self): + """ + Docstring needed + """ + return self._base_url + + def article_path(self): + """ + Docstring needed + """ + return self._article_path + + def script_path(self): + """ + Docstring needed + """ + return self._script_path + def namespaces(self): """ Docstring needed """ - if self._namespaces is None: - self._get_namespaces_from_api() - return self._namespaces def namespace_id_to_name(self, ns_id, all=False): """ Docstring needed """ - if self._namespaces is None: - self._get_namespaces_from_api() - try: if all: return self._namespaces[ns_id] @@ -94,9 +167,6 @@ class Site(object): """ Docstring needed """ - if self._namespaces is None: - self._get_namespaces_from_api() - lname = name.lower() for ns_id, names in self._namespaces.items(): lnames = [n.lower() for n in names] # be case-insensitive diff --git a/wiki/tools/user.py b/wiki/tools/user.py index 8f6e96f..b406b97 100644 --- 
a/wiki/tools/user.py +++ b/wiki/tools/user.py @@ -30,18 +30,18 @@ class User(object): self._emailable = None self._gender = None - def _get_attribute_from_api(self, attr, force): + def _get_attribute(self, attr, force): """ Docstring needed """ if self._exists is None or force: - self._load_attributes_from_api() + self._load_attributes() if self._exists is False: e = "User '{0}' does not exist.".format(self._name) raise UserNotFoundError(e) return getattr(self, attr) - def _load_attributes_from_api(self): + def _load_attributes(self): """ Docstring needed """ @@ -84,61 +84,61 @@ class User(object): """ Docstring needed """ - return self._get_attribute_from_api("_name", force) + return self._get_attribute("_name", force) def exists(self, force=False): """ Docstring needed """ - return self._get_attribute_from_api("_exists", force) + return self._get_attribute("_exists", force) def userid(self, force=False): """ Docstring needed """ - return self._get_attribute_from_api("_userid", force) + return self._get_attribute("_userid", force) def blockinfo(self, force=False): """ Docstring needed """ - return self._get_attribute_from_api("_blockinfo", force) + return self._get_attribute("_blockinfo", force) def groups(self, force=False): """ Docstring needed """ - return self._get_attribute_from_api("_groups", force) + return self._get_attribute("_groups", force) def rights(self, force=False): """ Docstring needed """ - return self._get_attribute_from_api("_rights", force) + return self._get_attribute("_rights", force) def editcount(self, force=False): """ Docstring needed """ - return self._get_attribute_from_api("_editcount", force) + return self._get_attribute("_editcount", force) def registration(self, force=False): """ Docstring needed """ - return self._get_attribute_from_api("_registration", force) + return self._get_attribute("_registration", force) def is_emailable(self, force=False): """ Docstring needed """ - return self._get_attribute_from_api("_emailable", force) 
+ return self._get_attribute("_emailable", force) def gender(self, force=False): """ Docstring needed """ - return self._get_attribute_from_api("_gender", force) + return self._get_attribute("_gender", force) def userpage(self): """ From 74ddc5b702d19375a407b6c87fd34f0447ba0fb3 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 29 Jul 2011 02:31:01 -0400 Subject: [PATCH 10/19] More work on wikitools, now with improved API queries and login. * Site's api_query() is much smarter. It uses a custom urllib2 URL opener with cookie support and catches URLErrors, raising its own brand new exception (SiteAPIError) when something is wrong. * The opener now uses a custom User-Agent, which is a constant in wiki.tools.constants. * Site instances automatically login via _login(), which accepts a username and password (provided via config by get_site()) and uses two api_query()s and stores the login data as cookies in self._cookiejar. Login data is not preserved between bot restarts yet. Login errors, e.g. a bad password or username, raise the new LoginError. * Site's get_user()'s username argument is now optional. If left blank, will return the current logged-in user, provided by an API query. * Misc cleanup throughout. --- wiki/tools/constants.py | 10 ++++++- wiki/tools/exceptions.py | 8 ++++++ wiki/tools/functions.py | 18 +++++++++--- wiki/tools/site.py | 74 ++++++++++++++++++++++++++++++++++++++++++------ 4 files changed, 97 insertions(+), 13 deletions(-) diff --git a/wiki/tools/constants.py b/wiki/tools/constants.py index 76a327d..6397c5d 100644 --- a/wiki/tools/constants.py +++ b/wiki/tools/constants.py @@ -3,9 +3,17 @@ """ EarwigBot's Wiki Toolset: Constants -This module defines some useful constants. +This module defines some useful constants, such as default namespace IDs for +easy lookup and our user agent. + +Import with `from wiki.tools.constants import *`. 
""" +import platform + +# User agent when making API queries +USER_AGENT = "EarwigBot/0.1-dev (Python/{0}; https://github.com/earwig/earwigbot)".format(platform.python_version()) + # Default namespace IDs NS_MAIN = 0 NS_TALK = 1 diff --git a/wiki/tools/exceptions.py b/wiki/tools/exceptions.py index 3e5eaf2..0620262 100644 --- a/wiki/tools/exceptions.py +++ b/wiki/tools/exceptions.py @@ -13,6 +13,14 @@ class SiteNotFoundError(WikiToolsetError): """A site matching the args given to get_site() could not be found in the config file.""" +class SiteAPIError(WikiToolsetError): + """We couldn't connect to a site's API, perhaps because the server doesn't + exist, our URL is wrong, or they're having temporary problems.""" + +class LoginError(WikiToolsetError): + """An error occured while trying to login. Perhaps the username/password is + incorrect.""" + class NamespaceNotFoundError(WikiToolsetError): """A requested namespace name or namespace ID does not exist.""" diff --git a/wiki/tools/functions.py b/wiki/tools/functions.py index 178a8e2..2618a57 100644 --- a/wiki/tools/functions.py +++ b/wiki/tools/functions.py @@ -67,10 +67,14 @@ def _get_site_object_from_dict(name, d): namespaces = d["namespaces"] except KeyError: namespaces = None + try: + login = (config.wiki["username"], config.wiki["password"]) + except KeyError: + login = (None, None) return Site(name=name, project=project, lang=lang, base_url=base_url, article_path=article_path, script_path=script_path, - sql=(sql_server, sql_db), namespaces=namespaces) + sql=(sql_server, sql_db), namespaces=namespaces, login=login) def get_site(name=None, project=None, lang=None): """Returns a Site instance based on information from our config file. @@ -86,6 +90,10 @@ def get_site(name=None, project=None, lang=None): member of config.wiki["sites"], `s`, for which s["project"] == project and s["lang"] == lang. 
+ We will attempt to login to the site automatically + using config.wiki["username"] and config.wiki["password"] if both are + defined. + Specifying a project without a lang or a lang without a project will raise TypeError. If all three args are specified, `name` will be first tried, then `project` and `lang`. If, with any number of args, a site cannot be @@ -96,7 +104,8 @@ def get_site(name=None, project=None, lang=None): _load_config() # someone specified a project without a lang (or a lang without a project)! - if (project is None and lang is not None) or (project is not None and lang is None): + if (project is None and lang is not None) or (project is not None and + lang is None): e = "Keyword arguments 'lang' and 'project' must be specified together." raise TypeError(e) @@ -120,13 +129,14 @@ def get_site(name=None, project=None, lang=None): try: site = config.wiki["sites"][name] except KeyError: - if project is None: # implies lang is None, i.e., only name was given + if project is None: # implies lang is None, so only name was given e = "Site '{0}' not found in config.".format(name) raise SiteNotFoundError(e) for sitename, site in config.wiki["sites"].items(): if site["project"] == project and site["lang"] == lang: return _get_site_object_from_dict(sitename, site) - e = "Neither site '{0}' nor site '{1}:{2}' found in config.".format(name, project, lang) + e = "Neither site '{0}' nor site '{1}:{2}' found in config." 
+ e.format(name, project, lang) raise SiteNotFoundError(e) else: return _get_site_object_from_dict(name, site) diff --git a/wiki/tools/site.py b/wiki/tools/site.py index 62a2ecc..f4b854f 100644 --- a/wiki/tools/site.py +++ b/wiki/tools/site.py @@ -1,12 +1,13 @@ # -*- coding: utf-8 -*- +from cookielib import CookieJar from json import loads from urllib import urlencode -from urllib2 import urlopen +from urllib2 import build_opener, HTTPCookieProcessor, URLError from wiki.tools.category import Category from wiki.tools.constants import * -from wiki.tools.exceptions import NamespaceNotFoundError +from wiki.tools.exceptions import * from wiki.tools.page import Page from wiki.tools.user import User @@ -17,10 +18,12 @@ class Site(object): def __init__(self, name=None, project=None, lang=None, base_url=None, article_path=None, script_path=None, sql=(None, None), - namespaces=None): + namespaces=None, login=(None, None)): """ Docstring needed """ + # attributes referring to site information, filled in by an API query + # if they are missing (and an API url is available) self._name = name self._project = project self._lang = lang @@ -30,9 +33,45 @@ class Site(object): self._sql = sql self._namespaces = namespaces - # get all of the above attributes that were not specified by the user + # set up cookiejar and URL opener for making API queries + self._cookiejar = CookieJar(cookie_file) + self._opener = build_opener(HTTPCookieProcessor(self._cookiejar)) + self._opener.addheaders = [('User-agent', USER_AGENT)] + + # use a username and password to login if they were provided + if login[0] is not None and login[1] is not None: + self._login(login[0], login[1]) + + # get all of the above attributes that were not specified as arguments self._load_attributes() + def _login(self, name, password, token="", attempt=0): + """ + Docstring needed + """ + params = {"action": "login", "lgname": name, "lgpassword": password, + "lgtoken": token} + result = self.api_query(params) + res = 
result["login"]["result"] + + if res == "Success": + return + elif res == "NeedToken" and attempt == 0: + token = result["login"]["token"] + return self._login(name, password, token, attempt=1) + else: + if res == "Illegal": + e = "The provided username is illegal." + elif res == "NotExists": + e = "The provided username does not exist." + elif res == "EmptyPass": + e = "No password was given." + elif res == "WrongPass" or res == "WrongPluginPass": + e = "The given password is incorrect." + else: + e = "Couldn't login; server says '{0}'.".format(res) + raise LoginError(e) + def _load_attributes(self, force=False): """ Docstring needed @@ -103,10 +142,24 @@ class Site(object): Docstring needed """ url = ''.join((self._base_url, self._script_path, "/api.php")) - params["format"] = "json" + params["format"] = "json" # this is the only format we understand data = urlencode(params) - result = urlopen(url, data).read() - return loads(result) + + try: + response = self._opener.open(url, data) + except URLError as error: + if hasattr(error, "reason"): + e = "API query at {0} failed because {1}.".format(error.geturl, + error.reason) + elif hasattr(error, "code"): + e = "API query at {0} failed; got an error code of {1}." + e = e.format(error.geturl, error.code) + else: + e = "API query failed." 
+ raise SiteAPIError(e) + else: + result = response.read() + return loads(result) # parse as a JSON object def name(self): """ @@ -195,8 +248,13 @@ class Site(object): pagename = "{0}:{1}".format(prefix, catname) return Category(self, pagename) - def get_user(self, username): + def get_user(self, username=None): """ Docstring needed """ + if username is None: + params = {"action": "query", "meta": "userinfo"} + result = self.api_query(params) + username = result["query"]["userinfo"]["name"] + return User(self, username) From 575e975930269645002d5aa57678069ad3403fb6 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 29 Jul 2011 02:44:04 -0400 Subject: [PATCH 11/19] Bugfix in user.name() and user.exists(). --- wiki/tools/user.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/wiki/tools/user.py b/wiki/tools/user.py index b406b97..94a46d4 100644 --- a/wiki/tools/user.py +++ b/wiki/tools/user.py @@ -30,13 +30,13 @@ class User(object): self._emailable = None self._gender = None - def _get_attribute(self, attr, force): + def _get_attribute(self, attr, force, raise_exception=True): """ Docstring needed """ if self._exists is None or force: self._load_attributes() - if self._exists is False: + if self._exists is False and raise_exception: e = "User '{0}' does not exist.".format(self._name) raise UserNotFoundError(e) return getattr(self, attr) @@ -84,13 +84,13 @@ class User(object): """ Docstring needed """ - return self._get_attribute("_name", force) + return self._get_attribute("_name", force, raise_exception=False) def exists(self, force=False): """ Docstring needed """ - return self._get_attribute("_exists", force) + return self._get_attribute("_exists", force, raise_exception=False) def userid(self, force=False): """ From c7bbb211179e343b640db6881726bcc407449607 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 29 Jul 2011 03:42:44 -0400 Subject: [PATCH 12/19] Improvements to Site and User by removing unnecessary API queries. 
* Site: New _get_logged_in_user() method, name self-explanatory. This acts as a replacement for the former crud in get_user(), which now calls this when the username arg is None. This method will first try to determine our username based on a special cookie in self._cookiejar (cookie.name is self._name + "UserName", e.g. "enwikiUserName"), and will only do an API query if no cookie was found. This removes an API query that is usually only necessary if we are not logged in. * Site: silly bugfix in __init__(). * User: Reverted earlier change to _get_attribute() (addition of raise_exception arg); name() and exists() now use their own code, which is simpler. * User: Calling name() does not do an API query unless force=True, unlike the other "get" methods. * User: .join() instead of .format() because I feel it looks cleaner and is probably more efficient. --- wiki/tools/site.py | 28 ++++++++++++++++++++++------ wiki/tools/user.py | 16 ++++++++++------ 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/wiki/tools/site.py b/wiki/tools/site.py index f4b854f..f32e3c2 100644 --- a/wiki/tools/site.py +++ b/wiki/tools/site.py @@ -2,8 +2,9 @@ from cookielib import CookieJar from json import loads -from urllib import urlencode +from urllib import unquote_plus, urlencode from urllib2 import build_opener, HTTPCookieProcessor, URLError +from urlparse import urlparse from wiki.tools.category import Category from wiki.tools.constants import * @@ -34,7 +35,7 @@ class Site(object): self._namespaces = namespaces # set up cookiejar and URL opener for making API queries - self._cookiejar = CookieJar(cookie_file) + self._cookiejar = CookieJar() self._opener = build_opener(HTTPCookieProcessor(self._cookiejar)) self._opener.addheaders = [('User-agent', USER_AGENT)] @@ -72,6 +73,24 @@ class Site(object): e = "Couldn't login; server says '{0}'.".format(res) raise LoginError(e) + def _get_logged_in_user(self): + """ + Docstring needed + """ + # first try to get username from the 
cookie jar to avoid an + # unnecessary API query + cookie_name = ''.join((self._name, "UserName")) + cookie_domain = urlparse(self._base_url).netloc + for cookie in self._cookiejar: + if cookie.name == cookie_name and cookie.domain == cookie_domain: + return unquote_plus(cookie.value) + + # if we end up here, we're probably an anon and thus an API query + # will be required to get our username + params = {"action": "query", "meta": "userinfo"} + result = self.api_query(params) + return result["query"]["userinfo"]["name"] + def _load_attributes(self, force=False): """ Docstring needed @@ -253,8 +272,5 @@ class Site(object): Docstring needed """ if username is None: - params = {"action": "query", "meta": "userinfo"} - result = self.api_query(params) - username = result["query"]["userinfo"]["name"] - + username = self._get_logged_in_user() return User(self, username) diff --git a/wiki/tools/user.py b/wiki/tools/user.py index 94a46d4..98f9670 100644 --- a/wiki/tools/user.py +++ b/wiki/tools/user.py @@ -30,13 +30,13 @@ class User(object): self._emailable = None self._gender = None - def _get_attribute(self, attr, force, raise_exception=True): + def _get_attribute(self, attr, force): """ Docstring needed """ if self._exists is None or force: self._load_attributes() - if self._exists is False and raise_exception: + if self._exists is False: e = "User '{0}' does not exist.".format(self._name) raise UserNotFoundError(e) return getattr(self, attr) @@ -84,13 +84,17 @@ class User(object): """ Docstring needed """ - return self._get_attribute("_name", force, raise_exception=False) + if force: + self._load_attributes() + return self._name def exists(self, force=False): """ Docstring needed """ - return self._get_attribute("_exists", force, raise_exception=False) + if self._exists is None or force: + self._load_attributes() + return self._exists def userid(self, force=False): """ @@ -145,7 +149,7 @@ class User(object): Docstring needed """ prefix = 
self.site.namespace_id_to_name(NS_USER) - pagename = "{0}:{1}".format(prefix, self._name) + pagename = ''.join((prefix, ":", self._name)) return Page(self.site, pagename) def talkpage(self): @@ -153,5 +157,5 @@ class User(object): Docstring needed """ prefix = self.site.namespace_id_to_name(NS_USER_TALK) - pagename = "{0}:{1}".format(prefix, self._name) + pagename = ''.join((prefix, ":", self._name)) return Page(self.site, pagename) From 612c9c8ff6999b14c89a534a7249e55b3984d41b Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 31 Jul 2011 18:37:59 -0400 Subject: [PATCH 13/19] Major improvements to cookies, login/logout, and crosswiki support. * Exceptions: New PermissionsError; reworded docstring of SiteAPIError. * Site: __init__() accepts an optional cookiejar parameter, otherwise we use CookieJar(). Added five new cookie/username-related methods. Only login from __init__() if we are missing valid login cookies and a user/ pass was provided. _login() and _logout() both try to save cookies via _save_cookiejar(). _load_attributes() automatically refreshes all attributes other than namespaces if at least one is missing, instead of only the missing ones. api_query() raises SiteAPIError if either self._base_url or self._script_path is missing. Removed some pointless methods and renamed one; added domain(). * Functions: _get_site_object_from_dict() is cleaner, adds our cookiejar to Site instances using _get_cookiejar() to load a LWPCookieJar() object from the ".cookies" file in our project root. The same cookiejar is returned for every site, enabling crosswiki login, via a global variable. * User: Renamed some methods. * .gitignore: Added .cookies file. 
--- .gitignore | 3 + wiki/tools/exceptions.py | 8 +- wiki/tools/functions.py | 94 ++++++++++------- wiki/tools/site.py | 260 ++++++++++++++++++++++++++++++----------------- wiki/tools/user.py | 6 +- 5 files changed, 235 insertions(+), 136 deletions(-) diff --git a/.gitignore b/.gitignore index bc67eea..1884197 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,9 @@ # Ignore bot-specific config file: config.json +# Ignore cookies file: +.cookies + # Ignore OS X's crud: *.DS_Store diff --git a/wiki/tools/exceptions.py b/wiki/tools/exceptions.py index 0620262..d28cac2 100644 --- a/wiki/tools/exceptions.py +++ b/wiki/tools/exceptions.py @@ -15,12 +15,18 @@ class SiteNotFoundError(WikiToolsetError): class SiteAPIError(WikiToolsetError): """We couldn't connect to a site's API, perhaps because the server doesn't - exist, our URL is wrong, or they're having temporary problems.""" + exist, our URL is wrong or incomplete, or they're having temporary + problems.""" class LoginError(WikiToolsetError): """An error occured while trying to login. Perhaps the username/password is incorrect.""" +class PermissionsError(WikiToolsetError): + """We tried to do something we don't have permission to, like a non-admin + trying to delete a page, or trying to edit a page when no login information + was provided.""" + class NamespaceNotFoundError(WikiToolsetError): """A requested namespace name or namespace ID does not exist.""" diff --git a/wiki/tools/functions.py b/wiki/tools/functions.py index 2618a57..ff69c19 100644 --- a/wiki/tools/functions.py +++ b/wiki/tools/functions.py @@ -10,7 +10,11 @@ There's no need to import this module explicitly. All functions here are automatically available from wiki.tools. 
""" +from cookielib import LWPCookieJar, LoadError +import errno from getpass import getpass +from os import chmod, path +import stat from core import config from wiki.tools.exceptions import SiteNotFoundError @@ -18,6 +22,8 @@ from wiki.tools.site import Site __all__ = ["get_site"] +_cookiejar = None + def _load_config(): """Called by a config-requiring function, such as get_site(), when config has not been loaded. This will usually happen only if we're running code @@ -31,50 +37,60 @@ def _load_config(): else: config.parse_config(None) +def _get_cookiejar(): + """Returns a LWPCookieJar object loaded from our .cookies file. The same + one is returned every time. + + The .cookies file is located in the project root, same directory as + config.json and earwigbot.py. If it doesn't exist, we will create the file + and set it to be readable and writeable only by us. If it exists but the + information inside is bogus, we will ignore it. + + This is normally called by _get_site_object_from_dict() (in turn called by + get_site()), and the cookiejar is passed to our Site's constructor, used + when it makes API queries. This way, we can easily preserve cookies between + sites (e.g., for CentralAuth), making logins easier. + """ + global _cookiejar + if _cookiejar is not None: + return _cookiejar + + cookie_file = path.join(config.root_dir, ".cookies") + _cookiejar = LWPCookieJar(cookie_file) + + try: + _cookiejar.load() + except LoadError: + # file contains bad data, so ignore it completely + pass + except IOError as e: + if e.errno == errno.ENOENT: # "No such file or directory" + # create the file and restrict reading/writing only to the owner, + # so others can't peek at our cookies + open(cookie_file, "w").close() + chmod(cookie_file, stat.S_IRUSR|stat.S_IWUSR) + else: + raise + + return _cookiejar + def _get_site_object_from_dict(name, d): """Return a Site object based on the contents of a dict, probably acquired through our config file, and a separate name. 
""" - try: - project = d["project"] - except KeyError: - project = None - try: - lang = d["lang"] - except KeyError: - lang = None - try: - base_url = d["baseURL"] - except KeyError: - base_url = None - try: - article_path = d["articlePath"] - except KeyError: - article_path = None - try: - script_path = d["scriptPath"] - except KeyError: - script_path = None - try: - sql_server = d["sqlServer"] - except KeyError: - sql_server = None - try: - sql_db = d["sqlDB"] - except KeyError: - sql_db = None - try: - namespaces = d["namespaces"] - except KeyError: - namespaces = None - try: - login = (config.wiki["username"], config.wiki["password"]) - except KeyError: - login = (None, None) + project = d.get("project") + lang = d.get("lang") + base_url = d.get("baseURL") + article_path = d.get("articlePath") + script_path = d.get("scriptPath") + sql = (d.get("sqlServer"), d.get("sqlDB")) + namespaces = d.get("namespaces") + login = (config.wiki.get("username"), config.wiki.get("password")) + cookiejar = _get_cookiejar() return Site(name=name, project=project, lang=lang, base_url=base_url, - article_path=article_path, script_path=script_path, - sql=(sql_server, sql_db), namespaces=namespaces, login=login) + article_path=article_path, script_path=script_path, sql=sql, + namespaces=namespaces, login=login, cookiejar=cookiejar) def get_site(name=None, project=None, lang=None): """Returns a Site instance based on information from our config file. @@ -112,7 +128,7 @@ def get_site(name=None, project=None, lang=None): # no args given, so return our default site (project is None implies lang # is None, so we don't need to add that in) if name is None and project is None: - try: # ...so use the default site + try: default = config.wiki["defaultSite"] except KeyError: e = "Default site is not specified in config." 
diff --git a/wiki/tools/site.py b/wiki/tools/site.py index f32e3c2..982cd30 100644 --- a/wiki/tools/site.py +++ b/wiki/tools/site.py @@ -2,6 +2,7 @@ from cookielib import CookieJar from json import loads +from re import escape as re_escape, match as re_match from urllib import unquote_plus, urlencode from urllib2 import build_opener, HTTPCookieProcessor, URLError from urlparse import urlparse @@ -19,12 +20,12 @@ class Site(object): def __init__(self, name=None, project=None, lang=None, base_url=None, article_path=None, script_path=None, sql=(None, None), - namespaces=None, login=(None, None)): + namespaces=None, login=(None, None), cookiejar=None): """ Docstring needed """ # attributes referring to site information, filled in by an API query - # if they are missing (and an API url is available) + # if they are missing (and an API url can be determined) self._name = name self._project = project self._lang = lang @@ -35,61 +36,22 @@ class Site(object): self._namespaces = namespaces # set up cookiejar and URL opener for making API queries - self._cookiejar = CookieJar() + if cookiejar is not None: + self._cookiejar = cookiejar + else: + self._cookiejar = CookieJar() self._opener = build_opener(HTTPCookieProcessor(self._cookiejar)) self._opener.addheaders = [('User-agent', USER_AGENT)] - # use a username and password to login if they were provided - if login[0] is not None and login[1] is not None: - self._login(login[0], login[1]) - # get all of the above attributes that were not specified as arguments self._load_attributes() - def _login(self, name, password, token="", attempt=0): - """ - Docstring needed - """ - params = {"action": "login", "lgname": name, "lgpassword": password, - "lgtoken": token} - result = self.api_query(params) - res = result["login"]["result"] - - if res == "Success": - return - elif res == "NeedToken" and attempt == 0: - token = result["login"]["token"] - return self._login(name, password, token, attempt=1) - else: - if res == "Illegal": - e 
= "The provided username is illegal." - elif res == "NotExists": - e = "The provided username does not exist." - elif res == "EmptyPass": - e = "No password was given." - elif res == "WrongPass" or res == "WrongPluginPass": - e = "The given password is incorrect." - else: - e = "Couldn't login; server says '{0}'.".format(res) - raise LoginError(e) - - def _get_logged_in_user(self): - """ - Docstring needed - """ - # first try to get username from the cookie jar to avoid an - # unnecessary API query - cookie_name = ''.join((self._name, "UserName")) - cookie_domain = urlparse(self._base_url).netloc - for cookie in self._cookiejar: - if cookie.name == cookie_name and cookie.domain == cookie_domain: - return unquote_plus(cookie.value) - - # if we end up here, we're probably an anon and thus an API query - # will be required to get our username - params = {"action": "query", "meta": "userinfo"} - result = self.api_query(params) - return result["query"]["userinfo"]["name"] + # if we have a name/pass and the API says we're not logged in, log in + self._login_info = name, password = login + if name is not None and password is not None: + logged_in_as = self._get_username_from_cookies() + if logged_in_as is None or name != logged_in_as: + self._login(login) def _load_attributes(self, force=False): """ @@ -101,7 +63,7 @@ class Site(object): self._article_path, self._script_path] params = {"action": "query", "meta": "siteinfo"} - + if self._namespaces is None or force: params["siprop"] = "general|namespaces|namespacealiases" result = self.api_query(params) @@ -113,24 +75,12 @@ class Site(object): result = self.api_query(params) res = result["query"]["general"] - - if self._name is None or force: - self._name = res["wikiid"] - - if self._project is None or force: - self._project = res["sitename"].lower() - - if self._lang is None or force: - self._lang = res["lang"] - - if self._base_url is None or force: - self._base_url = res["server"] - - if self._article_path is None or 
force: - self._article_path = res["articlepath"] - - if self._script_path is None or force: - self._script_path = res["scriptpath"] + self._name = res["wikiid"] + self._project = res["sitename"].lower() + self._lang = res["lang"] + self._base_url = res["server"] + self._article_path = res["articlepath"] + self._script_path = res["scriptpath"] def _load_namespaces(self, result): """ @@ -156,20 +106,162 @@ class Site(object): alias = namespace["*"] self._namespaces[ns_id].append(alias) + def _get_cookie(self, name, domain): + """Return the cookie `name` in `domain`, unless it is expired. Return + None if no cookie was found. + """ + for cookie in self._cookiejar: + if cookie.name == name and cookie.domain == domain: + if cookie.is_expired(): + break + return cookie + return None + + def _get_username_from_cookies(self): + """Try to return our username based solely on cookies. + + First, we'll look for a cookie named self._name + "Token", like + "enwikiToken". If it exists and isn't expired, we'll assume it's valid + and try to return the value of the cookie self._name + "UserName" (like + "enwikiUserName"). This should work fine on wikis without single-user + login. + + If `enwikiToken` doesn't exist, we'll try to find a cookie named + `centralauth_Token`. If this exists and is not expired, we'll try to + return the value of `centralauth_User`. + + If we didn't get any matches, we'll return None. Our goal here isn't to + return the most likely username, or what we *want* our username to be + (for that, we'd do self._login_info[0]), but rather to get our current + username without an unnecessary ?action=query&meta=userinfo API query. 
+ """ + domain = self.domain() + name = ''.join((self._name, "Token")) + cookie = self._get_cookie(name, domain) + + if cookie is not None: + name = ''.join((self._name, "UserName")) + user_name = self._get_cookie(name, domain) + if user_name is not None: + return user_name.value + + name = "centralauth_Token" + for cookie in self._cookiejar: + if cookie.domain_initial_dot is False or cookie.is_expired(): + continue + if cookie.name != name: + continue + # build a regex that will match domains this cookie affects + search = ''.join(("(.*?)", re_escape(cookie.domain))) + if re_match(search, domain): # test it against our site + user_name = self._get_cookie("centralauth_User", cookie.domain) + if user_name is not None: + return user_name.value + + return None + + def _get_username_from_api(self): + """Do a simple API query to get our username and return it. + + This is a reliable way to make sure we are actually logged in, because + it doesn't deal with annoying cookie logic, but it results in an API + query that is unnecessary in many cases. + + Called by _get_username() (in turn called by get_user() with no + username argument) when cookie lookup fails, probably indicating that + we are logged out. + """ + params = {"action": "query", "meta": "userinfo"} + result = self.api_query(params) + return result["query"]["userinfo"]["name"] + + def _get_username(self): + """Return the name of the current user, whether logged in or not. + + First, we'll try to deduce it solely from cookies, to avoid an + unnecessary API query. For the cookie-detection method, see + _get_username_from_cookies()'s docs. + + If our username isn't in cookies, then we're probably not logged in, or + something fishy is going on (like forced logout). In this case, do a + single API query for our username (or IP address) and return that. 
+ """ + name = self._get_username_from_cookies() + if name is not None: + return name + return self._get_username_from_api() + + def _save_cookiejar(self): + """Try to save our cookiejar after doing a (normal) login or logout. + + Calls the standard .save() method with no filename. Don't fret if our + cookiejar doesn't support saving (CookieJar raises AttributeError, + FileCookieJar raises NotImplementedError) or no default filename was + given (LWPCookieJar and MozillaCookieJar raise ValueError). + """ + try: + self._cookiejar.save() + except (AttributeError, NotImplementedError, ValueError): + pass + + def _login(self, login, token=None, attempt=0): + """ + Docstring needed + """ + name, password = login + params = {"action": "login", "lgname": name, "lgpassword": password} + if token is not None: + params["lgtoken"] = token + result = self.api_query(params) + res = result["login"]["result"] + + if res == "Success": + self._save_cookiejar() + elif res == "NeedToken" and attempt == 0: + token = result["login"]["token"] + return self._login(login, token, attempt=1) + else: + if res == "Illegal": + e = "The provided username is illegal." + elif res == "NotExists": + e = "The provided username does not exist." + elif res == "EmptyPass": + e = "No password was given." + elif res == "WrongPass" or res == "WrongPluginPass": + e = "The given password is incorrect." + else: + e = "Couldn't login; server says '{0}'.".format(res) + raise LoginError(e) + + def _logout(self): + """ + Docstring needed + """ + params = {"action": "logout"} + self.api_query(params) + self._cookiejar.clear() + self._save_cookiejar() + def api_query(self, params): """ Docstring needed """ + if self._base_url is None or self._script_path is None: + e = "Tried to do an API query, but no API URL is known." 
+ raise SiteAPIError(e) + url = ''.join((self._base_url, self._script_path, "/api.php")) params["format"] = "json" # this is the only format we understand data = urlencode(params) + print url, data # debug code + try: response = self._opener.open(url, data) except URLError as error: if hasattr(error, "reason"): - e = "API query at {0} failed because {1}.".format(error.geturl, - error.reason) + e = "API query at {0} failed because {1}." + e = e.format(error.geturl, error.reason) elif hasattr(error, "code"): e = "API query at {0} failed; got an error code of {1}." e = e.format(error.geturl, error.code) @@ -198,29 +290,11 @@ class Site(object): """ return self._lang - def base_url(self): - """ - Docstring needed - """ - return self._base_url - - def article_path(self): - """ - Docstring needed - """ - return self._article_path - - def script_path(self): - """ - Docstring needed - """ - return self._script_path - - def namespaces(self): + def domain(self): """ Docstring needed """ - return self._namespaces + return urlparse(self._base_url).netloc def namespace_id_to_name(self, ns_id, all=False): """ @@ -272,5 +346,5 @@ class Site(object): Docstring needed """ if username is None: - username = self._get_logged_in_user() + username = self._get_username() return User(self, username) diff --git a/wiki/tools/user.py b/wiki/tools/user.py index 98f9670..be71515 100644 --- a/wiki/tools/user.py +++ b/wiki/tools/user.py @@ -132,7 +132,7 @@ class User(object): """ return self._get_attribute("_registration", force) - def is_emailable(self, force=False): + def emailable(self, force=False): """ Docstring needed """ @@ -144,7 +144,7 @@ class User(object): """ return self._get_attribute("_gender", force) - def userpage(self): + def get_userpage(self): """ Docstring needed """ @@ -152,7 +152,7 @@ class User(object): pagename = ''.join((prefix, ":", self._name)) return Page(self.site, pagename) - def talkpage(self): + def get_talkpage(self): """ Docstring needed """ From 
77c541a5133af8fd3b4d77383cb38ee61dfb0b0f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 31 Jul 2011 23:22:41 -0400 Subject: [PATCH 14/19] Accept gzipped data and decompress it in api_query(). --- wiki/tools/site.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/wiki/tools/site.py b/wiki/tools/site.py index 982cd30..db3b7d2 100644 --- a/wiki/tools/site.py +++ b/wiki/tools/site.py @@ -1,8 +1,10 @@ # -*- coding: utf-8 -*- from cookielib import CookieJar +from gzip import GzipFile from json import loads from re import escape as re_escape, match as re_match +from StringIO import StringIO from urllib import unquote_plus, urlencode from urllib2 import build_opener, HTTPCookieProcessor, URLError from urlparse import urlparse @@ -41,7 +43,8 @@ class Site(object): else: self._cookiejar = CookieJar() self._opener = build_opener(HTTPCookieProcessor(self._cookiejar)) - self._opener.addheaders = [('User-agent', USER_AGENT)] + self._opener.addheaders = [("User-Agent", USER_AGENT), + ("Accept-Encoding", "gzip")] # get all of the above attributes that were not specified as arguments self._load_attributes() @@ -270,6 +273,10 @@ class Site(object): raise SiteAPIError(e) else: result = response.read() + if response.headers.get("Content-Encoding") == "gzip": + stream = StringIO(result) + gzipper = GzipFile(fileobj=stream) + result = gzipper.read() return loads(result) # parse as a JSON object def name(self): From 434863dcd1478e59ff475414e032289397057a6d Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 2 Aug 2011 04:45:19 -0400 Subject: [PATCH 15/19] Docstrings for everything in Site; some cleanup in Site/Functions. 
--- wiki/tools/functions.py | 12 +++- wiki/tools/site.py | 156 +++++++++++++++++++++++++++++++++++------------- 2 files changed, 123 insertions(+), 45 deletions(-) diff --git a/wiki/tools/functions.py b/wiki/tools/functions.py index ff69c19..bc7b187 100644 --- a/wiki/tools/functions.py +++ b/wiki/tools/functions.py @@ -89,8 +89,8 @@ def _get_site_object_from_dict(name, d): cookiejar = _get_cookiejar() return Site(name=name, project=project, lang=lang, base_url=base_url, - article_path=article_path, script_path=script_path, sql=sql, - namespaces=namespaces, login=login, cookiejar=cookiejar) + article_path=article_path, script_path=script_path, sql=sql, + namespaces=namespaces, login=login, cookiejar=cookiejar) def get_site(name=None, project=None, lang=None): """Returns a Site instance based on information from our config file. @@ -163,3 +163,11 @@ def get_site(name=None, project=None, lang=None): return _get_site_object_from_dict(sitename, site) e = "Site '{0}:{1}' not found in config.".format(project, lang) raise SiteNotFoundError(e) + +def add_site(): + """STUB: config editing is required first""" + pass + +def del_site(): + """STUB: config editing is required first""" + pass diff --git a/wiki/tools/site.py b/wiki/tools/site.py index db3b7d2..65ed9b5 100644 --- a/wiki/tools/site.py +++ b/wiki/tools/site.py @@ -18,13 +18,32 @@ from wiki.tools.user import User class Site(object): """ EarwigBot's Wiki Toolset: Site Class + + Represents a Site, with support for API queries and returning Pages, Users, + and Categories. The constructor takes a bunch of arguments and you probably + won't need to call it directly, rather tools.get_site() for returning Site + instances, tools.add_site() for adding new ones to config, and + tools.del_site() for removing old ones from config, should suffice. 
""" def __init__(self, name=None, project=None, lang=None, base_url=None, - article_path=None, script_path=None, sql=(None, None), - namespaces=None, login=(None, None), cookiejar=None): - """ - Docstring needed + article_path=None, script_path=None, sql=(None, None), + namespaces=None, login=(None, None), cookiejar=None): + """Constructor for new Site instances. + + This probably isn't necessary to call yourself unless you're building a + Site that's not in your config and you don't want to add it - normally + all you need is tools.get_site(name), which creates the Site for you + based on your config file. We accept a bunch of kwargs, but the only + ones you really "need" are `base_url` and `script_path` - this is + enough to figure out an API url. `login`, a tuple of + (username, password), is highly recommended. `cookiejar` will be used + to store cookies, and we'll use a normal CookieJar if none is given. + + First, we'll store the given arguments as attributes, then set up our + URL opener. We'll load any of the attributes that weren't given from + the API, and then log in if a username/pass was given and we aren't + already logged in. """ # attributes referring to site information, filled in by an API query # if they are missing (and an API url can be determined) @@ -57,8 +76,14 @@ class Site(object): self._login(login) def _load_attributes(self, force=False): - """ - Docstring needed + """Load data about our Site from the API. + + This function is called by __init__() when one of the site attributes + was not given as a keyword argument. We'll do an API query to get the + missing data, but only if there actually *is* missing data. + + Additionally, you can call this with `force=True` to forcibly reload + all attributes. 
""" # all attributes to be loaded, except _namespaces, which is a special # case because it requires additional params in the API query @@ -86,8 +111,10 @@ class Site(object): self._script_path = res["scriptpath"] def _load_namespaces(self, result): - """ - Docstring needed + """Fill self._namespaces with a dict of namespace IDs and names. + + Called by _load_attributes() with API data as `result` when + self._namespaces was not given as a kwarg to __init__(). """ self._namespaces = {} @@ -110,15 +137,12 @@ class Site(object): self._namespaces[ns_id].append(alias) def _get_cookie(self, name, domain): - """Return the cookie `name` in `domain`, unless it is expired. Return - None if no cookie was found. - """ + """Return the named cookie unless it is expired or doesn't exist.""" for cookie in self._cookiejar: if cookie.name == name and cookie.domain == domain: if cookie.is_expired(): break return cookie - return None def _get_username_from_cookies(self): """Try to return our username based solely on cookies. @@ -161,14 +185,12 @@ class Site(object): if user_name is not None: return user_name.value - return None - def _get_username_from_api(self): """Do a simple API query to get our username and return it. This is a reliable way to make sure we are actually logged in, because it doesn't deal with annoying cookie logic, but it results in an API - query that is unnecessary in many cases. + query that is unnecessary in some cases. Called by _get_username() (in turn called by get_user() with no username argument) when cookie lookup fails, probably indicating that @@ -208,8 +230,24 @@ class Site(object): pass def _login(self, login, token=None, attempt=0): - """ - Docstring needed + """Safely login through the API. + + Normally, this is called by __init__() if a username and password have + been provided and no valid login cookies were found. 
The only other + time it needs to be called is when those cookies expire, which is done + automatically by api_query() if a query fails. + + Recent versions of MediaWiki's API have fixed a CSRF vulnerability, + requiring login to be done in two separate requests. If the response + from our initial request is "NeedToken", we'll do another one with + the token. If login is successful, we'll try to save our cookiejar. + + Raises LoginError on login errors (duh), like bad passwords and + nonexistent usernames. + + `login` is a (username, password) tuple. `token` is the token returned + from our first request, and `attempt` is to prevent getting stuck in a + loop if MediaWiki isn't acting right. """ name, password = login params = {"action": "login", "lgname": name, "lgpassword": password} @@ -237,8 +275,11 @@ class Site(object): raise LoginError(e) def _logout(self): - """ - Docstring needed + """Safely logout through the API. + + We'll do a simple API request (api.php?action=logout), clear our + cookiejar (which probably contains now-invalidated cookies) and try to + save it, if it supports that sort of thing. """ params = {"action": "logout"} self.api_query(params) @@ -246,8 +287,23 @@ class Site(object): self._save_cookiejar() def api_query(self, params): - """ - Docstring needed + """Do an API query with `params` as a dict of parameters. + + This will first attempt to construct an API url from self._base_url and + self._script_path. We need both of these, or else we'll raise + SiteAPIError. + + We'll encode the given params, adding format=json along the way, and + make the request through self._opener, which has built-in cookie + support via self._cookiejar, a User-Agent + (wiki.tools.constants.USER_AGENT), and Accept-Encoding set to "gzip". + Assuming everything went well, we'll gunzip the data (if compressed), + load it as a JSON object, and return it. + + If our request failed, we'll raise SiteAPIError with details. 
+ + There's helpful MediaWiki API documentation at + . """ if self._base_url is None or self._script_path is None: e = "Tried to do an API query, but no API URL is known." @@ -280,32 +336,32 @@ class Site(object): return loads(result) # parse as a JSON object def name(self): - """ - Docstring needed - """ + """Returns the Site's name (or "wikiid" in the API), like "enwiki".""" return self._name def project(self): - """ - Docstring needed - """ + """Returns the Site's project name in lowercase, like "wikipedia".""" return self._project def lang(self): - """ - Docstring needed - """ + """Returns the Site's language, like "en" or "es".""" return self._lang def domain(self): - """ - Docstring needed - """ + """Returns the Site's web domain, like "en.wikipedia.org".""" return urlparse(self._base_url).netloc def namespace_id_to_name(self, ns_id, all=False): - """ - Docstring needed + """Given a namespace ID, returns associated namespace names. + + If all is False (default), we'll return the first name in the list, + which is usually the localized version. Otherwise, we'll return the + entire list, which includes the canonical name. + + For example, returns u"Wikipedia" if ns_id=4 and all=False on enwiki; + returns [u"Wikipedia", u"Project"] if ns_id=4 and all=True. + + Raises NamespaceNotFoundError if the ID is not found. """ try: if all: @@ -317,8 +373,12 @@ class Site(object): raise NamespaceNotFoundError(e) def namespace_name_to_id(self, name): - """ - Docstring needed + """Given a namespace name, returns the associated ID. + + Like namespace_id_to_name(), but reversed. Case is ignored, because + namespaces are assumed to be case-insensitive. + + Raises NamespaceNotFoundError if the name is not found. """ lname = name.lower() for ns_id, names in self._namespaces.items(): @@ -330,8 +390,14 @@ class Site(object): raise NamespaceNotFoundError(e) def get_page(self, pagename): - """ - Docstring needed + """Returns a Page object for the given pagename. 
+ + Will return a Category object instead if the given pagename is in the + category namespace. As Category is a subclass of Page, this should not + cause problems. + + Note that this doesn't do any checks for existence or + redirect-following - Page's methods provide that. """ prefixes = self.namespace_id_to_name(NS_CATEGORY, all=True) prefix = pagename.split(":", 1)[0] @@ -341,16 +407,20 @@ class Site(object): return Page(self, pagename) def get_category(self, catname): - """ - Docstring needed + """Returns a Category object for the given category name. + + `catname` should be given *without* a namespace prefix. This method is + really just shorthand for get_page("Category:" + catname). """ prefix = self.namespace_id_to_name(NS_CATEGORY) pagename = "{0}:{1}".format(prefix, catname) return Category(self, pagename) def get_user(self, username=None): - """ - Docstring needed + """Returns a User object for the given username. + + If `username` is left as None, then a User object representing the + currently logged-in (or anonymous!) user is returned. """ if username is None: username = self._get_username() From a515a004c87526960e0aa297bf5a41a0224de771 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 2 Aug 2011 15:50:59 -0400 Subject: [PATCH 16/19] Docstrings for everything in User, cleaned stuff up a bit. --- wiki/tools/functions.py | 14 +++- wiki/tools/user.py | 189 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 138 insertions(+), 65 deletions(-) diff --git a/wiki/tools/functions.py b/wiki/tools/functions.py index bc7b187..ab18609 100644 --- a/wiki/tools/functions.py +++ b/wiki/tools/functions.py @@ -165,9 +165,17 @@ def get_site(name=None, project=None, lang=None): raise SiteNotFoundError(e) def add_site(): - """STUB: config editing is required first""" + """STUB: config editing is required first. + + Returns True if the site was added successfully or False if the site was + already in our config. 
Raises ConfigError if saving the updated file failed + for some reason.""" pass -def del_site(): - """STUB: config editing is required first""" +def del_site(name): + """STUB: config editing is required first. + + Returns True if the site was removed successfully or False if the site was + not in our config originally. Raises ConfigError if saving the updated file + failed for some reason.""" pass diff --git a/wiki/tools/user.py b/wiki/tools/user.py index be71515..16919e1 100644 --- a/wiki/tools/user.py +++ b/wiki/tools/user.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- +from time import strptime + from wiki.tools.constants import * from wiki.tools.exceptions import UserNotFoundError from wiki.tools.page import Page @@ -7,34 +9,53 @@ from wiki.tools.page import Page class User(object): """ EarwigBot's Wiki Toolset: User Class + + Represents a User on a given Site. Has methods for getting a bunch of + information about the user, such as editcount and user rights, methods for + returning the user's userpage and talkpage, etc. + + Public methods: + name -- returns the user's username + exists -- returns True if the user exists, False if they do not + userid -- returns an integer ID representing the user + blockinfo -- returns information about a current block on the user + groups -- returns a list of the user's groups + rights -- returns a list of the user's rights + editcount -- returns the number of edits made by the user + registration -- returns the time the user registered as a time.struct_time + emailable -- returns True if you can email the user, False if you cannot + gender -- returns the user's gender ("male", "female", or "unknown") + get_userpage -- returns a Page object representing the user's userpage + get_talkpage -- returns a Page object representing the user's talkpage """ def __init__(self, site, name): - """ - Docstring needed - """ - # Site instance, for doing API queries, etc - self.site = site + """Constructor for new User instances. 
- # Username - self._name = name + Takes two arguments, a Site object (necessary for doing API queries), + and the name of the user, preferably without "User:" in front, although + this prefix will be automatically removed by the API if given. - # Attributes filled in by an API query - self._exists = None - self._userid = None - self._blockinfo = None - self._groups = None - self._rights = None - self._editcount = None - self._registration = None - self._emailable = None - self._gender = None + You can also use site.get_user() instead, which returns a User object, + and is preferred. - def _get_attribute(self, attr, force): + We won't do any API queries yet for basic information about the user - + save that for when the information is requested. """ - Docstring needed + self._site = site + self._name = name + + def _get_attribute(self, attr, force): + """Internally used to get an attribute by name. + + We'll call _load_attributes() to get this (and all other attributes) + from the API if it is not already defined. If `force` is True, we'll + re-load them even if they've already been loaded. + + Raises UserNotFoundError if a nonexistant user prevents us from + returning a certain attribute. """ - if self._exists is None or force: + if not hasattr(self, attr) or force: self._load_attributes() if self._exists is False: e = "User '{0}' does not exist.".format(self._name) @@ -42,30 +63,42 @@ class User(object): return getattr(self, attr) def _load_attributes(self): - """ - Docstring needed + """Internally used to load all attributes from the API. + + Normally, this is called by _get_attribute() when a requested attribute + is not defined. This defines it. 
""" params = {"action": "query", "list": "users", "ususers": self._name, "usprop": "blockinfo|groups|rights|editcount|registration|emailable|gender"} - result = self.site.api_query(params) + result = self._site.api_query(params) + res = result["query"]["users"][0] # normalize our username in case it was entered oddly - self._name = result["query"]["users"][0]["name"] + self._name = res["name"] try: - self._userid = result["query"]["users"][0]["userid"] + self._userid = res["userid"] except KeyError: # userid is missing, so user does not exist self._exists = False return self._exists = True - res = result['query']['users'][0] + + try: + self._blockinfo = { + "by": res["blockedby"], + "reason": res["blockreason"], + "expiry": res["blockexpiry"] + } + except KeyError: + self._blockinfo = False self._groups = res["groups"] - self._rights = res["rights"] + self._rights = res["rights"].values() self._editcount = res["editcount"] - self._registration = res["registration"] - self._gender = res["gender"] + + reg = res["registration"] + self._registration = strptime(reg, "%Y-%m-%dT%H:%M:%SZ") try: res["emailable"] @@ -74,88 +107,120 @@ class User(object): else: self._emailable = True - try: - self._blockinfo = {"by": res["blockedby"], - "reason": res["blockreason"], "expiry": res["blockexpiry"]} - except KeyError: - self._blockinfo = False + self._gender = res["gender"] def name(self, force=False): - """ - Docstring needed + """Returns the user's name. + + If `force` is True, we will load the name from the API and return that. + This could potentially return a "normalized" version of the name - for + example, without a "User:" prefix or without underscores. Unlike other + attribute getters, this will never make an API query without `force`. + + Note that if another attribute getter, like exists(), has already been + called, then the username has already been normalized. 
""" if force: self._load_attributes() return self._name def exists(self, force=False): - """ - Docstring needed + """Returns True if the user exists, or False if they do not. + + Makes an API query if `force` is True or if we haven't made one + already. """ if self._exists is None or force: self._load_attributes() return self._exists def userid(self, force=False): - """ - Docstring needed + """Returns an integer ID used by MediaWiki to represent the user. + + Raises UserNotFoundError if the user does not exist. Makes an API query + if `force` is True or if we haven't made one already. """ return self._get_attribute("_userid", force) def blockinfo(self, force=False): - """ - Docstring needed + """Returns information about a current block on the user. + + If the user is not blocked, returns False. If they are, returns a dict + with three keys: "by" is the blocker's username, "reason" is the reason + why they were blocked, and "expiry" is when the block expires. + + Raises UserNotFoundError if the user does not exist. Makes an API query + if `force` is True or if we haven't made one already. """ return self._get_attribute("_blockinfo", force) def groups(self, force=False): - """ - Docstring needed + """Returns a list of groups this user is in, including "*". + + Raises UserNotFoundError if the user does not exist. Makes an API query + if `force` is True or if we haven't made one already. """ return self._get_attribute("_groups", force) def rights(self, force=False): - """ - Docstring needed + """Returns a list of this user's rights. + + Raises UserNotFoundError if the user does not exist. Makes an API query + if `force` is True or if we haven't made one already. """ return self._get_attribute("_rights", force) def editcount(self, force=False): - """ - Docstring needed + """Returns the number of edits made by the user. + + Raises UserNotFoundError if the user does not exist. Makes an API query + if `force` is True or if we haven't made one already. 
""" return self._get_attribute("_editcount", force) def registration(self, force=False): - """ - Docstring needed + """Returns the time the user registered as a time.struct_time object. + + Raises UserNotFoundError if the user does not exist. Makes an API query + if `force` is True or if we haven't made one already. """ return self._get_attribute("_registration", force) def emailable(self, force=False): - """ - Docstring needed + """Returns True if the user can be emailed, or False if they cannot. + + Raises UserNotFoundError if the user does not exist. Makes an API query + if `force` is True or if we haven't made one already. """ return self._get_attribute("_emailable", force) def gender(self, force=False): - """ - Docstring needed + """Returns the user's gender. + + Can return either "male", "female", or "unknown", if they did not + specify it. + + Raises UserNotFoundError if the user does not exist. Makes an API query + if `force` is True or if we haven't made one already. """ return self._get_attribute("_gender", force) def get_userpage(self): + """Returns a Page object representing the user's userpage. + + No checks are made to see if it exists or not. Proper site namespace + conventions are followed. """ - Docstring needed - """ - prefix = self.site.namespace_id_to_name(NS_USER) + prefix = self._site.namespace_id_to_name(NS_USER) pagename = ''.join((prefix, ":", self._name)) - return Page(self.site, pagename) + return Page(self._site, pagename) def get_talkpage(self): + """Returns a Page object representing the user's talkpage. + + No checks are made to see if it exists or not. Proper site namespace + conventions are followed. 
""" - Docstring needed - """ - prefix = self.site.namespace_id_to_name(NS_USER_TALK) + prefix = self._site.namespace_id_to_name(NS_USER_TALK) pagename = ''.join((prefix, ":", self._name)) - return Page(self.site, pagename) + return Page(self._site, pagename) From 4bada57a9bfa21aa62f0ccbe0befc57061ea8d50 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 4 Aug 2011 17:31:58 -0400 Subject: [PATCH 17/19] Some quick updates to wikitools before I commit Page. * Site: api_query() -> _api_query(); api_query() acts as a wrapper for _api_query(), accepting API params as **kwargs. * Various cleanup throughout and minor fixes. --- wiki/tools/category.py | 2 +- wiki/tools/exceptions.py | 11 +++- wiki/tools/site.py | 131 +++++++++++++++++++++++++---------------------- wiki/tools/user.py | 10 ++-- 4 files changed, 85 insertions(+), 69 deletions(-) diff --git a/wiki/tools/category.py b/wiki/tools/category.py index f6e301f..588956a 100644 --- a/wiki/tools/category.py +++ b/wiki/tools/category.py @@ -13,6 +13,6 @@ class Category(Page): """ params = {"action": "query", "list": "categorymembers", "cmlimit": limit, "cmtitle": self.title} - result = self.site.api_query(params) + result = self._site._api_query(params) members = result['query']['categorymembers'] return [member["title"] for member in members] diff --git a/wiki/tools/exceptions.py b/wiki/tools/exceptions.py index d28cac2..d16a1b9 100644 --- a/wiki/tools/exceptions.py +++ b/wiki/tools/exceptions.py @@ -30,5 +30,14 @@ class PermissionsError(WikiToolsetError): class NamespaceNotFoundError(WikiToolsetError): """A requested namespace name or namespace ID does not exist.""" +class PageNotFoundError(WikiToolsetError): + """Attempting to get certain information about a page that does not + exist.""" + +class InvalidPageError(WikiToolsetError): + """Attempting to get certain information about a page whose title is + invalid.""" + class UserNotFoundError(WikiToolsetError): - """Attempting to get information about a user that 
does not exist.""" + """Attempting to get certain information about a user that does not + exist.""" diff --git a/wiki/tools/site.py b/wiki/tools/site.py index 65ed9b5..933bc43 100644 --- a/wiki/tools/site.py +++ b/wiki/tools/site.py @@ -75,6 +75,55 @@ class Site(object): if logged_in_as is None or name != logged_in_as: self._login(login) + def _api_query(self, params): + """Do an API query with `params` as a dict of parameters. + + This will first attempt to construct an API url from self._base_url and + self._script_path. We need both of these, or else we'll raise + SiteAPIError. + + We'll encode the given params, adding format=json along the way, and + make the request through self._opener, which has built-in cookie + support via self._cookiejar, a User-Agent + (wiki.tools.constants.USER_AGENT), and Accept-Encoding set to "gzip". + Assuming everything went well, we'll gunzip the data (if compressed), + load it as a JSON object, and return it. + + If our request failed, we'll raise SiteAPIError with details. + + There's helpful MediaWiki API documentation at + . + """ + if self._base_url is None or self._script_path is None: + e = "Tried to do an API query, but no API URL is known." + raise SiteAPIError(e) + + url = ''.join((self._base_url, self._script_path, "/api.php")) + params["format"] = "json" # this is the only format we understand + data = urlencode(params) + + print url, data # debug code + + try: + response = self._opener.open(url, data) + except URLError as error: + if hasattr(error, "reason"): + e = "API query at {0} failed because {1}." + e = e.format(error.geturl, error.reason) + elif hasattr(error, "code"): + e = "API query at {0} failed; got an error code of {1}." + e = e.format(error.geturl, error.code) + else: + e = "API query failed." 
+ raise SiteAPIError(e) + else: + result = response.read() + if response.headers.get("Content-Encoding") == "gzip": + stream = StringIO(result) + gzipper = GzipFile(fileobj=stream) + result = gzipper.read() + return loads(result) # parse as a JSON object + def _load_attributes(self, force=False): """Load data about our Site from the API. @@ -94,13 +143,13 @@ class Site(object): if self._namespaces is None or force: params["siprop"] = "general|namespaces|namespacealiases" - result = self.api_query(params) + result = self._api_query(params) self._load_namespaces(result) elif all(attrs): # everything is already specified and we're not told return # to force a reload, so do nothing else: # we're only loading attributes other than _namespaces params["siprop"] = "general" - result = self.api_query(params) + result = self._api_query(params) res = result["query"]["general"] self._name = res["wikiid"] @@ -197,7 +246,7 @@ class Site(object): we are logged out. """ params = {"action": "query", "meta": "userinfo"} - result = self.api_query(params) + result = self._api_query(params) return result["query"]["userinfo"]["name"] def _get_username(self): @@ -253,7 +302,7 @@ class Site(object): params = {"action": "login", "lgname": name, "lgpassword": password} if token is not None: params["lgtoken"] = token - result = self.api_query(params) + result = self._api_query(params) res = result["login"]["result"] if res == "Success": @@ -282,58 +331,16 @@ class Site(object): save it, if it supports that sort of thing. """ params = {"action": "logout"} - self.api_query(params) + self._api_query(params) self._cookiejar.clear() self._save_cookiejar() - def api_query(self, params): - """Do an API query with `params` as a dict of parameters. - - This will first attempt to construct an API url from self._base_url and - self._script_path. We need both of these, or else we'll raise - SiteAPIError. + def api_query(self, **kwargs): + """Do an API query with `kwargs` as the parameters. 
- We'll encode the given params, adding format=json along the way, and - make the request through self._opener, which has built-in cookie - support via self._cookiejar, a User-Agent - (wiki.tools.constants.USER_AGENT), and Accept-Encoding set to "gzip". - Assuming everything went well, we'll gunzip the data (if compressed), - load it as a JSON object, and return it. - - If our request failed, we'll raise SiteAPIError with details. - - There's helpful MediaWiki API documentation at - . + See _api_query()'s documentation for details. """ - if self._base_url is None or self._script_path is None: - e = "Tried to do an API query, but no API URL is known." - raise SiteAPIError(e) - - url = ''.join((self._base_url, self._script_path, "/api.php")) - params["format"] = "json" # this is the only format we understand - data = urlencode(params) - - print url, data # debug code - - try: - response = self._opener.open(url, data) - except URLError as error: - if hasattr(error, "reason"): - e = "API query at {0} failed because {1}." - e = e.format(error.geturl, error.reason) - elif hasattr(error, "code"): - e = "API query at {0} failed; got an error code of {1}." - e = e.format(error.geturl, error.code) - else: - e = "API query failed." - raise SiteAPIError(e) - else: - result = response.read() - if response.headers.get("Content-Encoding") == "gzip": - stream = StringIO(result) - gzipper = GzipFile(fileobj=stream) - result = gzipper.read() - return loads(result) # parse as a JSON object + return self._api_query(kwargs) def name(self): """Returns the Site's name (or "wikiid" in the API), like "enwiki".""" @@ -389,32 +396,32 @@ class Site(object): e = "There is no namespace with name '{0}'.".format(name) raise NamespaceNotFoundError(e) - def get_page(self, pagename): - """Returns a Page object for the given pagename. + def get_page(self, title, follow_redirects=False): + """Returns a Page object for the given title (pagename). 
- Will return a Category object instead if the given pagename is in the + Will return a Category object instead if the given title is in the category namespace. As Category is a subclass of Page, this should not cause problems. - Note that this doesn't do any checks for existence or + Note that this doesn't do any direct checks for existence or redirect-following - Page's methods provide that. """ prefixes = self.namespace_id_to_name(NS_CATEGORY, all=True) - prefix = pagename.split(":", 1)[0] - if prefix != pagename: # avoid a page that is simply "Category" + prefix = title.split(":", 1)[0] + if prefix != title: # avoid a page that is simply "Category" if prefix in prefixes: - return Category(self, pagename) - return Page(self, pagename) + return Category(self, title, follow_redirects) + return Page(self, title, follow_redirects) - def get_category(self, catname): + def get_category(self, catname, follow_redirects=False): """Returns a Category object for the given category name. `catname` should be given *without* a namespace prefix. This method is really just shorthand for get_page("Category:" + catname). """ prefix = self.namespace_id_to_name(NS_CATEGORY) - pagename = "{0}:{1}".format(prefix, catname) - return Category(self, pagename) + pagename = ':'.join((prefix, catname)) + return Category(self, pagename, follow_redirects) def get_user(self, username=None): """Returns a User object for the given username. diff --git a/wiki/tools/user.py b/wiki/tools/user.py index 16919e1..3b0173f 100644 --- a/wiki/tools/user.py +++ b/wiki/tools/user.py @@ -69,8 +69,8 @@ class User(object): is not defined. This defines it. 
""" params = {"action": "query", "list": "users", "ususers": self._name, - "usprop": "blockinfo|groups|rights|editcount|registration|emailable|gender"} - result = self._site.api_query(params) + "usprop": "blockinfo|groups|rights|editcount|registration|emailable|gender"} + result = self._site._api_query(params) res = result["query"]["users"][0] # normalize our username in case it was entered oddly @@ -130,7 +130,7 @@ class User(object): Makes an API query if `force` is True or if we haven't made one already. """ - if self._exists is None or force: + if not hasattr(self, "_exists") or force: self._load_attributes() return self._exists @@ -212,7 +212,7 @@ class User(object): conventions are followed. """ prefix = self._site.namespace_id_to_name(NS_USER) - pagename = ''.join((prefix, ":", self._name)) + pagename = ':'.join((prefix, self._name)) return Page(self._site, pagename) def get_talkpage(self): @@ -222,5 +222,5 @@ class User(object): conventions are followed. """ prefix = self._site.namespace_id_to_name(NS_USER_TALK) - pagename = ''.join((prefix, ":", self._name)) + pagename = ':'.join((prefix, self._name)) return Page(self._site, pagename) From e4f8fb2e21cdbd8c2b0d93a7c0cbbe5b7231ac28 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 5 Aug 2011 00:43:51 -0400 Subject: [PATCH 18/19] Major additions to Page in wikitools. * Page: added about 400 lines of rambling nonsense. * Exceptions: added RedirectError. 
--- wiki/tools/exceptions.py | 4 + wiki/tools/page.py | 413 +++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 401 insertions(+), 16 deletions(-) diff --git a/wiki/tools/exceptions.py b/wiki/tools/exceptions.py index d16a1b9..f36dae3 100644 --- a/wiki/tools/exceptions.py +++ b/wiki/tools/exceptions.py @@ -38,6 +38,10 @@ class InvalidPageError(WikiToolsetError): """Attempting to get certain information about a page whose title is invalid.""" +class RedirectError(WikiToolsetError): + """Page's get_redirect_target() method failed because the page is either + not a redirect, or it is malformed.""" + class UserNotFoundError(WikiToolsetError): """Attempting to get certain information about a user that does not exist.""" diff --git a/wiki/tools/page.py b/wiki/tools/page.py index d267674..8ae25f1 100644 --- a/wiki/tools/page.py +++ b/wiki/tools/page.py @@ -1,33 +1,414 @@ # -*- coding: utf-8 -*- +import re +from urllib import quote + +from wiki.tools.exceptions import * + class Page(object): """ EarwigBot's Wiki Toolset: Page Class + + Represents a Page on a given Site. Has methods for getting information + about the page, getting page content, and so on. Category is a subclass of + Page with additional methods. 
+ + Public methods: + title -- returns the page's title, or pagename + exists -- returns whether the page exists + pageid -- returns an integer ID representing the page + url -- returns the page's URL + namespace -- returns the page's namespace as an integer + protection -- returns the page's current protection status + is_talkpage -- returns True if the page is a talkpage, else False + is_redirect -- returns True if the page is a redirect, else False + toggle_talk -- returns a content page's talk page, or vice versa + get -- returns page content + get_redirect_target -- if the page is a redirect, returns its destination """ - def __init__(self, site, title): - """ - Docstring needed + def __init__(self, site, title, follow_redirects=False): + """Constructor for new Page instances. + + Takes three arguments: a Site object, the Page's title (or pagename), + and whether or not to follow redirects (optional, defaults to False). + + As with User, site.get_page() is preferred. Site's method has support + for a default `follow_redirects` value in our config, while __init__ + always defaults to False. + + __init__ will not do any API queries, but it will use basic namespace + logic to determine our namespace ID and if we are a talkpage. """ - self.site = site - self.title = title + self._site = site + self._title = title.strip() + self._follow_redirects = self._keep_following = follow_redirects + + self._exists = 0 + self._pageid = None + self._is_redirect = None + self._lastrevid = None + self._protection = None + self._fullurl = None self._content = None - def exists(self): + # Try to determine the page's namespace using our site's namespace + # converter: + prefix = self._title.split(":", 1)[0] + if prefix != title: # ignore a page that's titled "Category" or "User" + try: + self._namespace = self._site.namespace_name_to_id(prefix) + except NamespaceNotFoundError: + self._namespace = 0 + else: + self._namespace = 0 + + # Is this a talkpage? 
Talkpages have odd IDs, while content pages have + # even IDs, excluding the "special" namespaces: + if self._namespace < 0: + self._is_talkpage = False + else: + self._is_talkpage = self._namespace % 2 == 1 + + def _force_validity(self): + """Used to ensure that our page's title is valid. + + If this method is called when our page is not valid (and after + _load_attributes() has been called), InvalidPageError will be raised. + + Note that validity != existence. If a page's title is invalid (e.g, it + contains "[") it will always be invalid, and cannot be edited. """ - Docstring needed + if self._exists == 1: + e = "Page '{0}' is invalid.".format(self._title) + raise InvalidPageError(e) + + def _force_existence(self): + """Used to ensure that our page exists. + + If this method is called when our page doesn't exist (and after + _load_attributes() has been called), PageNotFoundError will be raised. + It will also call _force_validity() beforehand. """ - pass + self._force_validity() + if self._exists == 2: + e = "Page '{0}' does not exist.".format(self._title) + raise PageNotFoundError(e) + + def _load_wrapper(self): + """Calls _load_attributes() and follows redirects if we're supposed to. + + This method will only follow redirects if follow_redirects=True was + passed to __init__() (perhaps indirectly passed by site.get_page()). + It avoids the API's &redirects param in favor of manual following, + so we can act more realistically (we don't follow double redirects, and + circular redirects don't break us). - def get(self, force_reload=False): + This will raise RedirectError if we have a problem following, but that + is a bug and should NOT happen. + + If we're following a redirect, this will make a grand total of three + API queries. It's a lot, but each one is quite small. 
""" - Docstring needed + self._load_attributes() + + if self._keep_following and self._is_redirect: + self._title = self.get_redirect_target() + self._keep_following = False # don't follow double redirects + self._content = None # reset the content we just loaded + self._load_attributes() + + def _load_attributes(self, result=None): + """Loads various data from the API in a single query. + + Loads self._title, ._exists, ._is_redirect, ._pageid, ._fullurl, + ._protection, ._namespace, ._is_talkpage, and ._lastrevid using the + API. It will do a query of its own unless `result` is provided, in + which case we'll pretend `result` is what the query returned. + + Assuming the API is sound, this should not raise any exceptions. """ - if self._content is None or force_reload: - params = {"action": "query", "prop": "revisions", - "rvprop": "content", "rvlimit": 1, "titles": self.title} - result = self.site.api_query(params) - content = result["query"]["pages"].values()[0]["revisions"][0]["*"] + if result is None: + params = {"action": "query", "prop": "info", "titles": self._title, + "inprop": "protection|url"} + result = self._site._api_query(params) + + res = result["query"]["pages"].values()[0] + + # Normalize our pagename/title thing: + self._title = res["title"] + + try: + res["redirect"] + except KeyError: + self._is_redirect = False + else: + self._is_redirect = True + + self._pageid = result["query"]["pages"].keys()[0] + if int(self._pageid) < 0: + try: + res["missing"] + except KeyError: + # If it has a negative ID and it's invalid, then break here, + # because there's no other data for us to get: + self._exists = 1 + return + else: + # If it has a negative ID and it's missing; we can still get + # data like the namespace, protection, and URL: + self._exists = 2 + else: + self._exists = 3 + + self._fullurl = res["fullurl"] + self._protection = res["protection"] + + # We've determined the namespace and talkpage status in __init__() + # based on the title, but now 
we can be sure: + self._namespace = res["ns"] + self._is_talkpage = self._namespace % 2 == 1 # talkpages have odd IDs + + # This last field will only be specified if the page exists: + try: + self._lastrevid = res["lastrevid"] + except KeyError: + pass + + def _load_content(self, result=None): + """Loads current page content from the API. + + If `result` is provided, we'll pretend that is the result of an API + query and try to get content from that. Otherwise, we'll do an API + query on our own. + + Don't call this directly, ever - use .get(force=True) if you want to + force content reloading. + """ + if result is None: + params = {"action": "query", "prop": "revisions", "rvlimit": 1, + "rvprop": "content", "titles": self._title} + result = self._site._api_query(params) + + res = result["query"]["pages"].values()[0] + try: + content = res["revisions"][0]["*"] self._content = content - return content + except KeyError: + # This can only happen if the page was deleted since we last called + # self._load_attributes(). In that case, some of our attributes are + # outdated, so force another self._load_attributes(): + self._load_attributes() + self._force_existence() + + def title(self, force=False): + """Returns the Page's title, or pagename. + + This won't do any API queries on its own unless force is True, in which + case the title will be forcibly reloaded from the API (normalizing it, + and following redirects if follow_redirects=True was passed to + __init__()). Any other methods that do API queries will reload title on + their own, however, like exists() and get(). + """ + if force: + self._load_wrapper() + return self._title + + def exists(self, force=False): + """Returns information about whether the Page exists or not. + + The returned "information" is a tuple with two items. The first is a + bool, either True if the page exists or False if it does not. The + second is a string giving more information, either "invalid", (title + is invalid, e.g. 
it contains "["), "missing", or "exists". + + Makes an API query if force is True or if we haven't already made one. + """ + cases = { + 0: (None, "unknown"), + 1: (False, "invalid"), + 2: (False, "missing"), + 3: (True, "exists"), + } + if self._exists == 0 or force: + self._load_wrapper() + return cases[self._exists] + + def pageid(self, force=False): + """Returns an integer ID representing the Page. + + Makes an API query if force is True or if we haven't already made one. + + Raises InvalidPageError or PageNotFoundError if the page name is + invalid or the page does not exist, respectively. + """ + if self._exists == 0 or force: + self._load_wrapper() + self._force_existence() # missing pages do not have IDs + return self._pageid + + def url(self, force=False): + """Returns the page's URL. + + Like title(), this won't do any API queries on its own unless force is + True. If the API was never queried for this page, we will attempt to + determine the URL ourselves based on the title. + """ + if force: + self._load_wrapper() + if self._fullurl is not None: + return self._fullurl + else: + slug = quote(self._title.replace(" ", "_"), safe="/:") + path = self._site._article_path.replace("$1", slug) + return ''.join((self._site._base_url, path)) + + def namespace(self, force=False): + """Returns the page's namespace ID (an integer). + + Like title(), this won't do any API queries on its own unless force is + True. If the API was never queried for this page, we will attempt to + determine the namespace ourselves based on the title. + """ + if force: + self._load_wrapper() + return self._namespace + + def protection(self, force=False): + """Returns the page's current protection status. + + Makes an API query if force is True or if we haven't already made one. + + Raises InvalidPageError if the page name is invalid. Will not raise an + error if the page is missing because those can still be protected. 
+ """ + if self._exists == 0 or force: + self._load_wrapper() + self._force_validity() # invalid pages cannot be protected + return self._protection + + def is_talkpage(self, force=False): + """Returns True if the page is a talkpage, else False. + + Like title(), this won't do any API queries on its own unless force is + True. If the API was never queried for this page, we will attempt to + determine the talkpage status ourselves based on its namespace ID. + """ + if force: + self._load_wrapper() + return self._is_talkpage + + def is_redirect(self, force=False): + """Returns True if the page is a redirect, else False. + + Makes an API query if force is True or if we haven't already made one. + + We will return False even if the page does not exist or is invalid. + """ + if self._exists == 0 or force: + self._load_wrapper() + return self._is_redirect + + def toggle_talk(self, force=False, follow_redirects=None): + """Returns a content page's talk page, or vice versa. + + The title of the new page is determined by namespace logic, not API + queries. We won't make any API queries on our own unless force is True, + and the only reason then would be to forcibly update the title or + follow redirects if we haven't already made an API query. + + If `follow_redirects` is anything other than None (the default), it + will be passed to the new Page's __init__(). Otherwise, we'll use the + value passed to our own __init__(). + + Will raise InvalidPageError if we try to get the talk page of a special + page (in the Special: or Media: namespaces), but we won't raise an + exception if our page is otherwise missing or invalid. 
+ """ + if force: + self._load_wrapper() + if self._namespace < 0: + ns = self._site.namespace_id_to_name(self._namespace) + e = "Pages in the {0} namespace can't have talk pages.".format(ns) + raise InvalidPageError(e) + + if self._is_talkpage: + new_ns = self._namespace - 1 + else: + new_ns = self._namespace + 1 + + try: + body = self._title.split(":", 1)[1] + except IndexError: + body = self._title + + new_prefix = self._site.namespace_id_to_name(new_ns) + + # If the new page is in namespace 0, don't do ":Title" (it's correct, + # but unnecessary), just do "Title": + if new_prefix: + new_title = ':'.join((new_prefix, body)) + else: + new_title = body + + if follow_redirects is None: + follow_redirects = self._follow_redirects + return Page(self._site, new_title, follow_redirects) + + def get(self, force=False): + """Returns page content, which is cached if you try to call get again. + + Use `force` to forcibly reload page content even if we've already + loaded some. This is good if you want to edit a page multiple times, + and you want to get updated content before you make your second edit. + + Raises InvalidPageError or PageNotFoundError if the page name is + invalid or the page does not exist, respectively. 
+ """ + if force or self._exists == 0: + # Kill two birds with one stone by doing an API query for both our + # attributes and our page content: + params = {"action": "query", "rvprop": "content", "rvlimit": 1, + "prop": "info|revisions", "inprop": "protection|url", + "titles": self._title} + result = self._site._api_query(params) + self._load_attributes(result=result) + self._force_existence() + self._load_content(result=result) + + # Follow redirects if we're told to: + if self._keep_following and self._is_redirect: + self._title = self.get_redirect_target() + self._keep_following = False # don't follow double redirects + self._content = None # reset the content we just loaded + self.get(force=True) + + return self._content + + # Make sure we're dealing with a real page here. This may be outdated + # if the page was deleted since we last called self._load_attributes(), + # but self._load_content() can handle that: + self._force_existence() + + if self._content is None: + self._load_content() + return self._content + + def get_redirect_target(self, force=False): + """If the page is a redirect, returns its destination. + + Use `force` to forcibly reload content even if we've already loaded + some before. Note that this method calls get() for page content. + + Raises InvalidPageError or PageNotFoundError if the page name is + invalid or the page does not exist, respectively. Raises RedirectError + if the page is not a redirect. + """ + content = self.get(force) + regexp = "^\s*\#\s*redirect\s*\[\[(.*?)\]\]" + try: + return re.findall(regexp, content, flags=re.IGNORECASE)[0] + except IndexError: + e = "The page does not appear to have a redirect target." + raise RedirectError(e) From a7367856ee26b4a35c244ea139307bbf67c97dd0 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 6 Aug 2011 16:34:54 -0400 Subject: [PATCH 19/19] Finished docstrings in wikitools. 
--- wiki/tools/category.py | 16 ++++++++++++++-- wiki/tools/site.py | 14 +++++++++++++- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/wiki/tools/category.py b/wiki/tools/category.py index 588956a..7ebe342 100644 --- a/wiki/tools/category.py +++ b/wiki/tools/category.py @@ -5,11 +5,23 @@ from wiki.tools.page import Page class Category(Page): """ EarwigBot's Wiki Toolset: Category Class + + Represents a Category on a given Site, a subclass of Page. Provides + additional methods, but Page's own methods should work fine on Category + objects. Site.get_page() will return a Category instead of a Page if the + given title is in the category namespace; get_category() is shorthand, + because it accepts category names without the namespace prefix. + + Public methods: + members -- returns a list of titles in the category """ def members(self, limit=50): - """ - Docstring needed + """Returns a list of titles in the category. + + If `limit` is provided, we will provide this many titles, or less if + the category is too small. `limit` defaults to 50; normal users can go + up to 500, and bots can go up to 5,000 on a single API query. """ params = {"action": "query", "list": "categorymembers", "cmlimit": limit, "cmtitle": self.title} diff --git a/wiki/tools/site.py b/wiki/tools/site.py index 933bc43..57b890d 100644 --- a/wiki/tools/site.py +++ b/wiki/tools/site.py @@ -24,6 +24,18 @@ class Site(object): won't need to call it directly, rather tools.get_site() for returning Site instances, tools.add_site() for adding new ones to config, and tools.del_site() for removing old ones from config, should suffice. 
+ + Public methods: + name -- returns our name (or "wikiid"), like "enwiki" + project -- returns our project name, like "wikipedia" + lang -- returns our language code, like "en" + domain -- returns our web domain, like "en.wikipedia.org" + api_query -- does an API query with the given kwargs as params + namespace_id_to_name -- given a namespace ID, returns associated name(s) + namespace_name_to_id -- given a namespace name, returns associated id + get_page -- returns a Page object for the given title + get_category -- returns a Category object for the given title + get_user -- returns a User object for the given username """ def __init__(self, name=None, project=None, lang=None, base_url=None, @@ -351,7 +363,7 @@ class Site(object): return self._project def lang(self): - """Returns the Site's language, like "en" or "es".""" + """Returns the Site's language code, like "en" or "es".""" return self._lang def domain(self):