From edfa1d2d9e040c3a9c78cd176792c1ffa56c64f7 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Sun, 24 Jul 2011 10:19:07 -0400
Subject: [PATCH 01/19] beginning wikitools core development with a few
 skeleton classes and one (nearly) working function, tools.get_site() (doesn't
 return a Site object yet)

---
 wiki/tools/__init__.py   | 19 ++++++++++++
 wiki/tools/category.py   |  9 ++++++
 wiki/tools/exceptions.py | 18 ++++++++++++
 wiki/tools/functions.py  | 75 ++++++++++++++++++++++++++++++++++++++++++++++++
 wiki/tools/page.py       |  9 ++++++
 wiki/tools/site.py       |  9 ++++++
 wiki/tools/user.py       |  9 ++++++
 7 files changed, 148 insertions(+)
 create mode 100644 wiki/tools/category.py
 create mode 100644 wiki/tools/exceptions.py
 create mode 100644 wiki/tools/functions.py
 create mode 100644 wiki/tools/page.py
 create mode 100644 wiki/tools/site.py
 create mode 100644 wiki/tools/user.py

diff --git a/wiki/tools/__init__.py b/wiki/tools/__init__.py
index e69de29..2e64c45 100644
--- a/wiki/tools/__init__.py
+++ b/wiki/tools/__init__.py
@@ -0,0 +1,19 @@
+# -*- coding: utf-8  -*-
+
+"""
+EarwigBot's Wiki Toolset
+
+This is a collection of classes and functions to read from and write to
+Wikipedia and other wiki sites. No connection whatsoever to python-wikitools
+written by Mr.Z-man, other than a similar purpose. We share no code.
+
+Import the toolset with `from wiki import tools`.
+"""
+
+from wiki.tools.exceptions import *
+from wiki.tools.functions import *
+
+from wiki.tools.category import Category
+from wiki.tools.page import Page
+from wiki.tools.site import Site
+from wiki.tools.user import User
diff --git a/wiki/tools/category.py b/wiki/tools/category.py
new file mode 100644
index 0000000..3df8477
--- /dev/null
+++ b/wiki/tools/category.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8  -*-
+
+class Category(object):
+    """
+    EarwigBot's Wiki Toolset: Category Class
+    """
+
+    def __init__(self):
+        pass
diff --git a/wiki/tools/exceptions.py b/wiki/tools/exceptions.py
new file mode 100644
index 0000000..b515c45
--- /dev/null
+++ b/wiki/tools/exceptions.py
@@ -0,0 +1,18 @@
+# -*- coding: utf-8  -*-
+
+"""
+EarwigBot's Wiki Toolset: Exceptions
+
+This module contains all exceptions used by the wiki.tools package.
+"""
+
+class WikiToolsetError(Exception):
+    """Base exception class for errors in the Wiki Toolset."""
+
+class ConfigError(WikiToolsetError):
+    """An error occured when trying to do something involving our config
+    file. Maybe it hasn't been loaded?"""
+
+class SiteNotFoundError(WikiToolsetError):
+    """A site matching the args given to get_site() could not be found in the
+    config file."""
diff --git a/wiki/tools/functions.py b/wiki/tools/functions.py
new file mode 100644
index 0000000..220e4f3
--- /dev/null
+++ b/wiki/tools/functions.py
@@ -0,0 +1,75 @@
+# -*- coding: utf-8  -*-
+
+"""
+EarwigBot's Wiki Toolset: Misc Functions
+
+This module, a component of the wiki.tools package, contains miscellaneous
+functions that are not methods of any class, like get_site().
+
+There's no need to import this module explicitly. All functions here are
+automatically available from wiki.tools.
+"""
+
+from core import config
+from wiki.tools.exceptions import ConfigError, SiteNotFoundError
+from wiki.tools.site import Site
+
+__all__ = ["get_site"]
+
+def get_site(name=None, project=None, lang=None):
+    """Returns a Site instance based on information from our config file.
+
+    With no arguments, returns the default site as specified by our config
+    file. This is default = config.wiki["defaultSite"];
+    config.wiki["sites"][default].
+
+    With `name` specified, returns the site specified by
+    config.wiki["sites"][name].
+
+    With `project` and `lang` specified, returns the site specified by the
+    member of config.wiki["sites"], `s`, for which s["project"] == project and
+    s["lang"] == lang.
+
+    Specifying a project without a lang or a lang without a project will raise
+    TypeError. If all three args are specified, `name` will be first tried,
+    then `project` and `lang`. If, with any number of args, a site cannot be
+    found in the config, SiteNotFoundError is raised.
+    """
+    if config._config is None:
+        e = "Config file has not been loaded: use config.verify_config() and then config.parse_config() to do so."
+        raise ConfigError(e)
+
+    if (project is None and lang is not None) or (project is not None and lang is None):
+        e = "Keyword arguments 'lang' and 'project' must be specified together."
+        raise TypeError(e)
+
+    if name is None and project is None:  # no args given (project is None implies lang is None)
+        try:  # ...so use the default site
+            default = config.wiki["defaultSite"]
+        except KeyError:
+            e = "Default site is not specified in config."
+            raise SiteNotFoundError(e)
+        try:
+            return config.wiki["sites"][default]
+        except KeyError:
+            e = "Default site specified by config is not in the config's sites list."
+            raise SiteNotFoundError(e)
+
+    if name is not None:  # name arg given, but don't look at others yet
+        try:
+            return config.wiki["sites"][name]
+        except KeyError:
+            if project is None:  # implies lang is None, i.e., only name was given
+                e = "Site '{0}' not found in config.".format(name)
+                raise SiteNotFoundError(e)
+            for site in config.wiki["sites"].values():
+                if site["project"] == project and site["lang"] == lang:
+                    return site
+            e = "Neither site '{0}' nor site '{1}:{2}' found in config.".format(name, project, lang)
+            raise SiteNotFoundError(e)
+
+    for site in config.wiki["sites"].values():  # implied lang and proj are not None
+        if site["project"] == project and site["lang"] == lang:
+            return site
+    e = "Site '{0}:{1}' not found in config.".format(project, lang)
+    raise SiteNotFoundError(e)
diff --git a/wiki/tools/page.py b/wiki/tools/page.py
new file mode 100644
index 0000000..3a30a70
--- /dev/null
+++ b/wiki/tools/page.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8  -*-
+
+class Page(object):
+    """
+    EarwigBot's Wiki Toolset: Page Class
+    """
+
+    def __init__(self):
+        pass
diff --git a/wiki/tools/site.py b/wiki/tools/site.py
new file mode 100644
index 0000000..caba1e4
--- /dev/null
+++ b/wiki/tools/site.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8  -*-
+
+class Site(object):
+    """
+    EarwigBot's Wiki Toolset: Site Class
+    """
+
+    def __init__(self):
+        pass
diff --git a/wiki/tools/user.py b/wiki/tools/user.py
new file mode 100644
index 0000000..5044e50
--- /dev/null
+++ b/wiki/tools/user.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8  -*-
+
+class User(object):
+    """
+    EarwigBot's Wiki Toolset: User Class
+    """
+
+    def __init__(self):
+        pass

From f4219ffad031592344e2a8508bf3daa8e17de268 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Sun, 24 Jul 2011 12:03:20 -0400
Subject: [PATCH 02/19] More work on the early stages of wikitools:

tools.get_site() returns an actual Site object, thanks to tools.functions._get_site_object_from_dict().
Site objects now have a working (but primitive) .api_query(), .get_page(), .get_category(), and .get_user().
Page objects now have a working .get(), for getting page content from the API.
Category is now a subclass of Page, and has its own .get_members(), which returns a list of titles.

Still need to implement proper namespace logic in pages.
---
 wiki/tools/category.py  | 15 ++++++++++++---
 wiki/tools/functions.py | 34 ++++++++++++++++++++++++++++------
 wiki/tools/page.py      | 19 +++++++++++++++++--
 wiki/tools/site.py      | 48 ++++++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 103 insertions(+), 13 deletions(-)

diff --git a/wiki/tools/category.py b/wiki/tools/category.py
index 3df8477..1dfdba3 100644
--- a/wiki/tools/category.py
+++ b/wiki/tools/category.py
@@ -1,9 +1,18 @@
 # -*- coding: utf-8  -*-
 
-class Category(object):
+from wiki.tools.page import Page
+
+class Category(Page):
     """
     EarwigBot's Wiki Toolset: Category Class
     """
 
-    def __init__(self):
-        pass
+    def get_members(limit=50):
+        """
+        Docstring needed
+        """
+        params = {"action": "query", "list": "categorymembers", "cmlimit": limit}
+        params["cmtitle"] = self.title
+        result = self.site.api_query(params)
+        members = result['query']['categorymembers']
+        return [member["title"] for member in members]
diff --git a/wiki/tools/functions.py b/wiki/tools/functions.py
index 220e4f3..195400c 100644
--- a/wiki/tools/functions.py
+++ b/wiki/tools/functions.py
@@ -16,6 +16,25 @@ from wiki.tools.site import Site
 
 __all__ = ["get_site"]
 
+def _get_site_object_from_dict(name, d):
+    """Return a Site object based on the contents of a dict, probably acquired
+    through our config file, and a separate name."""
+    project = d["project"]
+    lang = d["lang"]
+    try:
+        api = d["apiURL"]
+    except KeyError:
+        api = None
+    try:
+        sql_server = d["sqlServer"]
+    except KeyError:
+        sql_server = None
+    try:
+        sql_db = d["sqlDB"]
+    except KeyError:
+        sql_db = None
+    return Site(name, project, lang, api, (sql_server, sql_db))
+
 def get_site(name=None, project=None, lang=None):
     """Returns a Site instance based on information from our config file.
 
@@ -50,26 +69,29 @@ def get_site(name=None, project=None, lang=None):
             e = "Default site is not specified in config."
             raise SiteNotFoundError(e)
         try:
-            return config.wiki["sites"][default]
+            site = config.wiki["sites"][default]
         except KeyError:
             e = "Default site specified by config is not in the config's sites list."
             raise SiteNotFoundError(e)
+        return _get_site_object_from_dict(default, site)
 
     if name is not None:  # name arg given, but don't look at others yet
         try:
-            return config.wiki["sites"][name]
+            site = config.wiki["sites"][name]
         except KeyError:
             if project is None:  # implies lang is None, i.e., only name was given
                 e = "Site '{0}' not found in config.".format(name)
                 raise SiteNotFoundError(e)
-            for site in config.wiki["sites"].values():
+            for sitename, site in config.wiki["sites"].items():
                 if site["project"] == project and site["lang"] == lang:
-                    return site
+                    return _get_site_object_from_dict(sitename, site)
             e = "Neither site '{0}' nor site '{1}:{2}' found in config.".format(name, project, lang)
             raise SiteNotFoundError(e)
+        else:
+            return _get_site_object_from_dict(name, site)
 
-    for site in config.wiki["sites"].values():  # implied lang and proj are not None
+    for sitename, site in config.wiki["sites"].items():  # implied lang and proj are not None
         if site["project"] == project and site["lang"] == lang:
-            return site
+            return _get_site_object_from_dict(sitename, site)
     e = "Site '{0}:{1}' not found in config.".format(project, lang)
     raise SiteNotFoundError(e)
diff --git a/wiki/tools/page.py b/wiki/tools/page.py
index 3a30a70..49a89ee 100644
--- a/wiki/tools/page.py
+++ b/wiki/tools/page.py
@@ -5,5 +5,20 @@ class Page(object):
     EarwigBot's Wiki Toolset: Page Class
     """
 
-    def __init__(self):
-        pass
+    def __init__(self, site, title):
+        """
+        Docstring needed
+        """
+        self.site = site
+        self.title = title
+
+    def get(self):
+        """
+        Docstring needed
+        """
+        params = {'action': 'query', 'prop': 'revisions', 'rvprop':'content', 'rvlimit':'1'}
+        params["titles"] = self.title
+        result = self.site.api_query(params)
+        pageid = result['query']['pages'].keys()[0]
+        content = result['query']['pages'][pageid]['revisions'][0]['*']
+        return content
diff --git a/wiki/tools/site.py b/wiki/tools/site.py
index caba1e4..34df709 100644
--- a/wiki/tools/site.py
+++ b/wiki/tools/site.py
@@ -1,9 +1,53 @@
 # -*- coding: utf-8  -*-
 
+from json import loads
+from urllib import urlencode
+from urllib2 import urlopen
+
+from wiki.tools.category import Category
+from wiki.tools.page import Page
+from wiki.tools.user import User
+
 class Site(object):
     """
     EarwigBot's Wiki Toolset: Site Class
     """
 
-    def __init__(self):
-        pass
+    def __init__(self, name, project, lang, api=None, sql=(None, None)):
+        """
+        Docstring needed
+        """
+        self.name = name
+        self.project = project
+        self.lang = lang
+        self.__api = api
+        self.__sql = sql
+
+    def api_query(self, params):
+        """
+        Docstring needed
+        """
+        params["format"] = "json"
+        data = urlencode(params)
+        result = urlopen(self.__api, data).read()
+        return loads(result)
+
+    def get_page(self, pagename):
+        """
+        Docstring needed
+        """
+        if pagename.startswith("Category:"):  # proper namespace checking!
+            return get_category(pagename[9:])
+        return Page(self, pagename)
+
+    def get_category(self, catname):
+        """
+        Docstring needed
+        """
+        return Category(self, "Category:" + catname)  # namespace checking!
+
+    def get_user(self, username):
+        """
+        Docstring needed
+        """
+        return User(self, username)

From 76113a3f6ac2f649607757a2b14bcc5846c25da2 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Sun, 24 Jul 2011 18:01:06 -0400
Subject: [PATCH 03/19] Cleaning up slightly and adding some more methods:

User: added .get_rights() (working) and .exists() (skeleton).
Page: added .exists() (skeleton); store text as ._content; get() has a force_reload argument.
Category: fixed missing self in .get_members().
Site: self.__api -> self._api; self.__sql -> self._sql
---
 wiki/tools/category.py |  6 +++---
 wiki/tools/page.py     | 23 ++++++++++++++++-------
 wiki/tools/site.py     |  6 +++---
 wiki/tools/user.py     | 25 ++++++++++++++++++++++++-
 4 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/wiki/tools/category.py b/wiki/tools/category.py
index 1dfdba3..01a3179 100644
--- a/wiki/tools/category.py
+++ b/wiki/tools/category.py
@@ -7,12 +7,12 @@ class Category(Page):
     EarwigBot's Wiki Toolset: Category Class
     """
 
-    def get_members(limit=50):
+    def get_members(self, limit=50):
         """
         Docstring needed
         """
-        params = {"action": "query", "list": "categorymembers", "cmlimit": limit}
-        params["cmtitle"] = self.title
+        params = {"action": "query", "list": "categorymembers",
+            "cmlimit": limit, "cmtitle": self.title}
         result = self.site.api_query(params)
         members = result['query']['categorymembers']
         return [member["title"] for member in members]
diff --git a/wiki/tools/page.py b/wiki/tools/page.py
index 49a89ee..9dbb7ab 100644
--- a/wiki/tools/page.py
+++ b/wiki/tools/page.py
@@ -11,14 +11,23 @@ class Page(object):
         """
         self.site = site
         self.title = title
+        self._content = None
 
-    def get(self):
+    def exists(self):
         """
         Docstring needed
         """
-        params = {'action': 'query', 'prop': 'revisions', 'rvprop':'content', 'rvlimit':'1'}
-        params["titles"] = self.title
-        result = self.site.api_query(params)
-        pageid = result['query']['pages'].keys()[0]
-        content = result['query']['pages'][pageid]['revisions'][0]['*']
-        return content
+        pass
+
+    def get(self, force_reload=False):
+        """
+        Docstring needed
+        """
+        if content is None or force_reload:
+            params = {"action": "query", "prop": "revisions",
+                "rvprop": "content", "rvlimit": 1, "titles": self.title}
+            result = self.site.api_query(params)
+            content = result["query"]["pages"].values()[0]["revisions"][0]["*"]
+            self._content = content
+            return content
+        return self._content
diff --git a/wiki/tools/site.py b/wiki/tools/site.py
index 34df709..ea62c77 100644
--- a/wiki/tools/site.py
+++ b/wiki/tools/site.py
@@ -20,8 +20,8 @@ class Site(object):
         self.name = name
         self.project = project
         self.lang = lang
-        self.__api = api
-        self.__sql = sql
+        self._api = api
+        self._sql = sql
 
     def api_query(self, params):
         """
@@ -29,7 +29,7 @@ class Site(object):
         """
         params["format"] = "json"
         data = urlencode(params)
-        result = urlopen(self.__api, data).read()
+        result = urlopen(self._api, data).read()
         return loads(result)
 
     def get_page(self, pagename):
diff --git a/wiki/tools/user.py b/wiki/tools/user.py
index 5044e50..4fb69b7 100644
--- a/wiki/tools/user.py
+++ b/wiki/tools/user.py
@@ -5,5 +5,28 @@ class User(object):
     EarwigBot's Wiki Toolset: User Class
     """
 
-    def __init__(self):
+    def __init__(self, site, username):
+        """
+        Docstring needed
+        """
+        self.site = site
+        self.username = username
+
+    def exists(self):
+        """
+        Docstring needed
+        """
         pass
+
+    def get_rights(self):
+        """
+        Docstring needed
+        """
+        params = {"action": "query", "list": "users", "usprop": "groups",
+            "ususers": self.username}
+        result = self.site.api_query(params)
+        try:
+            rights = res['query']['users'][0]['groups']
+        except KeyError:  # 'groups' not found, meaning the user does not exist
+            return None
+        return rights

From 28cbbd7221789ec2b49fa13d284214bfe7c7ec3c Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Sun, 24 Jul 2011 18:08:35 -0400
Subject: [PATCH 04/19] AFCStatus and Rights IRC commands now use wikitools

---
 irc/commands/afc_status.py | 29 +++++++----------------------
 irc/commands/rights.py     | 28 ++++++++--------------------
 2 files changed, 15 insertions(+), 42 deletions(-)

diff --git a/irc/commands/afc_status.py b/irc/commands/afc_status.py
index 9273a53..2b8b880 100644
--- a/irc/commands/afc_status.py
+++ b/irc/commands/afc_status.py
@@ -3,12 +3,11 @@
 """Report the status of AFC submissions, either as an automatic message on join
 or a request via !status."""
 
-import json
 import re
-import urllib
 
 from core import config
 from irc.classes import BaseCommand
+from wiki import tools
 
 class AFCStatus(BaseCommand):
     def get_hooks(self):
@@ -29,6 +28,8 @@ class AFCStatus(BaseCommand):
         return False
 
     def process(self, data):
+        self.site = tools.get_site()
+
         if data.line[1] == "JOIN":
             notice = self.get_join_notice()
             self.connection.notice(data.nick, notice)
@@ -85,19 +86,15 @@ class AFCStatus(BaseCommand):
 
     def count_submissions(self):
         """Returns the number of open AFC submissions (count of CAT:PEND)."""
-        params = {'action': 'query', 'list': 'categorymembers', 'cmlimit':'500', 'format': 'json'}
-        params['cmtitle'] = "Category:Pending_AfC_submissions"
-        data = urllib.urlencode(params)
-        raw = urllib.urlopen("http://en.wikipedia.org/w/api.php", data).read()
-        res = json.loads(raw)
-        subs = len(res['query']['categorymembers'])
+        cat = self.site.get_category("Pending AfC submissions")
+        subs = cat.get_members(limit=500)
         subs -= 2 # remove [[Wikipedia:Articles for creation/Redirects]] and [[Wikipedia:Files for upload]], which aren't real submissions
         return subs
 
     def count_redirects(self):
         """Returns the number of open redirect submissions. Calculated as the
         total number of submissions minus the closed ones."""
-        content = self.get_page("Wikipedia:Articles_for_creation/Redirects")
+        content = self.site.get_page("Wikipedia:Articles for creation/Redirects").get()
         total = len(re.findall("^\s*==(.*?)==\s*$", content, re.MULTILINE))
         closed = content.lower().count("{{afc-c|b}}")
         redirs = total - closed
@@ -106,24 +103,12 @@ class AFCStatus(BaseCommand):
     def count_files(self):
         """Returns the number of open WP:FFU (Files For Upload) requests.
         Calculated as the total number of requests minus the closed ones."""
-        content = self.get_page("Wikipedia:Files_for_upload")
+        content = self.site.get_page("Wikipedia:Files for upload").get()
         total = len(re.findall("^\s*==(.*?)==\s*$", content, re.MULTILINE))
         closed = content.lower().count("{{ifu-c|b}}")
         files = total - closed
         return files
 
-    def get_page(self, pagename):
-        """Simple method to return the content of the page 'pagename'. Will be
-        a part of wiki/tools/ when I finish that."""
-        params = {'action': 'query', 'prop': 'revisions', 'rvprop':'content', 'rvlimit':'1', 'format': 'json'}
-        params['titles'] = pagename
-        data = urllib.urlencode(params)
-        raw = urllib.urlopen("http://en.wikipedia.org/w/api.php", data).read()
-        res = json.loads(raw)
-        pageid = res['query']['pages'].keys()[0]
-        content = res['query']['pages'][pageid]['revisions'][0]['*']
-        return content
-
     def get_aggregate(self, num):
         """Returns a human-readable AFC status based on the number of pending
         AFC submissions, open redirect requests, and open FFU requests. This
diff --git a/irc/commands/rights.py b/irc/commands/rights.py
index 31d9437..2715d60 100644
--- a/irc/commands/rights.py
+++ b/irc/commands/rights.py
@@ -4,10 +4,8 @@
 Retrieve a list of user rights for a given username via the API.
 """
 
-import json
-import urllib
-
 from irc.classes import BaseCommand
+from wiki import tools
 
 class Rights(BaseCommand):
     def get_hooks(self):
@@ -27,24 +25,14 @@ class Rights(BaseCommand):
             return
 
         username = ' '.join(data.args)
-        rights = self.get_rights(username)
+        site = tools.get_site()
+        user = site.get_user(username)
+        rights = user.get_rights()
         if rights:
+            try:
+                rights.remove("*")  # remove the implicit '*' group given to everyone
+            except ValueError:
+                pass
             self.connection.reply(data, "the rights for \x0302{0}\x0301 are {1}.".format(username, ', '.join(rights)))
         else:
             self.connection.reply(data, "the user \x0302{0}\x0301 has no rights, or does not exist.".format(username))
-
-    def get_rights(self, username):
-        params = {'action': 'query', 'format': 'json', 'list': 'users', 'usprop': 'groups'}
-        params['ususers'] = username
-        data = urllib.urlencode(params)
-        raw = urllib.urlopen("http://en.wikipedia.org/w/api.php", data).read()
-        res = json.loads(raw)
-        try:
-            rights = res['query']['users'][0]['groups']
-        except KeyError:  # 'groups' not found, meaning the user does not exist
-            return None
-        try:
-            rights.remove("*")  # remove the implicit '*' group given to everyone
-        except ValueError:  # I don't expect this to happen, but if it does, be prepared
-            pass
-        return rights

From b290582dbf5a8e420f88cf9660fa45937fdd3b8c Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Sun, 24 Jul 2011 22:34:54 -0400
Subject: [PATCH 05/19] added a bunch of new methods to User in wikitools;
 added one user-related exception to wikitools; moved .get_rights() call in
 IRC command !rights to .get_groups(), because get_rights() now returns actual
 rights (thanks to the API)

---
 irc/commands/rights.py   |   2 +-
 wiki/tools/exceptions.py |   7 +++
 wiki/tools/user.py       | 135 ++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 131 insertions(+), 13 deletions(-)

diff --git a/irc/commands/rights.py b/irc/commands/rights.py
index 2715d60..6c44227 100644
--- a/irc/commands/rights.py
+++ b/irc/commands/rights.py
@@ -27,7 +27,7 @@ class Rights(BaseCommand):
         username = ' '.join(data.args)
         site = tools.get_site()
         user = site.get_user(username)
-        rights = user.get_rights()
+        rights = user.get_groups()
         if rights:
             try:
                 rights.remove("*")  # remove the implicit '*' group given to everyone
diff --git a/wiki/tools/exceptions.py b/wiki/tools/exceptions.py
index b515c45..3dc463d 100644
--- a/wiki/tools/exceptions.py
+++ b/wiki/tools/exceptions.py
@@ -16,3 +16,10 @@ class ConfigError(WikiToolsetError):
 class SiteNotFoundError(WikiToolsetError):
     """A site matching the args given to get_site() could not be found in the
     config file."""
+
+class UserNotFoundError(WikiToolsetError):
+    """Attempting to get information about a user that does not exist."""
+    def __init__(self, name):
+        self.name = name
+    def __str__(self):
+        return "User '{0}' does not exist.".format(self.name)
diff --git a/wiki/tools/user.py b/wiki/tools/user.py
index 4fb69b7..c12e234 100644
--- a/wiki/tools/user.py
+++ b/wiki/tools/user.py
@@ -1,32 +1,143 @@
 # -*- coding: utf-8  -*-
 
+from wiki.tools.exceptions import UserNotFoundError
+from wiki.tools.page import Page
+
 class User(object):
     """
     EarwigBot's Wiki Toolset: User Class
     """
 
-    def __init__(self, site, username):
+    def __init__(self, site, name):
         """
         Docstring needed
         """
-        self.site = site
-        self.username = username
+        # Public attributes
+        self.site = site  # Site instance, for doing API queries, etc
+        self.name = name  # our username
+
+        # Attributes filled in by an API query
+        self._exists = None
+        self._userid = None
+        self._blockinfo = None
+        self._groups = None
+        self._rights = None
+        self._editcount = None
+        self._registration = None
+        self._emailable = None
+        self._gender = None
 
-    def exists(self):
+    def _get_attribute_from_api(self, attr, force):
         """
         Docstring needed
         """
-        pass
+        if self._exists is None or force:
+            self._load_attributes_from_api()
+        if self._exists is False:
+            raise UserNotFoundError(self.name)
+        return getattr(self, attr)
 
-    def get_rights(self):
+    def _load_attributes_from_api(self):
         """
         Docstring needed
         """
-        params = {"action": "query", "list": "users", "usprop": "groups",
-            "ususers": self.username}
+        params = {"action": "query", "list": "users", "ususers": self.name,
+        "usprop": "blockinfo|groups|rights|editcount|registration|emailable|gender"}
         result = self.site.api_query(params)
+
+        # normalize our username in case it was entered oddly
+        self.name = result["query"]["users"][0]["name"]
+
+        try:
+            self._userid = result["query"]["users"][0]["userid"]
+        except KeyError:  # userid is missing, so user does not exist
+            self._exists = False
+            return
+
+        self._exists = True
+        res = result['query']['users'][0]
+
+        self._groups = res["groups"]
+        self._rights = res["rights"]
+        self._editcount = res["editcount"]
+        self._registration = res["registration"]
+        self._gender = res["gender"]
+
+        try:
+            res["emailable"]
+        except KeyError:
+            self._emailable = False
+        else:
+            self._emailable = True
+
         try:
-            rights = res['query']['users'][0]['groups']
-        except KeyError:  # 'groups' not found, meaning the user does not exist
-            return None
-        return rights
+            self._blockinfo = {"by": res["blockedby"],
+                "reason": res["blockreason"], "expiry": res["blockexpiry"]}
+        except KeyError:
+            self._blockinfo = False
+
+    def exists(self, force=False):
+        """
+        Docstring needed
+        """
+        return self._get_attribute_from_api("_exists", force)
+
+    def get_userid(self, force=False):
+        """
+        Docstring needed
+        """
+        return self._get_attribute_from_api("_userid", force)
+
+    def get_blockinfo(self, force=False):
+        """
+        Docstring needed
+        """
+        return self._get_attribute_from_api("_blockinfo", force)
+
+    def get_groups(self, force=False):
+        """
+        Docstring needed
+        """
+        return self._get_attribute_from_api("_groups", force)
+
+    def get_rights(self, force=False):
+        """
+        Docstring needed
+        """
+        return self._get_attribute_from_api("_rights", force)
+
+    def get_editcount(self, force=False):
+        """
+        Docstring needed
+        """
+        return self._get_attribute_from_api("_editcount", force)
+
+    def get_registration(self, force=False):
+        """
+        Docstring needed
+        """
+        return self._get_attribute_from_api("_registration", force)
+
+    def get_emailable(self, force=False):
+        """
+        Docstring needed
+        """
+        return self._get_attribute_from_api("_emailable", force)
+
+    def get_gender(self, force=False):
+        """
+        Docstring needed
+        """
+        return self._get_attribute_from_api("_gender", force)
+
+    def get_userpage(self):
+        """
+        Docstring needed
+        """
+        return Page(self.site, "User:" + self.name)  # Namespace checking!
+
+    def get_talkpage(self):
+        """
+        Docstring needed
+        """
+        return Page(self.site, "User talk:" + self.name)  # Namespace checking!

From 6aa2370900785c606464fac3da3aa600b2ef588e Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Wed, 27 Jul 2011 02:22:18 -0400
Subject: [PATCH 06/19] Exception and function cleanup in wikitools:

* Got rid of ConfigError from exceptions.py.
* Try to load config ourselves if it isn't already, via the new _load_config()
  function in functions.py. It uses getpass if passwords are encrypted, as done
  by earwigbot.py.
* Cleaned up UserNotFoundError in user.py and exceptions.py.
---
 wiki/tools/exceptions.py |  8 --------
 wiki/tools/functions.py  | 37 +++++++++++++++++++++++++++++--------
 wiki/tools/user.py       |  2 +-
 3 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/wiki/tools/exceptions.py b/wiki/tools/exceptions.py
index 3dc463d..d628d0d 100644
--- a/wiki/tools/exceptions.py
+++ b/wiki/tools/exceptions.py
@@ -9,17 +9,9 @@ This module contains all exceptions used by the wiki.tools package.
 class WikiToolsetError(Exception):
     """Base exception class for errors in the Wiki Toolset."""
 
-class ConfigError(WikiToolsetError):
-    """An error occured when trying to do something involving our config
-    file. Maybe it hasn't been loaded?"""
-
 class SiteNotFoundError(WikiToolsetError):
     """A site matching the args given to get_site() could not be found in the
     config file."""
 
 class UserNotFoundError(WikiToolsetError):
     """Attempting to get information about a user that does not exist."""
-    def __init__(self, name):
-        self.name = name
-    def __str__(self):
-        return "User '{0}' does not exist.".format(self.name)
diff --git a/wiki/tools/functions.py b/wiki/tools/functions.py
index 195400c..d1cc020 100644
--- a/wiki/tools/functions.py
+++ b/wiki/tools/functions.py
@@ -10,15 +10,31 @@ There's no need to import this module explicitly. All functions here are
 automatically available from wiki.tools.
 """
 
+from getpass import getpass
+
 from core import config
-from wiki.tools.exceptions import ConfigError, SiteNotFoundError
+from wiki.tools.exceptions import SiteNotFoundError
 from wiki.tools.site import Site
 
 __all__ = ["get_site"]
 
+def _load_config():
+    """Called by a config-requiring function, such as get_site(), when config
+    has not been loaded. This will usually happen only if we're running code
+    directly from Python's interpreter and not the bot itself, because
+    earwigbot.py or core/main.py will already call these functions.
+    """
+    is_encrypted = config.verify_config()
+    if is_encrypted:  # passwords in the config file are encrypted
+        key = getpass("Enter key to unencrypt bot passwords: ")
+        config.parse_config(key)
+    else:
+        config.parse_config(None)
+
 def _get_site_object_from_dict(name, d):
     """Return a Site object based on the contents of a dict, probably acquired
-    through our config file, and a separate name."""
+    through our config file, and a separate name.
+    """
     project = d["project"]
     lang = d["lang"]
     try:
@@ -54,15 +70,18 @@ def get_site(name=None, project=None, lang=None):
     then `project` and `lang`. If, with any number of args, a site cannot be
     found in the config, SiteNotFoundError is raised.
     """
-    if config._config is None:
-        e = "Config file has not been loaded: use config.verify_config() and then config.parse_config() to do so."
-        raise ConfigError(e)
+    # check if config has been loaded, and load it if it hasn't
+    if not config.is_config_loaded():
+        _load_config()
 
+    # someone specified a project without a lang (or a lang without a project)!
     if (project is None and lang is not None) or (project is not None and lang is None):
         e = "Keyword arguments 'lang' and 'project' must be specified together."
         raise TypeError(e)
 
-    if name is None and project is None:  # no args given (project is None implies lang is None)
+    # no args given, so return our default site (project is None implies lang
+    # is None, so we don't need to add that in)
+    if name is None and project is None:
         try:  # ...so use the default site
             default = config.wiki["defaultSite"]
         except KeyError:
@@ -75,7 +94,8 @@ def get_site(name=None, project=None, lang=None):
             raise SiteNotFoundError(e)
         return _get_site_object_from_dict(default, site)
 
-    if name is not None:  # name arg given, but don't look at others yet
+    # name arg given, but don't look at others unless `name` isn't found
+    if name is not None:
         try:
             site = config.wiki["sites"][name]
         except KeyError:
@@ -90,7 +110,8 @@ def get_site(name=None, project=None, lang=None):
         else:
             return _get_site_object_from_dict(name, site)
 
-    for sitename, site in config.wiki["sites"].items():  # implied lang and proj are not None
+    # if we end up here, then project and lang are both not None
+    for sitename, site in config.wiki["sites"].items():
         if site["project"] == project and site["lang"] == lang:
             return _get_site_object_from_dict(sitename, site)
     e = "Site '{0}:{1}' not found in config.".format(project, lang)
diff --git a/wiki/tools/user.py b/wiki/tools/user.py
index c12e234..ae19bfe 100644
--- a/wiki/tools/user.py
+++ b/wiki/tools/user.py
@@ -34,7 +34,7 @@ class User(object):
         if self._exists is None or force:
             self._load_attributes_from_api()
         if self._exists is False:
-            raise UserNotFoundError(self.name)
+            raise UserNotFoundError("User '{0}' does not exist.".format(self.name))
         return getattr(self, attr)
 
     def _load_attributes_from_api(self):

From ffc63c38f6943ac5a81179043ec6709f8dac2687 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Wed, 27 Jul 2011 19:51:25 -0400
Subject: [PATCH 07/19] New additions and changes to wikitools, mostly
 namespaces.

Site:       Store namespace information in self._namespaces, a dict where key is a namespace ID and value is a list of
            matching names and aliases; added _get_namespaces_from_api(), namespaces(), namespace_id_to_name() and
            namespace_name_to_id(); get_page() and get_category() are smarter;
Constants:  new module, with 18 variables starting with "NS_" that hold IDs of common namespaces, e.g. NS_USER = 2,
            NS_PROJECT = 4;
Exceptions: added NamespaceNotFoundError, raised by Site when bad input is given to namespace_id_to_name() or
            namespace_name_to_id();
User:       self.name -> self._name; new name() method returns name from API; dropping "get" from methods that return
            just a variable;
Category:   get_members() -> members().
---
 wiki/tools/__init__.py   |  1 +
 wiki/tools/category.py   |  2 +-
 wiki/tools/constants.py  | 27 +++++++++++++++
 wiki/tools/exceptions.py |  3 ++
 wiki/tools/page.py       |  2 +-
 wiki/tools/site.py       | 85 ++++++++++++++++++++++++++++++++++++++++++++++--
 wiki/tools/user.py       | 50 ++++++++++++++++++----------
 7 files changed, 147 insertions(+), 23 deletions(-)
 create mode 100644 wiki/tools/constants.py

diff --git a/wiki/tools/__init__.py b/wiki/tools/__init__.py
index 2e64c45..7fb431e 100644
--- a/wiki/tools/__init__.py
+++ b/wiki/tools/__init__.py
@@ -10,6 +10,7 @@ written by Mr.Z-man, other than a similar purpose. We share no code.
 Import the toolset with `from wiki import tools`.
 """
 
+from wiki.tools.constants import *
 from wiki.tools.exceptions import *
 from wiki.tools.functions import *
 
diff --git a/wiki/tools/category.py b/wiki/tools/category.py
index 01a3179..f6e301f 100644
--- a/wiki/tools/category.py
+++ b/wiki/tools/category.py
@@ -7,7 +7,7 @@ class Category(Page):
     EarwigBot's Wiki Toolset: Category Class
     """
 
-    def get_members(self, limit=50):
+    def members(self, limit=50):
         """
         Docstring needed
         """
diff --git a/wiki/tools/constants.py b/wiki/tools/constants.py
new file mode 100644
index 0000000..76a327d
--- /dev/null
+++ b/wiki/tools/constants.py
@@ -0,0 +1,27 @@
+# -*- coding: utf-8  -*-
+
+"""
+EarwigBot's Wiki Toolset: Constants
+
+This module defines some useful constants.
+"""
+
+# Default namespace IDs
+NS_MAIN = 0
+NS_TALK = 1
+NS_USER = 2
+NS_USER_TALK = 3
+NS_PROJECT = 4
+NS_PROJECT_TALK = 5
+NS_FILE = 6
+NS_FILE_TALK = 7
+NS_MEDIAWIKI = 8
+NS_MEDIAWIKI_TALK = 9
+NS_TEMPLATE = 10
+NS_TEMPLATE_TALK = 11
+NS_HELP = 12
+NS_HELP_TALK = 13
+NS_CATEGORY = 14
+NS_CATEGORY_TALK = 15
+NS_SPECIAL = -1
+NS_MEDIA = -2
diff --git a/wiki/tools/exceptions.py b/wiki/tools/exceptions.py
index d628d0d..3e5eaf2 100644
--- a/wiki/tools/exceptions.py
+++ b/wiki/tools/exceptions.py
@@ -13,5 +13,8 @@ class SiteNotFoundError(WikiToolsetError):
     """A site matching the args given to get_site() could not be found in the
     config file."""
 
+class NamespaceNotFoundError(WikiToolsetError):
+    """A requested namespace name or namespace ID does not exist."""
+
 class UserNotFoundError(WikiToolsetError):
     """Attempting to get information about a user that does not exist."""
diff --git a/wiki/tools/page.py b/wiki/tools/page.py
index 9dbb7ab..d267674 100644
--- a/wiki/tools/page.py
+++ b/wiki/tools/page.py
@@ -23,7 +23,7 @@ class Page(object):
         """
         Docstring needed
         """
-        if content is None or force_reload:
+        if self._content is None or force_reload:
             params = {"action": "query", "prop": "revisions",
                 "rvprop": "content", "rvlimit": 1, "titles": self.title}
             result = self.site.api_query(params)
diff --git a/wiki/tools/site.py b/wiki/tools/site.py
index ea62c77..83a24c9 100644
--- a/wiki/tools/site.py
+++ b/wiki/tools/site.py
@@ -5,6 +5,8 @@ from urllib import urlencode
 from urllib2 import urlopen
 
 from wiki.tools.category import Category
+from wiki.tools.constants import *
+from wiki.tools.exceptions import NamespaceNotFoundError
 from wiki.tools.page import Page
 from wiki.tools.user import User
 
@@ -22,6 +24,37 @@ class Site(object):
         self.lang = lang
         self._api = api
         self._sql = sql
+        
+        self._namespaces = None
+
+    def _get_namespaces_from_api(self):
+        """
+        Docstring needed
+        """
+        params = {"action": "query", "meta": "siteinfo",
+            "siprop": "namespaces|namespacealiases"}
+        result = self.api_query(params)
+        
+        if self._namespaces is None:
+            self._namespaces = {}
+        
+        for namespace in result["query"]["namespaces"].values():
+            ns_id = namespace["id"]
+            name = namespace["*"]
+            try:
+                canonical = namespace["canonical"]
+            except KeyError:
+                self._namespaces[ns_id] = [name]
+            else:
+                if name != canonical:
+                    self._namespaces[ns_id] = [name, canonical]
+                else:
+                    self._namespaces[ns_id] = [name]
+
+        for namespace in result["query"]["namespacealiases"]:
+            ns_id = namespace["id"]
+            alias = namespace["*"]
+            self._namespaces[ns_id].append(alias)
 
     def api_query(self, params):
         """
@@ -32,19 +65,65 @@ class Site(object):
         result = urlopen(self._api, data).read()
         return loads(result)
 
+    def namespaces(self):
+        """
+        Docstring needed
+        """
+        if self._namespaces is None:
+            self._get_namespaces_from_api()
+        
+        return self._namespaces
+
+    def namespace_id_to_name(self, ns_id, all=False):
+        """
+        Docstring needed
+        """
+        if self._namespaces is None:
+            self._get_namespaces_from_api()
+
+        try:
+            if all:
+                return self._namespaces[ns_id]
+            else:
+                return self._namespaces[ns_id][0]
+        except KeyError:
+            e = "There is no namespace with id {0}.".format(ns_id)
+            raise NamespaceNotFoundError(e)
+
+    def namespace_name_to_id(self, name):
+        """
+        Docstring needed
+        """
+        if self._namespaces is None:
+            self._get_namespaces_from_api()
+        
+        lname = name.lower()
+        for ns_id, names in self._namespaces.items():
+            lnames = [n.lower() for n in names]  # be case-insensitive
+            if lname in lnames:
+                return ns_id
+
+        e = "There is no namespace with name '{0}'.".format(name)
+        raise NamespaceNotFoundError(e)
+
     def get_page(self, pagename):
         """
         Docstring needed
         """
-        if pagename.startswith("Category:"):  # proper namespace checking!
-            return get_category(pagename[9:])
+        prefixes = self.namespace_id_to_name(NS_CATEGORY, all=True)
+        prefix = pagename.split(":", 1)[0]
+        if prefix != pagename:  # avoid a page that is simply "Category"
+            if prefix in prefixes:
+                return Category(self, pagename)
         return Page(self, pagename)
 
     def get_category(self, catname):
         """
         Docstring needed
         """
-        return Category(self, "Category:" + catname)  # namespace checking!
+        prefix = self.namespace_id_to_name(NS_CATEGORY)
+        pagename = "{0}:{1}".format(prefix, catname)
+        return Category(self, pagename)
 
     def get_user(self, username):
         """
diff --git a/wiki/tools/user.py b/wiki/tools/user.py
index ae19bfe..8f6e96f 100644
--- a/wiki/tools/user.py
+++ b/wiki/tools/user.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8  -*-
 
+from wiki.tools.constants import *
 from wiki.tools.exceptions import UserNotFoundError
 from wiki.tools.page import Page
 
@@ -12,9 +13,11 @@ class User(object):
         """
         Docstring needed
         """
-        # Public attributes
-        self.site = site  # Site instance, for doing API queries, etc
-        self.name = name  # our username
+        # Site instance, for doing API queries, etc
+        self.site = site
+
+        # Username
+        self._name = name
 
         # Attributes filled in by an API query
         self._exists = None
@@ -34,19 +37,20 @@ class User(object):
         if self._exists is None or force:
             self._load_attributes_from_api()
         if self._exists is False:
-            raise UserNotFoundError("User '{0}' does not exist.".format(self.name))
+            e = "User '{0}' does not exist.".format(self._name)
+            raise UserNotFoundError(e)
         return getattr(self, attr)
 
     def _load_attributes_from_api(self):
         """
         Docstring needed
         """
-        params = {"action": "query", "list": "users", "ususers": self.name,
+        params = {"action": "query", "list": "users", "ususers": self._name,
         "usprop": "blockinfo|groups|rights|editcount|registration|emailable|gender"}
         result = self.site.api_query(params)
 
         # normalize our username in case it was entered oddly
-        self.name = result["query"]["users"][0]["name"]
+        self._name = result["query"]["users"][0]["name"]
 
         try:
             self._userid = result["query"]["users"][0]["userid"]
@@ -76,68 +80,78 @@ class User(object):
         except KeyError:
             self._blockinfo = False
 
+    def name(self, force=False):
+        """
+        Docstring needed
+        """
+        return self._get_attribute_from_api("_name", force)
+
     def exists(self, force=False):
         """
         Docstring needed
         """
         return self._get_attribute_from_api("_exists", force)
 
-    def get_userid(self, force=False):
+    def userid(self, force=False):
         """
         Docstring needed
         """
         return self._get_attribute_from_api("_userid", force)
 
-    def get_blockinfo(self, force=False):
+    def blockinfo(self, force=False):
         """
         Docstring needed
         """
         return self._get_attribute_from_api("_blockinfo", force)
 
-    def get_groups(self, force=False):
+    def groups(self, force=False):
         """
         Docstring needed
         """
         return self._get_attribute_from_api("_groups", force)
 
-    def get_rights(self, force=False):
+    def rights(self, force=False):
         """
         Docstring needed
         """
         return self._get_attribute_from_api("_rights", force)
 
-    def get_editcount(self, force=False):
+    def editcount(self, force=False):
         """
         Docstring needed
         """
         return self._get_attribute_from_api("_editcount", force)
 
-    def get_registration(self, force=False):
+    def registration(self, force=False):
         """
         Docstring needed
         """
         return self._get_attribute_from_api("_registration", force)
 
-    def get_emailable(self, force=False):
+    def is_emailable(self, force=False):
         """
         Docstring needed
         """
         return self._get_attribute_from_api("_emailable", force)
 
-    def get_gender(self, force=False):
+    def gender(self, force=False):
         """
         Docstring needed
         """
         return self._get_attribute_from_api("_gender", force)
 
-    def get_userpage(self):
+    def userpage(self):
         """
         Docstring needed
         """
-        return Page(self.site, "User:" + self.name)  # Namespace checking!
+        prefix = self.site.namespace_id_to_name(NS_USER)
+        pagename = "{0}:{1}".format(prefix, self._name)
+        return Page(self.site, pagename)
 
-    def get_talkpage(self):
+    def talkpage(self):
         """
         Docstring needed
         """
-        return Page(self.site, "User talk:" + self.name)  # Namespace checking!
+        prefix = self.site.namespace_id_to_name(NS_USER_TALK)
+        pagename = "{0}:{1}".format(prefix, self._name)
+        return Page(self.site, pagename)

From a2ceb7a85596fb49f8315aa4149969534ae5be18 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Wed, 27 Jul 2011 23:34:16 -0400
Subject: [PATCH 08/19] Update AFCStatus and Rights IRC commands per wikitools
 updates.

---
 irc/commands/afc_status.py | 2 +-
 irc/commands/rights.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/irc/commands/afc_status.py b/irc/commands/afc_status.py
index 2b8b880..0f5722e 100644
--- a/irc/commands/afc_status.py
+++ b/irc/commands/afc_status.py
@@ -87,7 +87,7 @@ class AFCStatus(BaseCommand):
     def count_submissions(self):
         """Returns the number of open AFC submissions (count of CAT:PEND)."""
         cat = self.site.get_category("Pending AfC submissions")
-        subs = cat.get_members(limit=500)
+        subs = cat.members(limit=500)
         subs -= 2 # remove [[Wikipedia:Articles for creation/Redirects]] and [[Wikipedia:Files for upload]], which aren't real submissions
         return subs
 
diff --git a/irc/commands/rights.py b/irc/commands/rights.py
index 6c44227..4289002 100644
--- a/irc/commands/rights.py
+++ b/irc/commands/rights.py
@@ -27,7 +27,7 @@ class Rights(BaseCommand):
         username = ' '.join(data.args)
         site = tools.get_site()
         user = site.get_user(username)
-        rights = user.get_groups()
+        rights = user.groups()
         if rights:
             try:
                 rights.remove("*")  # remove the implicit '*' group given to everyone

From cafa9deeddb24bc7cada7e9ef9531b4df616a7bf Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Thu, 28 Jul 2011 16:11:10 -0400
Subject: [PATCH 09/19] More additions to wikitools, mostly in Site.

* Site's __init__() takes more args, all optional. As long as enough are
  provided to do an API query, the missing ones will be filled in
  automatically by _load_attributes(), which is called in __init__().
* User: _get_attribute_from_api() -> _get_attribute();
  _load_attributes_from_api() -> _load_attributes().
* Sites in config.json are stored with different keys/values.
---
 wiki/tools/functions.py |  31 +++++++++++--
 wiki/tools/site.py      | 118 ++++++++++++++++++++++++++++++++++++++----------
 wiki/tools/user.py      |  26 +++++------
 3 files changed, 133 insertions(+), 42 deletions(-)

diff --git a/wiki/tools/functions.py b/wiki/tools/functions.py
index d1cc020..178a8e2 100644
--- a/wiki/tools/functions.py
+++ b/wiki/tools/functions.py
@@ -35,12 +35,26 @@ def _get_site_object_from_dict(name, d):
     """Return a Site object based on the contents of a dict, probably acquired
     through our config file, and a separate name.
     """
-    project = d["project"]
-    lang = d["lang"]
     try:
-        api = d["apiURL"]
+        project = d["project"]
     except KeyError:
-        api = None
+        project = None
+    try:
+        lang = d["lang"]
+    except KeyError:
+        lang = None
+    try:
+        base_url = d["baseURL"]
+    except KeyError:
+        base_url = None
+    try:
+        article_path = d["articlePath"]
+    except KeyError:
+        article_path = None
+    try:
+        script_path = d["scriptPath"]
+    except KeyError:
+        script_path = None
     try:
         sql_server = d["sqlServer"]
     except KeyError:
@@ -49,7 +63,14 @@ def _get_site_object_from_dict(name, d):
         sql_db = d["sqlDB"]
     except KeyError:
         sql_db = None
-    return Site(name, project, lang, api, (sql_server, sql_db))
+    try:
+        namespaces = d["namespaces"]
+    except KeyError:
+        namespaces = None
+
+    return Site(name=name, project=project, lang=lang, base_url=base_url,
+        article_path=article_path, script_path=script_path,
+        sql=(sql_server, sql_db), namespaces=namespaces)
 
 def get_site(name=None, project=None, lang=None):
     """Returns a Site instance based on information from our config file.
diff --git a/wiki/tools/site.py b/wiki/tools/site.py
index 83a24c9..62a2ecc 100644
--- a/wiki/tools/site.py
+++ b/wiki/tools/site.py
@@ -15,29 +15,71 @@ class Site(object):
     EarwigBot's Wiki Toolset: Site Class
     """
 
-    def __init__(self, name, project, lang, api=None, sql=(None, None)):
+    def __init__(self, name=None, project=None, lang=None, base_url=None,
+            article_path=None, script_path=None, sql=(None, None),
+            namespaces=None):
         """
         Docstring needed
         """
-        self.name = name
-        self.project = project
-        self.lang = lang
-        self._api = api
+        self._name = name
+        self._project = project
+        self._lang = lang
+        self._base_url = base_url
+        self._article_path = article_path
+        self._script_path = script_path
         self._sql = sql
-        
-        self._namespaces = None
+        self._namespaces = namespaces
+
+        # get all of the above attributes that were not specified by the user
+        self._load_attributes()
 
-    def _get_namespaces_from_api(self):
+    def _load_attributes(self, force=False):
         """
         Docstring needed
         """
-        params = {"action": "query", "meta": "siteinfo",
-            "siprop": "namespaces|namespacealiases"}
-        result = self.api_query(params)
-        
-        if self._namespaces is None:
-            self._namespaces = {}
+        # all attributes to be loaded, except _namespaces, which is a special
+        # case because it requires additional params in the API query
+        attrs = [self._name, self._project, self._lang, self._base_url,
+            self._article_path, self._script_path]
+
+        params = {"action": "query", "meta": "siteinfo"}
         
+        if self._namespaces is None or force:
+            params["siprop"] = "general|namespaces|namespacealiases"
+            result = self.api_query(params)
+            self._load_namespaces(result)
+        elif all(attrs):  # everything is already specified and we're not told
+            return        # to force a reload, so do nothing
+        else:  # we're only loading attributes other than _namespaces
+            params["siprop"] = "general"
+            result = self.api_query(params)
+
+        res = result["query"]["general"]
+
+        if self._name is None or force:
+            self._name = res["wikiid"]
+
+        if self._project is None or force:
+            self._project = res["sitename"].lower()
+
+        if self._lang is None or force:
+            self._lang = res["lang"]
+
+        if self._base_url is None or force:
+            self._base_url = res["server"]
+
+        if self._article_path is None or force:
+            self._article_path = res["articlepath"]
+
+        if self._script_path is None or force:
+            self._script_path = res["scriptpath"]
+
+    def _load_namespaces(self, result):
+        """
+        Docstring needed
+        """
+        self._namespaces = {}
+
         for namespace in result["query"]["namespaces"].values():
             ns_id = namespace["id"]
             name = namespace["*"]
@@ -60,27 +102,58 @@ class Site(object):
         """
         Docstring needed
         """
+        url = ''.join((self._base_url, self._script_path, "/api.php"))
         params["format"] = "json"
         data = urlencode(params)
-        result = urlopen(self._api, data).read()
+        result = urlopen(url, data).read()
         return loads(result)
 
+    def name(self):
+        """
+        Docstring needed
+        """
+        return self._name
+
+    def project(self):
+        """
+        Docstring needed
+        """
+        return self._project
+
+    def lang(self):
+        """
+        Docstring needed
+        """
+        return self._lang
+
+    def base_url(self):
+        """
+        Docstring needed
+        """
+        return self._base_url
+
+    def article_path(self):
+        """
+        Docstring needed
+        """
+        return self._article_path
+
+    def script_path(self):
+        """
+        Docstring needed
+        """
+        return self._script_path
+
     def namespaces(self):
         """
         Docstring needed
         """
-        if self._namespaces is None:
-            self._get_namespaces_from_api()
-        
         return self._namespaces
 
     def namespace_id_to_name(self, ns_id, all=False):
         """
         Docstring needed
         """
-        if self._namespaces is None:
-            self._get_namespaces_from_api()
-
         try:
             if all:
                 return self._namespaces[ns_id]
@@ -94,9 +167,6 @@ class Site(object):
         """
         Docstring needed
         """
-        if self._namespaces is None:
-            self._get_namespaces_from_api()
-        
         lname = name.lower()
         for ns_id, names in self._namespaces.items():
             lnames = [n.lower() for n in names]  # be case-insensitive
diff --git a/wiki/tools/user.py b/wiki/tools/user.py
index 8f6e96f..b406b97 100644
--- a/wiki/tools/user.py
+++ b/wiki/tools/user.py
@@ -30,18 +30,18 @@ class User(object):
         self._emailable = None
         self._gender = None
 
-    def _get_attribute_from_api(self, attr, force):
+    def _get_attribute(self, attr, force):
         """
         Docstring needed
         """
         if self._exists is None or force:
-            self._load_attributes_from_api()
+            self._load_attributes()
         if self._exists is False:
             e = "User '{0}' does not exist.".format(self._name)
             raise UserNotFoundError(e)
         return getattr(self, attr)
 
-    def _load_attributes_from_api(self):
+    def _load_attributes(self):
         """
         Docstring needed
         """
@@ -84,61 +84,61 @@ class User(object):
         """
         Docstring needed
         """
-        return self._get_attribute_from_api("_name", force)
+        return self._get_attribute("_name", force)
 
     def exists(self, force=False):
         """
         Docstring needed
         """
-        return self._get_attribute_from_api("_exists", force)
+        return self._get_attribute("_exists", force)
 
     def userid(self, force=False):
         """
         Docstring needed
         """
-        return self._get_attribute_from_api("_userid", force)
+        return self._get_attribute("_userid", force)
 
     def blockinfo(self, force=False):
         """
         Docstring needed
         """
-        return self._get_attribute_from_api("_blockinfo", force)
+        return self._get_attribute("_blockinfo", force)
 
     def groups(self, force=False):
         """
         Docstring needed
         """
-        return self._get_attribute_from_api("_groups", force)
+        return self._get_attribute("_groups", force)
 
     def rights(self, force=False):
         """
         Docstring needed
         """
-        return self._get_attribute_from_api("_rights", force)
+        return self._get_attribute("_rights", force)
 
     def editcount(self, force=False):
         """
         Docstring needed
         """
-        return self._get_attribute_from_api("_editcount", force)
+        return self._get_attribute("_editcount", force)
 
     def registration(self, force=False):
         """
         Docstring needed
         """
-        return self._get_attribute_from_api("_registration", force)
+        return self._get_attribute("_registration", force)
 
     def is_emailable(self, force=False):
         """
         Docstring needed
         """
-        return self._get_attribute_from_api("_emailable", force)
+        return self._get_attribute("_emailable", force)
 
     def gender(self, force=False):
         """
         Docstring needed
         """
-        return self._get_attribute_from_api("_gender", force)
+        return self._get_attribute("_gender", force)
 
     def userpage(self):
         """

From 74ddc5b702d19375a407b6c87fd34f0447ba0fb3 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Fri, 29 Jul 2011 02:31:01 -0400
Subject: [PATCH 10/19] More work on wikitools, now with improved API queries
 and login.

* Site's api_query() is much smarter. It uses a custom urllib2 URL opener with cookie support and catches URLErrors, raising its own brand new exception (SiteAPIError) when something is wrong.
* The opener now uses a custom User-Agent, which is a constant in wiki.tools.constants.
* Site instances automatically log in via _login(), which accepts a username and password (provided via config by get_site()), makes two api_query() calls, and stores the login data as cookies in self._cookiejar. Login data is not preserved between bot restarts yet. Login errors, e.g. a bad password or username, raise the new LoginError.
* The username argument of Site's get_user() is now optional. If left blank, get_user() returns the currently logged-in user, as provided by an API query.
* Misc cleanup throughout.
---
 wiki/tools/constants.py  | 10 ++++++-
 wiki/tools/exceptions.py |  8 ++++++
 wiki/tools/functions.py  | 18 +++++++++---
 wiki/tools/site.py       | 74 ++++++++++++++++++++++++++++++++++++++++++------
 4 files changed, 97 insertions(+), 13 deletions(-)

diff --git a/wiki/tools/constants.py b/wiki/tools/constants.py
index 76a327d..6397c5d 100644
--- a/wiki/tools/constants.py
+++ b/wiki/tools/constants.py
@@ -3,9 +3,17 @@
 """
 EarwigBot's Wiki Toolset: Constants
 
-This module defines some useful constants.
+This module defines some useful constants, such as default namespace IDs for
+easy lookup and our user agent.
+
+Import with `from wiki.tools.constants import *`.
 """
 
+import platform
+
+# User agent when making API queries
+USER_AGENT = "EarwigBot/0.1-dev (Python/{0}; https://github.com/earwig/earwigbot)".format(platform.python_version())
+
 # Default namespace IDs
 NS_MAIN = 0
 NS_TALK = 1
diff --git a/wiki/tools/exceptions.py b/wiki/tools/exceptions.py
index 3e5eaf2..0620262 100644
--- a/wiki/tools/exceptions.py
+++ b/wiki/tools/exceptions.py
@@ -13,6 +13,14 @@ class SiteNotFoundError(WikiToolsetError):
     """A site matching the args given to get_site() could not be found in the
     config file."""
 
+class SiteAPIError(WikiToolsetError):
+    """We couldn't connect to a site's API, perhaps because the server doesn't
+    exist, our URL is wrong, or they're having temporary problems."""
+
+class LoginError(WikiToolsetError):
+    """An error occured while trying to login. Perhaps the username/password is
+    incorrect."""
+
 class NamespaceNotFoundError(WikiToolsetError):
     """A requested namespace name or namespace ID does not exist."""
 
diff --git a/wiki/tools/functions.py b/wiki/tools/functions.py
index 178a8e2..2618a57 100644
--- a/wiki/tools/functions.py
+++ b/wiki/tools/functions.py
@@ -67,10 +67,14 @@ def _get_site_object_from_dict(name, d):
         namespaces = d["namespaces"]
     except KeyError:
         namespaces = None
+    try:
+        login = (config.wiki["username"], config.wiki["password"])
+    except KeyError:
+        login = (None, None)
 
     return Site(name=name, project=project, lang=lang, base_url=base_url,
         article_path=article_path, script_path=script_path,
-        sql=(sql_server, sql_db), namespaces=namespaces)
+        sql=(sql_server, sql_db), namespaces=namespaces, login=login)
 
 def get_site(name=None, project=None, lang=None):
     """Returns a Site instance based on information from our config file.
@@ -86,6 +90,10 @@ def get_site(name=None, project=None, lang=None):
     member of config.wiki["sites"], `s`, for which s["project"] == project and
     s["lang"] == lang.
 
+    We will attempt to login to the site automatically
+    using config.wiki["username"] and config.wiki["password"] if both are
+    defined.
+
     Specifying a project without a lang or a lang without a project will raise
     TypeError. If all three args are specified, `name` will be first tried,
     then `project` and `lang`. If, with any number of args, a site cannot be
@@ -96,7 +104,8 @@ def get_site(name=None, project=None, lang=None):
         _load_config()
 
     # someone specified a project without a lang (or a lang without a project)!
-    if (project is None and lang is not None) or (project is not None and lang is None):
+    if (project is None and lang is not None) or (project is not None and
+                                                  lang is None):
         e = "Keyword arguments 'lang' and 'project' must be specified together."
         raise TypeError(e)
 
@@ -120,13 +129,14 @@ def get_site(name=None, project=None, lang=None):
         try:
             site = config.wiki["sites"][name]
         except KeyError:
-            if project is None:  # implies lang is None, i.e., only name was given
+            if project is None:  # implies lang is None, so only name was given
                 e = "Site '{0}' not found in config.".format(name)
                 raise SiteNotFoundError(e)
             for sitename, site in config.wiki["sites"].items():
                 if site["project"] == project and site["lang"] == lang:
                     return _get_site_object_from_dict(sitename, site)
-            e = "Neither site '{0}' nor site '{1}:{2}' found in config.".format(name, project, lang)
+            e = "Neither site '{0}' nor site '{1}:{2}' found in config."
+            e.format(name, project, lang)
             raise SiteNotFoundError(e)
         else:
             return _get_site_object_from_dict(name, site)
diff --git a/wiki/tools/site.py b/wiki/tools/site.py
index 62a2ecc..f4b854f 100644
--- a/wiki/tools/site.py
+++ b/wiki/tools/site.py
@@ -1,12 +1,13 @@
 # -*- coding: utf-8  -*-
 
+from cookielib import CookieJar
 from json import loads
 from urllib import urlencode
-from urllib2 import urlopen
+from urllib2 import build_opener, HTTPCookieProcessor, URLError
 
 from wiki.tools.category import Category
 from wiki.tools.constants import *
-from wiki.tools.exceptions import NamespaceNotFoundError
+from wiki.tools.exceptions import *
 from wiki.tools.page import Page
 from wiki.tools.user import User
 
@@ -17,10 +18,12 @@ class Site(object):
 
     def __init__(self, name=None, project=None, lang=None, base_url=None,
             article_path=None, script_path=None, sql=(None, None),
-            namespaces=None):
+            namespaces=None, login=(None, None)):
         """
         Docstring needed
         """
+        # attributes referring to site information, filled in by an API query
+        # if they are missing (and an API url is available)
         self._name = name
         self._project = project
         self._lang = lang
@@ -30,9 +33,45 @@ class Site(object):
         self._sql = sql
         self._namespaces = namespaces
 
-        # get all of the above attributes that were not specified by the user
+        # set up cookiejar and URL opener for making API queries
+        self._cookiejar = CookieJar(cookie_file)
+        self._opener = build_opener(HTTPCookieProcessor(self._cookiejar))
+        self._opener.addheaders = [('User-agent', USER_AGENT)]
+
+        # use a username and password to login if they were provided
+        if login[0] is not None and login[1] is not None:
+            self._login(login[0], login[1])
+
+        # get all of the above attributes that were not specified as arguments
         self._load_attributes()
 
+    def _login(self, name, password, token="", attempt=0):
+        """
+        Docstring needed
+        """
+        params = {"action": "login", "lgname": name, "lgpassword": password,
+                  "lgtoken": token}
+        result = self.api_query(params)
+        res = result["login"]["result"]
+
+        if res == "Success":
+            return
+        elif res == "NeedToken" and attempt == 0:
+            token = result["login"]["token"]
+            return self._login(name, password, token, attempt=1)
+        else:
+            if res == "Illegal":
+                e = "The provided username is illegal."
+            elif res == "NotExists":
+                e = "The provided username does not exist."
+            elif res == "EmptyPass":
+                e = "No password was given."
+            elif res == "WrongPass" or res == "WrongPluginPass":
+                e = "The given password is incorrect."
+            else:
+                e = "Couldn't login; server says '{0}'.".format(res)
+            raise LoginError(e)
+
     def _load_attributes(self, force=False):
         """
         Docstring needed
@@ -103,10 +142,24 @@ class Site(object):
         Docstring needed
         """
         url = ''.join((self._base_url, self._script_path, "/api.php"))
-        params["format"] = "json"
+        params["format"] = "json"  # this is the only format we understand
         data = urlencode(params)
-        result = urlopen(url, data).read()
-        return loads(result)
+
+        try:
+            response = self._opener.open(url, data)
+        except URLError as error:
+            if hasattr(error, "reason"):
+                e = "API query at {0} failed because {1}.".format(error.geturl,
+                                                                  error.reason)
+            elif hasattr(error, "code"):
+                e = "API query at {0} failed; got an error code of {1}."
+                e = e.format(error.geturl, error.code)
+            else:
+                e = "API query failed."
+            raise SiteAPIError(e)
+        else:
+            result = response.read()
+            return loads(result)  # parse as a JSON object
 
     def name(self):
         """
@@ -195,8 +248,13 @@ class Site(object):
         pagename = "{0}:{1}".format(prefix, catname)
         return Category(self, pagename)
 
-    def get_user(self, username):
+    def get_user(self, username=None):
         """
         Docstring needed
         """
+        if username is None:
+            params = {"action": "query", "meta": "userinfo"}
+            result = self.api_query(params)
+            username = result["query"]["userinfo"]["name"]
+
         return User(self, username)

From 575e975930269645002d5aa57678069ad3403fb6 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Fri, 29 Jul 2011 02:44:04 -0400
Subject: [PATCH 11/19] Bugfix in user.name() and user.exists().

---
 wiki/tools/user.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/wiki/tools/user.py b/wiki/tools/user.py
index b406b97..94a46d4 100644
--- a/wiki/tools/user.py
+++ b/wiki/tools/user.py
@@ -30,13 +30,13 @@ class User(object):
         self._emailable = None
         self._gender = None
 
-    def _get_attribute(self, attr, force):
+    def _get_attribute(self, attr, force, raise_exception=True):
         """
         Docstring needed
         """
         if self._exists is None or force:
             self._load_attributes()
-        if self._exists is False:
+        if self._exists is False and raise_exception:
             e = "User '{0}' does not exist.".format(self._name)
             raise UserNotFoundError(e)
         return getattr(self, attr)
@@ -84,13 +84,13 @@ class User(object):
         """
         Docstring needed
         """
-        return self._get_attribute("_name", force)
+        return self._get_attribute("_name", force, raise_exception=False)
 
     def exists(self, force=False):
         """
         Docstring needed
         """
-        return self._get_attribute("_exists", force)
+        return self._get_attribute("_exists", force, raise_exception=False)
 
     def userid(self, force=False):
         """

From c7bbb211179e343b640db6881726bcc407449607 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Fri, 29 Jul 2011 03:42:44 -0400
Subject: [PATCH 12/19] Improvements to Site and User by removing unnecessary
 API queries.

* Site: New _get_logged_in_user() method, which returns the name of the
  user we are currently acting as. It replaces the inline userinfo query
  formerly in get_user(), which now calls it when the username arg is
  None. The method first looks in self._cookiejar for a cookie named
  self._name + "UserName" (e.g. "enwikiUserName") on the site's domain,
  and only does an API query if no such cookie is found. This avoids an
  API query that is normally only needed when we are not logged in.
* Site: Bugfix in __init__(): CookieJar() is no longer passed the stray
  cookie_file argument.
* User: Reverted earlier change to _get_attribute() (addition of
  raise_exception arg); name() and exists() now use their own code, which
  is simpler.
* User: Calling name() does not do an API query unless force=True, unlike
  the other "get" methods.
* User: .join() instead of .format() because I feel it looks cleaner and is
  probably more efficient.
---
 wiki/tools/site.py | 28 ++++++++++++++++++++++------
 wiki/tools/user.py | 16 ++++++++++------
 2 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/wiki/tools/site.py b/wiki/tools/site.py
index f4b854f..f32e3c2 100644
--- a/wiki/tools/site.py
+++ b/wiki/tools/site.py
@@ -2,8 +2,9 @@
 
 from cookielib import CookieJar
 from json import loads
-from urllib import urlencode
+from urllib import unquote_plus, urlencode
 from urllib2 import build_opener, HTTPCookieProcessor, URLError
+from urlparse import urlparse
 
 from wiki.tools.category import Category
 from wiki.tools.constants import *
@@ -34,7 +35,7 @@ class Site(object):
         self._namespaces = namespaces
 
         # set up cookiejar and URL opener for making API queries
-        self._cookiejar = CookieJar(cookie_file)
+        self._cookiejar = CookieJar()
         self._opener = build_opener(HTTPCookieProcessor(self._cookiejar))
         self._opener.addheaders = [('User-agent', USER_AGENT)]
 
@@ -72,6 +73,24 @@ class Site(object):
                 e = "Couldn't login; server says '{0}'.".format(res)
             raise LoginError(e)
 
+    def _get_logged_in_user(self):
+        """
+        Docstring needed
+        """
+        # first try to get username from the cookie jar to avoid an
+        # unnecessary API query
+        cookie_name = ''.join((self._name, "UserName"))
+        cookie_domain = urlparse(self._base_url).netloc
+        for cookie in self._cookiejar:
+            if cookie.name == cookie_name and cookie.domain == cookie_domain:
+                return unquote_plus(cookie.value)
+        
+        # if we end up here, we're probably an anon and thus an API query
+        # will be required to get our username
+        params = {"action": "query", "meta": "userinfo"}
+        result = self.api_query(params)
+        return result["query"]["userinfo"]["name"]
+
     def _load_attributes(self, force=False):
         """
         Docstring needed
@@ -253,8 +272,5 @@ class Site(object):
         Docstring needed
         """
         if username is None:
-            params = {"action": "query", "meta": "userinfo"}
-            result = self.api_query(params)
-            username = result["query"]["userinfo"]["name"]
-
+            username = self._get_logged_in_user()
         return User(self, username)
diff --git a/wiki/tools/user.py b/wiki/tools/user.py
index 94a46d4..98f9670 100644
--- a/wiki/tools/user.py
+++ b/wiki/tools/user.py
@@ -30,13 +30,13 @@ class User(object):
         self._emailable = None
         self._gender = None
 
-    def _get_attribute(self, attr, force, raise_exception=True):
+    def _get_attribute(self, attr, force):
         """
         Docstring needed
         """
         if self._exists is None or force:
             self._load_attributes()
-        if self._exists is False and raise_exception:
+        if self._exists is False:
             e = "User '{0}' does not exist.".format(self._name)
             raise UserNotFoundError(e)
         return getattr(self, attr)
@@ -84,13 +84,17 @@ class User(object):
         """
         Docstring needed
         """
-        return self._get_attribute("_name", force, raise_exception=False)
+        if force:
+            self._load_attributes()
+        return self._name
 
     def exists(self, force=False):
         """
         Docstring needed
         """
-        return self._get_attribute("_exists", force, raise_exception=False)
+        if self._exists is None or force:
+            self._load_attributes()
+        return self._exists
 
     def userid(self, force=False):
         """
@@ -145,7 +149,7 @@ class User(object):
         Docstring needed
         """
         prefix = self.site.namespace_id_to_name(NS_USER)
-        pagename = "{0}:{1}".format(prefix, self._name)
+        pagename = ''.join((prefix, ":", self._name))
         return Page(self.site, pagename)
 
     def talkpage(self):
@@ -153,5 +157,5 @@ class User(object):
         Docstring needed
         """
         prefix = self.site.namespace_id_to_name(NS_USER_TALK)
-        pagename = "{0}:{1}".format(prefix, self._name)
+        pagename = ''.join((prefix, ":", self._name))
         return Page(self.site, pagename)

From 612c9c8ff6999b14c89a534a7249e55b3984d41b Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Sun, 31 Jul 2011 18:37:59 -0400
Subject: [PATCH 13/19] Major improvements to cookies, login/logout, and
 crosswiki support.

* Exceptions: New PermissionsError; reworded docstring of SiteAPIError.
* Site: __init__() accepts an optional cookiejar parameter; if it is
  omitted, we use a fresh CookieJar(). Added five new cookie/username-
  related methods. We only log in from __init__() if a username and
  password were provided and our cookies do not already show us logged
  in as that user. _login() and _logout() both try to save cookies via
  _save_cookiejar(). _load_attributes() automatically refreshes all
  attributes other than namespaces if at least one is missing, instead of
  only the missing ones. api_query() raises SiteAPIError if either
  self._base_url or self._script_path is missing. Removed some pointless
  methods and renamed one; added domain().
* Functions: _get_site_object_from_dict() is cleaner, and adds our
  cookiejar to Site instances using _get_cookiejar(), which loads an
  LWPCookieJar() object from the ".cookies" file in our project root. The
  same cookiejar, cached in a module-level global, is returned for every
  site, enabling crosswiki login.
* User: Renamed some methods.
* .gitignore: Added .cookies file.
---
 .gitignore               |   3 +
 wiki/tools/exceptions.py |   8 +-
 wiki/tools/functions.py  |  94 ++++++++++-------
 wiki/tools/site.py       | 260 ++++++++++++++++++++++++++++++-----------------
 wiki/tools/user.py       |   6 +-
 5 files changed, 235 insertions(+), 136 deletions(-)

diff --git a/.gitignore b/.gitignore
index bc67eea..1884197 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,9 @@
 # Ignore bot-specific config file:
 config.json
 
+# Ignore cookies file:
+.cookies
+
 # Ignore OS X's crud:
 *.DS_Store
 
diff --git a/wiki/tools/exceptions.py b/wiki/tools/exceptions.py
index 0620262..d28cac2 100644
--- a/wiki/tools/exceptions.py
+++ b/wiki/tools/exceptions.py
@@ -15,12 +15,18 @@ class SiteNotFoundError(WikiToolsetError):
 
 class SiteAPIError(WikiToolsetError):
     """We couldn't connect to a site's API, perhaps because the server doesn't
-    exist, our URL is wrong, or they're having temporary problems."""
+    exist, our URL is wrong or incomplete, or they're having temporary
+    problems."""
 
 class LoginError(WikiToolsetError):
     """An error occured while trying to login. Perhaps the username/password is
     incorrect."""
 
+class PermissionsError(WikiToolsetError):
+    """We tried to do something we don't have permission to, like a non-admin
+    trying to delete a page, or trying to edit a page when no login information
+    was provided."""
+
 class NamespaceNotFoundError(WikiToolsetError):
     """A requested namespace name or namespace ID does not exist."""
 
diff --git a/wiki/tools/functions.py b/wiki/tools/functions.py
index 2618a57..ff69c19 100644
--- a/wiki/tools/functions.py
+++ b/wiki/tools/functions.py
@@ -10,7 +10,11 @@ There's no need to import this module explicitly. All functions here are
 automatically available from wiki.tools.
 """
 
+from cookielib import LWPCookieJar, LoadError
+import errno
 from getpass import getpass
+from os import chmod, path
+import stat
 
 from core import config
 from wiki.tools.exceptions import SiteNotFoundError
@@ -18,6 +22,8 @@ from wiki.tools.site import Site
 
 __all__ = ["get_site"]
 
+_cookiejar = None
+
 def _load_config():
     """Called by a config-requiring function, such as get_site(), when config
     has not been loaded. This will usually happen only if we're running code
@@ -31,50 +37,60 @@ def _load_config():
     else:
         config.parse_config(None)
 
+def _get_cookiejar():
+    """Returns a LWPCookieJar object loaded from our .cookies file. The same
+    one is returned every time.
+
+    The .cookies file is located in the project root, same directory as
+    config.json and earwigbot.py. If it doesn't exist, we will create the file
+    and set it to be readable and writeable only by us. If it exists but the
+    information inside is bogus, we will ignore it.
+
+    This is normally called by _get_site_object_from_dict() (in turn called by
+    get_site()), and the cookiejar is passed to our Site's constructor, used
+    when it makes API queries. This way, we can easily preserve cookies between
+    sites (e.g., for CentralAuth), making logins easier.
+    """
+    global _cookiejar
+    if _cookiejar is not None:
+        return _cookiejar
+
+    cookie_file = path.join(config.root_dir, ".cookies")
+    _cookiejar = LWPCookieJar(cookie_file)
+
+    try:
+        _cookiejar.load()
+    except LoadError:
+        # file contains bad data, so ignore it completely
+        pass
+    except IOError as e:
+        if e.errno == errno.ENOENT:  # "No such file or directory"
+            # create the file and restrict reading/writing only to the owner,
+            # so others can't peak at our cookies
+            open(cookie_file, "w").close()
+            chmod(cookie_file, stat.S_IRUSR|stat.S_IWUSR)
+        else:
+            raise
+
+    return _cookiejar
+
 def _get_site_object_from_dict(name, d):
     """Return a Site object based on the contents of a dict, probably acquired
     through our config file, and a separate name.
     """
-    try:
-        project = d["project"]
-    except KeyError:
-        project = None
-    try:
-        lang = d["lang"]
-    except KeyError:
-        lang = None
-    try:
-        base_url = d["baseURL"]
-    except KeyError:
-        base_url = None
-    try:
-        article_path = d["articlePath"]
-    except KeyError:
-        article_path = None
-    try:
-        script_path = d["scriptPath"]
-    except KeyError:
-        script_path = None
-    try:
-        sql_server = d["sqlServer"]
-    except KeyError:
-        sql_server = None
-    try:
-        sql_db = d["sqlDB"]
-    except KeyError:
-        sql_db = None
-    try:
-        namespaces = d["namespaces"]
-    except KeyError:
-        namespaces = None
-    try:
-        login = (config.wiki["username"], config.wiki["password"])
-    except KeyError:
-        login = (None, None)
+    project = d.get("project")
+    lang = d.get("lang")
+    base_url = d.get("baseURL")
+    article_path = d.get("articlePath")
+    script_path = d.get("scriptPath")
+    sql = (d.get("sqlServer"), d.get("sqlDB"))
+    namespaces = d.get("namespaces")
+    login = (config.wiki.get("username"), config.wiki.get("password"))
+    cookiejar = _get_cookiejar()
 
     return Site(name=name, project=project, lang=lang, base_url=base_url,
-        article_path=article_path, script_path=script_path,
-        sql=(sql_server, sql_db), namespaces=namespaces, login=login)
+        article_path=article_path, script_path=script_path, sql=sql,
+        namespaces=namespaces, login=login, cookiejar=cookiejar)
 
 def get_site(name=None, project=None, lang=None):
     """Returns a Site instance based on information from our config file.
@@ -112,7 +128,7 @@ def get_site(name=None, project=None, lang=None):
     # no args given, so return our default site (project is None implies lang
     # is None, so we don't need to add that in)
     if name is None and project is None:
-        try:  # ...so use the default site
+        try:
             default = config.wiki["defaultSite"]
         except KeyError:
             e = "Default site is not specified in config."
diff --git a/wiki/tools/site.py b/wiki/tools/site.py
index f32e3c2..982cd30 100644
--- a/wiki/tools/site.py
+++ b/wiki/tools/site.py
@@ -2,6 +2,7 @@
 
 from cookielib import CookieJar
 from json import loads
+from re import escape as re_escape, match as re_match
 from urllib import unquote_plus, urlencode
 from urllib2 import build_opener, HTTPCookieProcessor, URLError
 from urlparse import urlparse
@@ -19,12 +20,12 @@ class Site(object):
 
     def __init__(self, name=None, project=None, lang=None, base_url=None,
             article_path=None, script_path=None, sql=(None, None),
-            namespaces=None, login=(None, None)):
+            namespaces=None, login=(None, None), cookiejar=None):
         """
         Docstring needed
         """
         # attributes referring to site information, filled in by an API query
-        # if they are missing (and an API url is available)
+        # if they are missing (and an API url can be determined)
         self._name = name
         self._project = project
         self._lang = lang
@@ -35,61 +36,22 @@ class Site(object):
         self._namespaces = namespaces
 
         # set up cookiejar and URL opener for making API queries
-        self._cookiejar = CookieJar()
+        if cookiejar is not None:
+            self._cookiejar = cookiejar
+        else:
+            self._cookiejar = CookieJar()
         self._opener = build_opener(HTTPCookieProcessor(self._cookiejar))
         self._opener.addheaders = [('User-agent', USER_AGENT)]
 
-        # use a username and password to login if they were provided
-        if login[0] is not None and login[1] is not None:
-            self._login(login[0], login[1])
-
         # get all of the above attributes that were not specified as arguments
         self._load_attributes()
 
-    def _login(self, name, password, token="", attempt=0):
-        """
-        Docstring needed
-        """
-        params = {"action": "login", "lgname": name, "lgpassword": password,
-                  "lgtoken": token}
-        result = self.api_query(params)
-        res = result["login"]["result"]
-
-        if res == "Success":
-            return
-        elif res == "NeedToken" and attempt == 0:
-            token = result["login"]["token"]
-            return self._login(name, password, token, attempt=1)
-        else:
-            if res == "Illegal":
-                e = "The provided username is illegal."
-            elif res == "NotExists":
-                e = "The provided username does not exist."
-            elif res == "EmptyPass":
-                e = "No password was given."
-            elif res == "WrongPass" or res == "WrongPluginPass":
-                e = "The given password is incorrect."
-            else:
-                e = "Couldn't login; server says '{0}'.".format(res)
-            raise LoginError(e)
-
-    def _get_logged_in_user(self):
-        """
-        Docstring needed
-        """
-        # first try to get username from the cookie jar to avoid an
-        # unnecessary API query
-        cookie_name = ''.join((self._name, "UserName"))
-        cookie_domain = urlparse(self._base_url).netloc
-        for cookie in self._cookiejar:
-            if cookie.name == cookie_name and cookie.domain == cookie_domain:
-                return unquote_plus(cookie.value)
-        
-        # if we end up here, we're probably an anon and thus an API query
-        # will be required to get our username
-        params = {"action": "query", "meta": "userinfo"}
-        result = self.api_query(params)
-        return result["query"]["userinfo"]["name"]
+        # if we have a name/pass and the API says we're not logged in, log in
+        self._login_info = name, password = login
+        if name is not None and password is not None:
+            logged_in_as = self._get_username_from_cookies()
+            if logged_in_as is None or name != logged_in_as:
+                self._login(login)
 
     def _load_attributes(self, force=False):
         """
@@ -101,7 +63,7 @@ class Site(object):
             self._article_path, self._script_path]
 
         params = {"action": "query", "meta": "siteinfo"}
-        
+
         if self._namespaces is None or force:
             params["siprop"] = "general|namespaces|namespacealiases"
             result = self.api_query(params)
@@ -113,24 +75,12 @@ class Site(object):
             result = self.api_query(params)
 
         res = result["query"]["general"]
-
-        if self._name is None or force:
-            self._name = res["wikiid"]
-
-        if self._project is None or force:
-            self._project = res["sitename"].lower()
-
-        if self._lang is None or force:
-            self._lang = res["lang"]
-
-        if self._base_url is None or force:
-            self._base_url = res["server"]
-
-        if self._article_path is None or force:
-            self._article_path = res["articlepath"]
-
-        if self._script_path is None or force:
-            self._script_path = res["scriptpath"]
+        self._name = res["wikiid"]
+        self._project = res["sitename"].lower()
+        self._lang = res["lang"]
+        self._base_url = res["server"]
+        self._article_path = res["articlepath"]
+        self._script_path = res["scriptpath"]
 
     def _load_namespaces(self, result):
         """
@@ -156,20 +106,162 @@ class Site(object):
             alias = namespace["*"]
             self._namespaces[ns_id].append(alias)
 
+    def _get_cookie(self, name, domain):
+        """Return the cookie `name` in `domain`, unless it is expired. Return
+        None if no cookie was found.
+        """
+        for cookie in self._cookiejar:
+            if cookie.name == name and cookie.domain == domain:
+                if cookie.is_expired():
+                    break
+                return cookie
+        return None
+
+    def _get_username_from_cookies(self):
+        """Try to return our username based solely on cookies.
+
+        First, we'll look for a cookie named self._name + "Token", like
+        "enwikiToken". If it exists and isn't expired, we'll assume it's valid
+        and try to return the value of the cookie self._name + "UserName" (like
+        "enwikiUserName"). This should work fine on wikis without single-user
+        login.
+
+        If `enwikiToken` doesn't exist, we'll try to find a cookie named
+        `centralauth_Token`. If this exists and is not expired, we'll try to
+        return the value of `centralauth_User`.
+
+        If we didn't get any matches, we'll return None. Our goal here isn't to
+        return the most likely username, or what we *want* our username to be
+        (for that, we'd do self._login_info[0]), but rather to get our current
+        username without an unnecessary ?action=query&meta=userinfo API query. 
+        """
+        domain = self.domain()
+        name = ''.join((self._name, "Token"))
+        cookie = self._get_cookie(name, domain)
+
+        if cookie is not None:
+            name = ''.join((self._name, "UserName"))
+            user_name = self._get_cookie(name, domain)
+            if user_name is not None:
+                return user_name.value
+
+        name = "centralauth_Token"
+        for cookie in self._cookiejar:            
+            if cookie.domain_initial_dot is False or cookie.is_expired():
+                continue
+            if cookie.name != name:
+                continue
+            # build a regex that will match domains this cookie affects
+            search = ''.join(("(.*?)", re_escape(cookie.domain)))
+            if re_match(search, domain):  # test it against our site
+                user_name = self._get_cookie("centralauth_User", cookie.domain)
+                if user_name is not None:
+                    return user_name.value
+
+        return None
+
+    def _get_username_from_api(self):
+        """Do a simple API query to get our username and return it.
+        
+        This is a reliable way to make sure we are actually logged in, because
+        it doesn't deal with annoying cookie logic, but it results in an API
+        query that is unnecessary in many cases.
+        
+        Called by _get_username() (in turn called by get_user() with no
+        username argument) when cookie lookup fails, probably indicating that
+        we are logged out.
+        """
+        params = {"action": "query", "meta": "userinfo"}
+        result = self.api_query(params)
+        return result["query"]["userinfo"]["name"]
+
+    def _get_username(self):
+        """Return the name of the current user, whether logged in or not.
+
+        First, we'll try to deduce it solely from cookies, to avoid an
+        unnecessary API query. For the cookie-detection method, see
+        _get_username_from_cookies()'s docs.
+
+        If our username isn't in cookies, then we're probably not logged in, or
+        something fishy is going on (like forced logout). In this case, do a
+        single API query for our username (or IP address) and return that.
+        """
+        name = self._get_username_from_cookies()
+        if name is not None:
+            return name
+        return self._get_username_from_api()
+
+    def _save_cookiejar(self):
+        """Try to save our cookiejar after doing a (normal) login or logout.
+
+        Calls the standard .save() method with no filename. Don't fret if our
+        cookiejar doesn't support saving (CookieJar raises AttributeError,
+        FileCookieJar raises NotImplementedError) or no default filename was
+        given (LWPCookieJar and MozillaCookieJar raise ValueError).
+        """
+        try:
+            self._cookiejar.save()
+        except (AttributeError, NotImplementedError, ValueError):
+            pass
+
+    def _login(self, login, token=None, attempt=0):
+        """
+        Docstring needed
+        """
+        name, password = login
+        params = {"action": "login", "lgname": name, "lgpassword": password}
+        if token is not None:
+            params["lgtoken"] = token
+        result = self.api_query(params)
+        res = result["login"]["result"]
+
+        if res == "Success":
+            self._save_cookiejar()
+        elif res == "NeedToken" and attempt == 0:
+            token = result["login"]["token"]
+            return self._login(login, token, attempt=1)
+        else:
+            if res == "Illegal":
+                e = "The provided username is illegal."
+            elif res == "NotExists":
+                e = "The provided username does not exist."
+            elif res == "EmptyPass":
+                e = "No password was given."
+            elif res == "WrongPass" or res == "WrongPluginPass":
+                e = "The given password is incorrect."
+            else:
+                e = "Couldn't login; server says '{0}'.".format(res)
+            raise LoginError(e)
+
+    def _logout(self):
+        """
+        Docstring needed
+        """
+        params = {"action": "logout"}
+        self.api_query(params)
+        self._cookiejar.clear()
+        self._save_cookiejar()
+
     def api_query(self, params):
         """
         Docstring needed
         """
+        if self._base_url is None or self._script_path is None:
+            e = "Tried to do an API query, but no API URL is known."
+            raise SiteAPIError(e)
+
         url = ''.join((self._base_url, self._script_path, "/api.php"))
         params["format"] = "json"  # this is the only format we understand
         data = urlencode(params)
 
+        print url, data  # debug code
+
         try:
             response = self._opener.open(url, data)
         except URLError as error:
             if hasattr(error, "reason"):
-                e = "API query at {0} failed because {1}.".format(error.geturl,
-                                                                  error.reason)
+                e = "API query at {0} failed because {1}."
+                e = e.format(error.geturl, error.reason)
             elif hasattr(error, "code"):
                 e = "API query at {0} failed; got an error code of {1}."
                 e = e.format(error.geturl, error.code)
@@ -198,29 +290,11 @@ class Site(object):
         """
         return self._lang
 
-    def base_url(self):
-        """
-        Docstring needed
-        """
-        return self._base_url
-
-    def article_path(self):
-        """
-        Docstring needed
-        """
-        return self._article_path
-
-    def script_path(self):
-        """
-        Docstring needed
-        """
-        return self._script_path
-
-    def namespaces(self):
+    def domain(self):
         """
         Docstring needed
         """
-        return self._namespaces
+        return urlparse(self._base_url).netloc
 
     def namespace_id_to_name(self, ns_id, all=False):
         """
@@ -272,5 +346,5 @@ class Site(object):
         Docstring needed
         """
         if username is None:
-            username = self._get_logged_in_user()
+            username = self._get_username()
         return User(self, username)
diff --git a/wiki/tools/user.py b/wiki/tools/user.py
index 98f9670..be71515 100644
--- a/wiki/tools/user.py
+++ b/wiki/tools/user.py
@@ -132,7 +132,7 @@ class User(object):
         """
         return self._get_attribute("_registration", force)
 
-    def is_emailable(self, force=False):
+    def emailable(self, force=False):
         """
         Docstring needed
         """
@@ -144,7 +144,7 @@ class User(object):
         """
         return self._get_attribute("_gender", force)
 
-    def userpage(self):
+    def get_userpage(self):
         """
         Docstring needed
         """
@@ -152,7 +152,7 @@ class User(object):
         pagename = ''.join((prefix, ":", self._name))
         return Page(self.site, pagename)
 
-    def talkpage(self):
+    def get_talkpage(self):
         """
         Docstring needed
         """

From 77c541a5133af8fd3b4d77383cb38ee61dfb0b0f Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Sun, 31 Jul 2011 23:22:41 -0400
Subject: [PATCH 14/19] Accept gzipped data and decompress it in api_query().

---
 wiki/tools/site.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/wiki/tools/site.py b/wiki/tools/site.py
index 982cd30..db3b7d2 100644
--- a/wiki/tools/site.py
+++ b/wiki/tools/site.py
@@ -1,8 +1,10 @@
 # -*- coding: utf-8  -*-
 
 from cookielib import CookieJar
+from gzip import GzipFile
 from json import loads
 from re import escape as re_escape, match as re_match
+from StringIO import StringIO
 from urllib import unquote_plus, urlencode
 from urllib2 import build_opener, HTTPCookieProcessor, URLError
 from urlparse import urlparse
@@ -41,7 +43,8 @@ class Site(object):
         else:
             self._cookiejar = CookieJar()
         self._opener = build_opener(HTTPCookieProcessor(self._cookiejar))
-        self._opener.addheaders = [('User-agent', USER_AGENT)]
+        self._opener.addheaders = [("User-Agent", USER_AGENT),
+                                   ("Accept-Encoding", "gzip")]
 
         # get all of the above attributes that were not specified as arguments
         self._load_attributes()
@@ -270,6 +273,10 @@ class Site(object):
             raise SiteAPIError(e)
         else:
             result = response.read()
+            if response.headers.get("Content-Encoding") == "gzip":
+                stream = StringIO(result)
+                gzipper = GzipFile(fileobj=stream)      
+                result = gzipper.read()
             return loads(result)  # parse as a JSON object
 
     def name(self):

From 434863dcd1478e59ff475414e032289397057a6d Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Tue, 2 Aug 2011 04:45:19 -0400
Subject: [PATCH 15/19] Docstrings for everything in Site; some cleanup in
 Site/Functions.

---
 wiki/tools/functions.py |  12 +++-
 wiki/tools/site.py      | 156 +++++++++++++++++++++++++++++++++++-------------
 2 files changed, 123 insertions(+), 45 deletions(-)

diff --git a/wiki/tools/functions.py b/wiki/tools/functions.py
index ff69c19..bc7b187 100644
--- a/wiki/tools/functions.py
+++ b/wiki/tools/functions.py
@@ -89,8 +89,8 @@ def _get_site_object_from_dict(name, d):
     cookiejar = _get_cookiejar()
 
     return Site(name=name, project=project, lang=lang, base_url=base_url,
-        article_path=article_path, script_path=script_path, sql=sql,
-        namespaces=namespaces, login=login, cookiejar=cookiejar)
+                article_path=article_path, script_path=script_path, sql=sql,
+                namespaces=namespaces, login=login, cookiejar=cookiejar)
 
 def get_site(name=None, project=None, lang=None):
     """Returns a Site instance based on information from our config file.
@@ -163,3 +163,11 @@ def get_site(name=None, project=None, lang=None):
             return _get_site_object_from_dict(sitename, site)
     e = "Site '{0}:{1}' not found in config.".format(project, lang)
     raise SiteNotFoundError(e)
+
+def add_site():
+    """STUB: config editing is required first"""
+    pass
+
+def del_site():
+    """STUB: config editing is required first"""
+    pass
diff --git a/wiki/tools/site.py b/wiki/tools/site.py
index db3b7d2..65ed9b5 100644
--- a/wiki/tools/site.py
+++ b/wiki/tools/site.py
@@ -18,13 +18,32 @@ from wiki.tools.user import User
 class Site(object):
     """
     EarwigBot's Wiki Toolset: Site Class
+
+    Represents a Site, with support for API queries and returning Pages, Users,
+    and Categories. The constructor takes a bunch of arguments and you probably
+    won't need to call it directly, rather tools.get_site() for returning Site
+    instances, tools.add_site() for adding new ones to config, and
+    tools.del_site() for removing old ones from config, should suffice.
     """
 
     def __init__(self, name=None, project=None, lang=None, base_url=None,
-            article_path=None, script_path=None, sql=(None, None),
-            namespaces=None, login=(None, None), cookiejar=None):
-        """
-        Docstring needed
+                 article_path=None, script_path=None, sql=(None, None),
+                 namespaces=None, login=(None, None), cookiejar=None):
+        """Constructor for new Site instances.
+
+        This probably isn't necessary to call yourself unless you're building a
+        Site that's not in your config and you don't want to add it - normally
+        all you need is tools.get_site(name), which creates the Site for you
+        based on your config file. We accept a bunch of kwargs, but the only
+        ones you really "need" are `base_url` and `script_path` - this is
+        enough to figure out an API url. `login`, a tuple of
+        (username, password), is highly recommended. `cookiejar` will be used
+        to store cookies, and we'll use a normal CookieJar if none is given.
+
+        First, we'll store the given arguments as attributes, then set up our
+        URL opener. We'll load any of the attributes that weren't given from
+        the API, and then log in if a username/pass was given and we aren't
+        already logged in.
         """
         # attributes referring to site information, filled in by an API query
         # if they are missing (and an API url can be determined)
@@ -57,8 +76,14 @@ class Site(object):
                 self._login(login)
 
     def _load_attributes(self, force=False):
-        """
-        Docstring needed
+        """Load data about our Site from the API.
+
+        This function is called by __init__() when one of the site attributes
+        was not given as a keyword argument. We'll do an API query to get the
+        missing data, but only if there actually *is* missing data.
+
+        Additionally, you can call this with `force=True` to forcibly reload
+        all attributes.
         """
         # all attributes to be loaded, except _namespaces, which is a special
         # case because it requires additional params in the API query
@@ -86,8 +111,10 @@ class Site(object):
         self._script_path = res["scriptpath"]
 
     def _load_namespaces(self, result):
-        """
-        Docstring needed
+        """Fill self._namespaces with a dict of namespace IDs and names.
+
+        Called by _load_attributes() with API data as `result` when
+        self._namespaces was not given as a kwarg to __init__().
         """
         self._namespaces = {}
 
@@ -110,15 +137,12 @@ class Site(object):
             self._namespaces[ns_id].append(alias)
 
     def _get_cookie(self, name, domain):
-        """Return the cookie `name` in `domain`, unless it is expired. Return
-        None if no cookie was found.
-        """
+        """Return the named cookie unless it is expired or doesn't exist."""
         for cookie in self._cookiejar:
             if cookie.name == name and cookie.domain == domain:
                 if cookie.is_expired():
                     break
                 return cookie
-        return None
 
     def _get_username_from_cookies(self):
         """Try to return our username based solely on cookies.
@@ -161,14 +185,12 @@ class Site(object):
                 if user_name is not None:
                     return user_name.value
 
-        return None
-
     def _get_username_from_api(self):
         """Do a simple API query to get our username and return it.
         
         This is a reliable way to make sure we are actually logged in, because
         it doesn't deal with annoying cookie logic, but it results in an API
-        query that is unnecessary in many cases.
+        query that is unnecessary in some cases.
         
         Called by _get_username() (in turn called by get_user() with no
         username argument) when cookie lookup fails, probably indicating that
@@ -208,8 +230,24 @@ class Site(object):
             pass
 
     def _login(self, login, token=None, attempt=0):
-        """
-        Docstring needed
+        """Safely login through the API.
+
+        Normally, this is called by __init__() if a username and password have
+        been provided and no valid login cookies were found. The only other
+        time it needs to be called is when those cookies expire, which is done
+        automatically by api_query() if a query fails.
+
+        Recent versions of MediaWiki's API have fixed a CSRF vulnerability,
+        requiring login to be done in two separate requests. If the response
+        from our initial request is "NeedToken", we'll do another one with
+        the token. If login is successful, we'll try to save our cookiejar.
+
+        Raises LoginError on login errors (duh), like bad passwords and
+        nonexistent usernames.
+
+        `login` is a (username, password) tuple. `token` is the token returned
+        from our first request, and `attempt` is to prevent getting stuck in a
+        loop if MediaWiki isn't acting right.
         """
         name, password = login
         params = {"action": "login", "lgname": name, "lgpassword": password}
@@ -237,8 +275,11 @@ class Site(object):
             raise LoginError(e)
 
     def _logout(self):
-        """
-        Docstring needed
+        """Safely logout through the API.
+
+        We'll do a simple API request (api.php?action=logout), clear our
+        cookiejar (which probably contains now-invalidated cookies) and try to
+        save it, if it supports that sort of thing.
         """
         params = {"action": "logout"}
         self.api_query(params)
@@ -246,8 +287,23 @@ class Site(object):
         self._save_cookiejar()
 
     def api_query(self, params):
-        """
-        Docstring needed
+        """Do an API query with `params` as a dict of parameters.
+
+        This will first attempt to construct an API url from self._base_url and
+        self._script_path. We need both of these, or else we'll raise
+        SiteAPIError.
+
+        We'll encode the given params, adding format=json along the way, and
+        make the request through self._opener, which has built-in cookie
+        support via self._cookiejar, a User-Agent
+        (wiki.tools.constants.USER_AGENT), and Accept-Encoding set to "gzip".
+        Assuming everything went well, we'll gunzip the data (if compressed),
+        load it as a JSON object, and return it.
+
+        If our request failed, we'll raise SiteAPIError with details.
+
+        There's helpful MediaWiki API documentation at
+        <http://www.mediawiki.org/wiki/API>.
         """
         if self._base_url is None or self._script_path is None:
             e = "Tried to do an API query, but no API URL is known."
@@ -280,32 +336,32 @@ class Site(object):
             return loads(result)  # parse as a JSON object
 
     def name(self):
-        """
-        Docstring needed
-        """
+        """Returns the Site's name (or "wikiid" in the API), like "enwiki"."""
         return self._name
 
     def project(self):
-        """
-        Docstring needed
-        """
+        """Returns the Site's project name in lowercase, like "wikipedia"."""
         return self._project
 
     def lang(self):
-        """
-        Docstring needed
-        """
+        """Returns the Site's language, like "en" or "es"."""
         return self._lang
 
     def domain(self):
-        """
-        Docstring needed
-        """
+        """Returns the Site's web domain, like "en.wikipedia.org"."""
         return urlparse(self._base_url).netloc
 
     def namespace_id_to_name(self, ns_id, all=False):
-        """
-        Docstring needed
+        """Given a namespace ID, returns associated namespace names.
+
+        If all is False (default), we'll return the first name in the list,
+        which is usually the localized version. Otherwise, we'll return the
+        entire list, which includes the canonical name.
+
+        For example, returns u"Wikipedia" if ns_id=4 and all=False on enwiki;
+        returns [u"Wikipedia", u"Project"] if ns_id=4 and all=True.
+
+        Raises NamespaceNotFoundError if the ID is not found.
         """
         try:
             if all:
@@ -317,8 +373,12 @@ class Site(object):
             raise NamespaceNotFoundError(e)
 
     def namespace_name_to_id(self, name):
-        """
-        Docstring needed
+        """Given a namespace name, returns the associated ID.
+
+        Like namespace_id_to_name(), but reversed. Case is ignored, because
+        namespaces are assumed to be case-insensitive.
+
+        Raises NamespaceNotFoundError if the name is not found.
         """
         lname = name.lower()
         for ns_id, names in self._namespaces.items():
@@ -330,8 +390,14 @@ class Site(object):
         raise NamespaceNotFoundError(e)
 
     def get_page(self, pagename):
-        """
-        Docstring needed
+        """Returns a Page object for the given pagename.
+
+        Will return a Category object instead if the given pagename is in the
+        category namespace. As Category is a subclass of Page, this should not
+        cause problems.
+
+        Note that this doesn't do any checks for existence or
+        redirect-following - Page's methods provide that.
         """
         prefixes = self.namespace_id_to_name(NS_CATEGORY, all=True)
         prefix = pagename.split(":", 1)[0]
@@ -341,16 +407,20 @@ class Site(object):
         return Page(self, pagename)
 
     def get_category(self, catname):
-        """
-        Docstring needed
+        """Returns a Category object for the given category name.
+
+        `catname` should be given *without* a namespace prefix. This method is
+        really just shorthand for get_page("Category:" + catname).
         """
         prefix = self.namespace_id_to_name(NS_CATEGORY)
         pagename = "{0}:{1}".format(prefix, catname)
         return Category(self, pagename)
 
     def get_user(self, username=None):
-        """
-        Docstring needed
+        """Returns a User object for the given username.
+
+        If `username` is left as None, then a User object representing the
+        currently logged-in (or anonymous!) user is returned.
         """
         if username is None:
             username = self._get_username()

From a515a004c87526960e0aa297bf5a41a0224de771 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Tue, 2 Aug 2011 15:50:59 -0400
Subject: [PATCH 16/19] Docstrings for everything in User, cleaned stuff up a
 bit.

---
 wiki/tools/functions.py |  14 +++-
 wiki/tools/user.py      | 189 ++++++++++++++++++++++++++++++++----------------
 2 files changed, 138 insertions(+), 65 deletions(-)

diff --git a/wiki/tools/functions.py b/wiki/tools/functions.py
index bc7b187..ab18609 100644
--- a/wiki/tools/functions.py
+++ b/wiki/tools/functions.py
@@ -165,9 +165,17 @@ def get_site(name=None, project=None, lang=None):
     raise SiteNotFoundError(e)
 
 def add_site():
-    """STUB: config editing is required first"""
+    """STUB: config editing is required first.
+
+    Returns True if the site was added successfully or False if the site was
+    already in our config. Raises ConfigError if saving the updated file failed
+    for some reason."""
     pass
 
-def del_site():
-    """STUB: config editing is required first"""
+def del_site(name):
+    """STUB: config editing is required first.
+
+    Returns True if the site was removed successfully or False if the site was
+    not in our config originally. Raises ConfigError if saving the updated file
+    failed for some reason."""
     pass
diff --git a/wiki/tools/user.py b/wiki/tools/user.py
index be71515..16919e1 100644
--- a/wiki/tools/user.py
+++ b/wiki/tools/user.py
@@ -1,5 +1,7 @@
 # -*- coding: utf-8  -*-
 
+from time import strptime
+
 from wiki.tools.constants import *
 from wiki.tools.exceptions import UserNotFoundError
 from wiki.tools.page import Page
@@ -7,34 +9,53 @@ from wiki.tools.page import Page
 class User(object):
     """
     EarwigBot's Wiki Toolset: User Class
+
+    Represents a User on a given Site. Has methods for getting a bunch of 
+    information about the user, such as editcount and user rights, methods for
+    returning the user's userpage and talkpage, etc.
+
+    Public methods:
+    name         -- returns the user's username
+    exists       -- returns True if the user exists, False if they do not
+    userid       -- returns an integer ID representing the user
+    blockinfo    -- returns information about a current block on the user
+    groups       -- returns a list of the user's groups
+    rights       -- returns a list of the user's rights
+    editcount    -- returns the number of edits made by the user
+    registration -- returns the time the user registered as a time.struct_time
+    emailable    -- returns True if you can email the user, False if you cannot
+    gender       -- returns the user's gender ("male", "female", or "unknown")
+    get_userpage -- returns a Page object representing the user's userpage
+    get_talkpage -- returns a Page object representing the user's talkpage
     """
 
     def __init__(self, site, name):
-        """
-        Docstring needed
-        """
-        # Site instance, for doing API queries, etc
-        self.site = site
+        """Constructor for new User instances.
 
-        # Username
-        self._name = name
+        Takes two arguments, a Site object (necessary for doing API queries),
+        and the name of the user, preferably without "User:" in front, although
+        this prefix will be automatically removed by the API if given.
 
-        # Attributes filled in by an API query
-        self._exists = None
-        self._userid = None
-        self._blockinfo = None
-        self._groups = None
-        self._rights = None
-        self._editcount = None
-        self._registration = None
-        self._emailable = None
-        self._gender = None
+        You can also use site.get_user() instead, which returns a User object,
+        and is preferred.
 
-    def _get_attribute(self, attr, force):
+        We won't do any API queries yet for basic information about the user -
+        save that for when the information is requested.
         """
-        Docstring needed
+        self._site = site
+        self._name = name
+
+    def _get_attribute(self, attr, force):
+        """Internally used to get an attribute by name.
+
+        We'll call _load_attributes() to get this (and all other attributes)
+        from the API if it is not already defined. If `force` is True, we'll
+        re-load them even if they've already been loaded.
+
+        Raises UserNotFoundError if a nonexistent user prevents us from
+        returning a certain attribute.
         """
-        if self._exists is None or force:
+        if not hasattr(self, attr) or force:
             self._load_attributes()
         if self._exists is False:
             e = "User '{0}' does not exist.".format(self._name)
@@ -42,30 +63,42 @@ class User(object):
         return getattr(self, attr)
 
     def _load_attributes(self):
-        """
-        Docstring needed
+        """Internally used to load all attributes from the API.
+
+        Normally, this is called by _get_attribute() when a requested attribute
+        is not defined. This defines it.
         """
         params = {"action": "query", "list": "users", "ususers": self._name,
         "usprop": "blockinfo|groups|rights|editcount|registration|emailable|gender"}
-        result = self.site.api_query(params)
+        result = self._site.api_query(params)
+        res = result["query"]["users"][0]
 
         # normalize our username in case it was entered oddly
-        self._name = result["query"]["users"][0]["name"]
+        self._name = res["name"]
 
         try:
-            self._userid = result["query"]["users"][0]["userid"]
+            self._userid = res["userid"]
         except KeyError:  # userid is missing, so user does not exist
             self._exists = False
             return
 
         self._exists = True
-        res = result['query']['users'][0]
+
+        try:
+            self._blockinfo = {
+                "by": res["blockedby"],
+                "reason": res["blockreason"],
+                "expiry": res["blockexpiry"]
+            }
+        except KeyError:
+            self._blockinfo = False
 
         self._groups = res["groups"]
-        self._rights = res["rights"]
+        self._rights = res["rights"].values()
         self._editcount = res["editcount"]
-        self._registration = res["registration"]
-        self._gender = res["gender"]
+
+        reg = res["registration"]
+        self._registration = strptime(reg, "%Y-%m-%dT%H:%M:%SZ")
 
         try:
             res["emailable"]
@@ -74,88 +107,120 @@ class User(object):
         else:
             self._emailable = True
 
-        try:
-            self._blockinfo = {"by": res["blockedby"],
-                "reason": res["blockreason"], "expiry": res["blockexpiry"]}
-        except KeyError:
-            self._blockinfo = False
+        self._gender = res["gender"]
 
     def name(self, force=False):
-        """
-        Docstring needed
+        """Returns the user's name.
+
+        If `force` is True, we will load the name from the API and return that.
+        This could potentially return a "normalized" version of the name - for
+        example, without a "User:" prefix or without underscores. Unlike other
+        attribute getters, this will never make an API query without `force`.
+
+        Note that if another attribute getter, like exists(), has already been
+        called, then the username has already been normalized.
         """
         if force:
             self._load_attributes()
         return self._name
 
     def exists(self, force=False):
-        """
-        Docstring needed
+        """Returns True if the user exists, or False if they do not.
+
+        Makes an API query if `force` is True or if we haven't made one
+        already.
         """
         if self._exists is None or force:
             self._load_attributes()
         return self._exists
 
     def userid(self, force=False):
-        """
-        Docstring needed
+        """Returns an integer ID used by MediaWiki to represent the user.
+
+        Raises UserNotFoundError if the user does not exist. Makes an API query
+        if `force` is True or if we haven't made one already.
         """
         return self._get_attribute("_userid", force)
 
     def blockinfo(self, force=False):
-        """
-        Docstring needed
+        """Returns information about a current block on the user.
+
+        If the user is not blocked, returns False. If they are, returns a dict
+        with three keys: "by" is the blocker's username, "reason" is the reason
+        why they were blocked, and "expiry" is when the block expires.
+
+        Raises UserNotFoundError if the user does not exist. Makes an API query
+        if `force` is True or if we haven't made one already.
         """
         return self._get_attribute("_blockinfo", force)
 
     def groups(self, force=False):
-        """
-        Docstring needed
+        """Returns a list of groups this user is in, including "*".
+
+        Raises UserNotFoundError if the user does not exist. Makes an API query
+        if `force` is True or if we haven't made one already.
         """
         return self._get_attribute("_groups", force)
 
     def rights(self, force=False):
-        """
-        Docstring needed
+        """Returns a list of this user's rights.
+
+        Raises UserNotFoundError if the user does not exist. Makes an API query
+        if `force` is True or if we haven't made one already.
         """
         return self._get_attribute("_rights", force)
 
     def editcount(self, force=False):
-        """
-        Docstring needed
+        """Returns the number of edits made by the user.
+
+        Raises UserNotFoundError if the user does not exist. Makes an API query
+        if `force` is True or if we haven't made one already.
         """
         return self._get_attribute("_editcount", force)
 
     def registration(self, force=False):
-        """
-        Docstring needed
+        """Returns the time the user registered as a time.struct_time object.
+
+        Raises UserNotFoundError if the user does not exist. Makes an API query
+        if `force` is True or if we haven't made one already.
         """
         return self._get_attribute("_registration", force)
 
     def emailable(self, force=False):
-        """
-        Docstring needed
+        """Returns True if the user can be emailed, or False if they cannot.
+
+        Raises UserNotFoundError if the user does not exist. Makes an API query
+        if `force` is True or if we haven't made one already.
         """
         return self._get_attribute("_emailable", force)
 
     def gender(self, force=False):
-        """
-        Docstring needed
+        """Returns the user's gender.
+
+        Can return either "male", "female", or "unknown", if they did not
+        specify it.
+
+        Raises UserNotFoundError if the user does not exist. Makes an API query
+        if `force` is True or if we haven't made one already.
         """
         return self._get_attribute("_gender", force)
 
     def get_userpage(self):
+        """Returns a Page object representing the user's userpage.
+        
+        No checks are made to see if it exists or not. Proper site namespace
+        conventions are followed.
         """
-        Docstring needed
-        """
-        prefix = self.site.namespace_id_to_name(NS_USER)
+        prefix = self._site.namespace_id_to_name(NS_USER)
         pagename = ''.join((prefix, ":", self._name))
-        return Page(self.site, pagename)
+        return Page(self._site, pagename)
 
     def get_talkpage(self):
+        """Returns a Page object representing the user's talkpage.
+        
+        No checks are made to see if it exists or not. Proper site namespace
+        conventions are followed.
         """
-        Docstring needed
-        """
-        prefix = self.site.namespace_id_to_name(NS_USER_TALK)
+        prefix = self._site.namespace_id_to_name(NS_USER_TALK)
         pagename = ''.join((prefix, ":", self._name))
-        return Page(self.site, pagename)
+        return Page(self._site, pagename)

From 4bada57a9bfa21aa62f0ccbe0befc57061ea8d50 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Thu, 4 Aug 2011 17:31:58 -0400
Subject: [PATCH 17/19] Some quick updates to wikitools before I commit Page.

* Site: api_query() -> _api_query(); api_query() acts as a wrapper for _api_query(), accepting API params as **kwargs.
* Various cleanup throughout and minor fixes.
---
 wiki/tools/category.py   |   2 +-
 wiki/tools/exceptions.py |  11 +++-
 wiki/tools/site.py       | 131 +++++++++++++++++++++++++----------------------
 wiki/tools/user.py       |  10 ++--
 4 files changed, 85 insertions(+), 69 deletions(-)

diff --git a/wiki/tools/category.py b/wiki/tools/category.py
index f6e301f..588956a 100644
--- a/wiki/tools/category.py
+++ b/wiki/tools/category.py
@@ -13,6 +13,6 @@ class Category(Page):
         """
         params = {"action": "query", "list": "categorymembers",
             "cmlimit": limit, "cmtitle": self.title}
-        result = self.site.api_query(params)
+        result = self._site._api_query(params)
         members = result['query']['categorymembers']
         return [member["title"] for member in members]
diff --git a/wiki/tools/exceptions.py b/wiki/tools/exceptions.py
index d28cac2..d16a1b9 100644
--- a/wiki/tools/exceptions.py
+++ b/wiki/tools/exceptions.py
@@ -30,5 +30,14 @@ class PermissionsError(WikiToolsetError):
 class NamespaceNotFoundError(WikiToolsetError):
     """A requested namespace name or namespace ID does not exist."""
 
+class PageNotFoundError(WikiToolsetError):
+    """Attempting to get certain information about a page that does not
+    exist."""
+
+class InvalidPageError(WikiToolsetError):
+    """Attempting to get certain information about a page whose title is
+    invalid."""
+
 class UserNotFoundError(WikiToolsetError):
-    """Attempting to get information about a user that does not exist."""
+    """Attempting to get certain information about a user that does not
+    exist."""
diff --git a/wiki/tools/site.py b/wiki/tools/site.py
index 65ed9b5..933bc43 100644
--- a/wiki/tools/site.py
+++ b/wiki/tools/site.py
@@ -75,6 +75,55 @@ class Site(object):
             if logged_in_as is None or name != logged_in_as:
                 self._login(login)
 
+    def _api_query(self, params):
+        """Do an API query with `params` as a dict of parameters.
+
+        This will first attempt to construct an API url from self._base_url and
+        self._script_path. We need both of these, or else we'll raise
+        SiteAPIError.
+
+        We'll encode the given params, adding format=json along the way, and
+        make the request through self._opener, which has built-in cookie
+        support via self._cookiejar, a User-Agent
+        (wiki.tools.constants.USER_AGENT), and Accept-Encoding set to "gzip".
+        Assuming everything went well, we'll gunzip the data (if compressed),
+        load it as a JSON object, and return it.
+
+        If our request failed, we'll raise SiteAPIError with details.
+
+        There's helpful MediaWiki API documentation at
+        <http://www.mediawiki.org/wiki/API>.
+        """
+        if self._base_url is None or self._script_path is None:
+            e = "Tried to do an API query, but no API URL is known."
+            raise SiteAPIError(e)
+
+        url = ''.join((self._base_url, self._script_path, "/api.php"))
+        params["format"] = "json"  # this is the only format we understand
+        data = urlencode(params)
+
+        print url, data  # debug code
+
+        try:
+            response = self._opener.open(url, data)
+        except URLError as error:
+            if hasattr(error, "reason"):
+                e = "API query at {0} failed because {1}."
+                e = e.format(error.geturl, error.reason)
+            elif hasattr(error, "code"):
+                e = "API query at {0} failed; got an error code of {1}."
+                e = e.format(error.geturl, error.code)
+            else:
+                e = "API query failed."
+            raise SiteAPIError(e)
+        else:
+            result = response.read()
+            if response.headers.get("Content-Encoding") == "gzip":
+                stream = StringIO(result)
+                gzipper = GzipFile(fileobj=stream)
+                result = gzipper.read()
+            return loads(result)  # parse as a JSON object
+
     def _load_attributes(self, force=False):
         """Load data about our Site from the API.
 
@@ -94,13 +143,13 @@ class Site(object):
 
         if self._namespaces is None or force:
             params["siprop"] = "general|namespaces|namespacealiases"
-            result = self.api_query(params)
+            result = self._api_query(params)
             self._load_namespaces(result)
         elif all(attrs):  # everything is already specified and we're not told
             return        # to force a reload, so do nothing
         else:  # we're only loading attributes other than _namespaces
             params["siprop"] = "general"
-            result = self.api_query(params)
+            result = self._api_query(params)
 
         res = result["query"]["general"]
         self._name = res["wikiid"]
@@ -197,7 +246,7 @@ class Site(object):
         we are logged out.
         """
         params = {"action": "query", "meta": "userinfo"}
-        result = self.api_query(params)
+        result = self._api_query(params)
         return result["query"]["userinfo"]["name"]
 
     def _get_username(self):
@@ -253,7 +302,7 @@ class Site(object):
         params = {"action": "login", "lgname": name, "lgpassword": password}
         if token is not None:
             params["lgtoken"] = token
-        result = self.api_query(params)
+        result = self._api_query(params)
         res = result["login"]["result"]
 
         if res == "Success":
@@ -282,58 +331,16 @@ class Site(object):
         save it, if it supports that sort of thing.
         """
         params = {"action": "logout"}
-        self.api_query(params)
+        self._api_query(params)
         self._cookiejar.clear()
         self._save_cookiejar()
 
-    def api_query(self, params):
-        """Do an API query with `params` as a dict of parameters.
-
-        This will first attempt to construct an API url from self._base_url and
-        self._script_path. We need both of these, or else we'll raise
-        SiteAPIError.
+    def api_query(self, **kwargs):
+        """Do an API query with `kwargs` as the parameters.
 
-        We'll encode the given params, adding format=json along the way, and
-        make the request through self._opener, which has built-in cookie
-        support via self._cookiejar, a User-Agent
-        (wiki.tools.constants.USER_AGENT), and Accept-Encoding set to "gzip".
-        Assuming everything went well, we'll gunzip the data (if compressed),
-        load it as a JSON object, and return it.
-
-        If our request failed, we'll raise SiteAPIError with details.
-
-        There's helpful MediaWiki API documentation at
-        <http://www.mediawiki.org/wiki/API>.
+        See _api_query()'s documentation for details.
         """
-        if self._base_url is None or self._script_path is None:
-            e = "Tried to do an API query, but no API URL is known."
-            raise SiteAPIError(e)
-
-        url = ''.join((self._base_url, self._script_path, "/api.php"))
-        params["format"] = "json"  # this is the only format we understand
-        data = urlencode(params)
-
-        print url, data  # debug code
-
-        try:
-            response = self._opener.open(url, data)
-        except URLError as error:
-            if hasattr(error, "reason"):
-                e = "API query at {0} failed because {1}."
-                e = e.format(error.geturl, error.reason)
-            elif hasattr(error, "code"):
-                e = "API query at {0} failed; got an error code of {1}."
-                e = e.format(error.geturl, error.code)
-            else:
-                e = "API query failed."
-            raise SiteAPIError(e)
-        else:
-            result = response.read()
-            if response.headers.get("Content-Encoding") == "gzip":
-                stream = StringIO(result)
-                gzipper = GzipFile(fileobj=stream)      
-                result = gzipper.read()
-            return loads(result)  # parse as a JSON object
+        return self._api_query(kwargs)
 
     def name(self):
         """Returns the Site's name (or "wikiid" in the API), like "enwiki"."""
@@ -389,32 +396,32 @@ class Site(object):
         e = "There is no namespace with name '{0}'.".format(name)
         raise NamespaceNotFoundError(e)
 
-    def get_page(self, pagename):
-        """Returns a Page object for the given pagename.
+    def get_page(self, title, follow_redirects=False):
+        """Returns a Page object for the given title (pagename).
 
-        Will return a Category object instead if the given pagename is in the
+        Will return a Category object instead if the given title is in the
         category namespace. As Category is a subclass of Page, this should not
         cause problems.
 
-        Note that this doesn't do any checks for existence or
+        Note that this doesn't do any direct checks for existence or
         redirect-following - Page's methods provide that.
         """
         prefixes = self.namespace_id_to_name(NS_CATEGORY, all=True)
-        prefix = pagename.split(":", 1)[0]
-        if prefix != pagename:  # avoid a page that is simply "Category"
+        prefix = title.split(":", 1)[0]
+        if prefix != title:  # avoid a page that is simply "Category"
             if prefix in prefixes:
-                return Category(self, pagename)
-        return Page(self, pagename)
+                return Category(self, title, follow_redirects)
+        return Page(self, title, follow_redirects)
 
-    def get_category(self, catname):
+    def get_category(self, catname, follow_redirects=False):
         """Returns a Category object for the given category name.
 
         `catname` should be given *without* a namespace prefix. This method is
         really just shorthand for get_page("Category:" + catname).
         """
         prefix = self.namespace_id_to_name(NS_CATEGORY)
-        pagename = "{0}:{1}".format(prefix, catname)
-        return Category(self, pagename)
+        pagename = ':'.join((prefix, catname))
+        return Category(self, pagename, follow_redirects)
 
     def get_user(self, username=None):
         """Returns a User object for the given username.
diff --git a/wiki/tools/user.py b/wiki/tools/user.py
index 16919e1..3b0173f 100644
--- a/wiki/tools/user.py
+++ b/wiki/tools/user.py
@@ -69,8 +69,8 @@ class User(object):
         is not defined. This defines it.
         """
         params = {"action": "query", "list": "users", "ususers": self._name,
-        "usprop": "blockinfo|groups|rights|editcount|registration|emailable|gender"}
-        result = self._site.api_query(params)
+                  "usprop": "blockinfo|groups|rights|editcount|registration|emailable|gender"}
+        result = self._site._api_query(params)
         res = result["query"]["users"][0]
 
         # normalize our username in case it was entered oddly
@@ -130,7 +130,7 @@ class User(object):
         Makes an API query if `force` is True or if we haven't made one
         already.
         """
-        if self._exists is None or force:
+        if not hasattr(self, "_exists") or force:
             self._load_attributes()
         return self._exists
 
@@ -212,7 +212,7 @@ class User(object):
         conventions are followed.
         """
         prefix = self._site.namespace_id_to_name(NS_USER)
-        pagename = ''.join((prefix, ":", self._name))
+        pagename = ':'.join((prefix, self._name))
         return Page(self._site, pagename)
 
     def get_talkpage(self):
@@ -222,5 +222,5 @@ class User(object):
         conventions are followed.
         """
         prefix = self._site.namespace_id_to_name(NS_USER_TALK)
-        pagename = ''.join((prefix, ":", self._name))
+        pagename = ':'.join((prefix, self._name))
         return Page(self._site, pagename)

From e4f8fb2e21cdbd8c2b0d93a7c0cbbe5b7231ac28 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Fri, 5 Aug 2011 00:43:51 -0400
Subject: [PATCH 18/19] Major additions to Page in wikitools.

* Page: added about 400 lines of rambling nonsense.
* Exceptions: added RedirectError.
---
 wiki/tools/exceptions.py |   4 +
 wiki/tools/page.py       | 413 +++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 401 insertions(+), 16 deletions(-)

diff --git a/wiki/tools/exceptions.py b/wiki/tools/exceptions.py
index d16a1b9..f36dae3 100644
--- a/wiki/tools/exceptions.py
+++ b/wiki/tools/exceptions.py
@@ -38,6 +38,10 @@ class InvalidPageError(WikiToolsetError):
     """Attempting to get certain information about a page whose title is
     invalid."""
 
+class RedirectError(WikiToolsetError):
+    """Page's get_redirect_target() method failed because the page is either
+    not a redirect, or it is malformed."""
+
 class UserNotFoundError(WikiToolsetError):
     """Attempting to get certain information about a user that does not
     exist."""
diff --git a/wiki/tools/page.py b/wiki/tools/page.py
index d267674..8ae25f1 100644
--- a/wiki/tools/page.py
+++ b/wiki/tools/page.py
@@ -1,33 +1,414 @@
 # -*- coding: utf-8  -*-
 
+import re
+from urllib import quote
+
+from wiki.tools.exceptions import *
+
 class Page(object):
     """
     EarwigBot's Wiki Toolset: Page Class
+
+    Represents a Page on a given Site. Has methods for getting information
+    about the page, getting page content, and so on. Category is a subclass of
+    Page with additional methods.
+
+    Public methods:
+    title               -- returns the page's title, or pagename
+    exists              -- returns whether the page exists
+    pageid              -- returns an integer ID representing the page
+    url                 -- returns the page's URL
+    namespace           -- returns the page's namespace as an integer
+    protection          -- returns the page's current protection status
+    is_talkpage         -- returns True if the page is a talkpage, else False
+    is_redirect         -- returns True if the page is a redirect, else False
+    toggle_talk         -- returns a content page's talk page, or vice versa
+    get                 -- returns page content
+    get_redirect_target -- if the page is a redirect, returns its destination 
     """
 
-    def __init__(self, site, title):
-        """
-        Docstring needed
+    def __init__(self, site, title, follow_redirects=False):
+        """Constructor for new Page instances.
+
+        Takes three arguments: a Site object, the Page's title (or pagename),
+        and whether or not to follow redirects (optional, defaults to False).
+
+        As with User, site.get_page() is preferred. Site's method has support
+        for a default `follow_redirects` value in our config, while __init__
+        always defaults to False.
+
+        __init__ will not do any API queries, but it will use basic namespace
+        logic to determine our namespace ID and if we are a talkpage.
         """
-        self.site = site
-        self.title = title
+        self._site = site
+        self._title = title.strip()
+        self._follow_redirects = self._keep_following = follow_redirects
+
+        self._exists = 0  # 0: unknown, 1: invalid, 2: missing, 3: exists
+        self._pageid = None
+        self._is_redirect = None
+        self._lastrevid = None
+        self._protection = None
+        self._fullurl = None
         self._content = None
 
-    def exists(self):
+        # Try to determine the page's namespace using our site's namespace
+        # converter (compare against the stripped title, not the raw input):
+        prefix = self._title.split(":", 1)[0]
+        if prefix != self._title:  # ignore a page titled "Category" or "User"
+            try:
+                self._namespace = self._site.namespace_name_to_id(prefix)
+            except NamespaceNotFoundError:
+                self._namespace = 0
+        else:
+            self._namespace = 0
+
+        # Is this a talkpage? Talkpages have odd IDs, while content pages have
+        # even IDs, excluding the "special" namespaces:
+        if self._namespace < 0:
+            self._is_talkpage = False
+        else:
+            self._is_talkpage = self._namespace % 2 == 1
+
+    def _force_validity(self):
+        """Used to ensure that our page's title is valid.
+
+        If this method is called when our page is not valid (and after
+        _load_attributes() has been called), InvalidPageError will be raised.
+
+        Note that validity != existence. If a page's title is invalid (e.g, it
+        contains "[") it will always be invalid, and cannot be edited.
         """
-        Docstring needed
+        if self._exists == 1:
+            e = "Page '{0}' is invalid.".format(self._title)
+            raise InvalidPageError(e)
+
+    def _force_existence(self):
+        """Used to ensure that our page exists.
+
+        If this method is called when our page doesn't exist (and after
+        _load_attributes() has been called), PageNotFoundError will be raised.
+        It will also call _force_validity() beforehand.
         """
-        pass
+        self._force_validity()
+        if self._exists == 2:
+            e = "Page '{0}' does not exist.".format(self._title)
+            raise PageNotFoundError(e)
+
+    def _load_wrapper(self):
+        """Calls _load_attributes() and follows redirects if we're supposed to.
+
+        This method will only follow redirects if follow_redirects=True was
+        passed to __init__() (perhaps indirectly passed by site.get_page()).
+        It avoids the API's &redirects param in favor of manual following,
+        so we can act more realistically (we don't follow double redirects, and
+        circular redirects don't break us).
 
-    def get(self, force_reload=False):
+        This will raise RedirectError if we have a problem following, but that
+        is a bug and should NOT happen.
+
+        If we're following a redirect, this will make a grand total of three
+        API queries. It's a lot, but each one is quite small.
         """
-        Docstring needed
+        self._load_attributes()
+
+        if self._keep_following and self._is_redirect:
+            self._title = self.get_redirect_target()
+            self._keep_following = False  # don't follow double redirects
+            self._content = None  # reset the content we just loaded
+            self._load_attributes()
+
+    def _load_attributes(self, result=None):
+        """Loads various data from the API in a single query.
+
+        Loads self._title, ._exists, ._is_redirect, ._pageid, ._fullurl,
+        ._protection, ._namespace, ._is_talkpage, and ._lastrevid using the
+        API. It will do a query of its own unless `result` is provided, in
+        which case we'll pretend `result` is what the query returned.
+
+        Assuming the API is sound, this should not raise any exceptions.
+        """
+        if result is None:
+            params = {"action": "query", "prop": "info", "titles": self._title,
+                      "inprop": "protection|url"}
+            result = self._site._api_query(params)
+
+        res = result["query"]["pages"].values()[0]
+
+        # Normalize our pagename/title thing:
+        self._title = res["title"]
+
+        try:
+            res["redirect"]
+        except KeyError:
+            self._is_redirect = False
+        else:
+            self._is_redirect = True
+
+        self._pageid = int(result["query"]["pages"].keys()[0])  # keys are str
+        if self._pageid < 0:
+            try:
+                res["missing"]
+            except KeyError:
+                # If it has a negative ID and it's invalid, then break here,
+                # because there's no other data for us to get:
+                self._exists = 1
+                return
+            else:
+                # If it has a negative ID and it's missing; we can still get
+                # data like the namespace, protection, and URL:
+                self._exists = 2
+        else:
+            self._exists = 3
+
+        self._fullurl = res["fullurl"]
+        self._protection = res["protection"]
+
+        # __init__() guessed the namespace and talkpage status from the title;
+        # now we can be sure. Talkpages have odd, non-negative namespace IDs:
+        self._namespace = res["ns"]
+        self._is_talkpage = self._namespace > 0 and self._namespace % 2 == 1
+
+        # This last field will only be specified if the page exists:
+        try:
+            self._lastrevid = res["lastrevid"]
+        except KeyError:
+            pass
+
+
+    def _load_content(self, result=None):
+        """Loads current page content from the API.
+
+        If `result` is provided, we'll pretend that is the result of an API
+        query and try to get content from that. Otherwise, we'll do an API
+        query on our own.
+
+        Don't call this directly, ever - use .get(force=True) if you want to
+        force content reloading.
+        """
+        if result is None:
+            params = {"action": "query", "prop": "revisions", "rvlimit": 1,
+                      "rvprop": "content", "titles": self._title}
+            result = self._site._api_query(params)
+
+        res = result["query"]["pages"].values()[0]
+        try:
+            content = res["revisions"][0]["*"]
             self._content = content
-            return content
+        except KeyError:
+            # This can only happen if the page was deleted since we last called
+            # self._load_attributes(). In that case, some of our attributes are
+            # outdated, so force another self._load_attributes():
+            self._load_attributes()
+            self._force_existence()
+
+    def title(self, force=False):
+        """Returns the Page's title, or pagename.
+
+        This won't do any API queries on its own unless force is True, in which
+        case the title will be forcibly reloaded from the API (normalizing it,
+        and following redirects if follow_redirects=True was passed to
+        __init__()). Any other methods that do API queries will reload title on
+        their own, however, like exists() and get().
+        """
+        if force:
+            self._load_wrapper()
+        return self._title
+
+    def exists(self, force=False):
+        """Returns information about whether the Page exists or not.
+
+        The returned "information" is a tuple with two items. The first is a
+        bool, either True if the page exists or False if it does not. The
+        second is a string giving more information, either "invalid", (title
+        is invalid, e.g. it contains "["), "missing", or "exists".
+
+        Makes an API query if force is True or if we haven't already made one.
+        """
+        cases = {
+            0: (None, "unknown"),
+            1: (False, "invalid"),
+            2: (False, "missing"),
+            3: (True, "exists"),
+        }
+        if self._exists == 0 or force:
+            self._load_wrapper()
+        return cases[self._exists]
+
+    def pageid(self, force=False):
+        """Returns an integer ID representing the Page.
+
+        Makes an API query if force is True or if we haven't already made one.
+
+        Raises InvalidPageError or PageNotFoundError if the page name is
+        invalid or the page does not exist, respectively.
+        """
+        if self._exists == 0 or force:
+            self._load_wrapper()
+        self._force_existence()  # missing pages do not have IDs
+        return self._pageid
+
+    def url(self, force=False):
+        """Returns the page's URL.
+
+        Like title(), this won't do any API queries on its own unless force is
+        True. If the API was never queried for this page, we will attempt to
+        determine the URL ourselves based on the title.
+        """
+        if force:
+            self._load_wrapper()
+        if self._fullurl is not None:
+            return self._fullurl
+        else:
+            slug = quote(self._title.replace(" ", "_"), safe="/:")
+            path = self._site._article_path.replace("$1", slug)
+            return ''.join((self._site._base_url, path))
+
+    def namespace(self, force=False):
+        """Returns the page's namespace ID (an integer).
+
+        Like title(), this won't do any API queries on its own unless force is
+        True. If the API was never queried for this page, we will attempt to
+        determine the namespace ourselves based on the title.
+        """
+        if force:
+            self._load_wrapper()
+        return self._namespace
+
+    def protection(self, force=False):
+        """Returns the page's current protection status.
+
+        Makes an API query if force is True or if we haven't already made one.
+
+        Raises InvalidPageError if the page name is invalid. Will not raise an
+        error if the page is missing because those can still be protected.
+        """
+        if self._exists == 0 or force:
+            self._load_wrapper()
+        self._force_validity()  # invalid pages cannot be protected
+        return self._protection
+
+    def is_talkpage(self, force=False):
+        """Returns True if the page is a talkpage, else False.
+
+        Like title(), this won't do any API queries on its own unless force is
+        True. If the API was never queried for this page, we will attempt to
+        determine the talkpage status ourselves based on its namespace ID.
+        """
+        if force:
+            self._load_wrapper()
+        return self._is_talkpage
+
+    def is_redirect(self, force=False):
+        """Returns True if the page is a redirect, else False.
+
+        Makes an API query if force is True or if we haven't already made one.
+
+        We will return False even if the page does not exist or is invalid.
+        """
+        if self._exists == 0 or force:
+            self._load_wrapper()
+        return self._is_redirect
+
+    def toggle_talk(self, force=False, follow_redirects=None):
+        """Returns a content page's talk page, or vice versa.
+
+        The title of the new page is determined by namespace logic, not API
+        queries. We won't make any API queries on our own unless force is True,
+        and the only reason then would be to forcibly update the title or
+        follow redirects if we haven't already made an API query.
+
+        If `follow_redirects` is anything other than None (the default), it
+        will be passed to the new Page's __init__(). Otherwise, we'll use the
+        value passed to our own __init__().
+
+        Will raise InvalidPageError if we try to get the talk page of a special
+        page (in the Special: or Media: namespaces), but we won't raise an
+        exception if our page is otherwise missing or invalid.
+        """
+        if force:
+            self._load_wrapper()
+        if self._namespace < 0:
+            ns = self._site.namespace_id_to_name(self._namespace)
+            e = "Pages in the {0} namespace can't have talk pages.".format(ns)
+            raise InvalidPageError(e)
+
+        if self._is_talkpage:
+            new_ns = self._namespace - 1
+        else:
+            new_ns = self._namespace + 1
+
+        if self._namespace == 0:  # a colon here is part of the title itself
+            body = self._title
+        else:  # drop our namespace prefix; [-1] copes with a missing colon
+            body = self._title.split(":", 1)[-1]
+
+        new_prefix = self._site.namespace_id_to_name(new_ns)
+
+        # If the new page is in namespace 0, don't do ":Title" (it's correct,
+        # but unnecessary), just do "Title":
+        if new_prefix:
+            new_title = ':'.join((new_prefix, body))
+        else:
+            new_title = body
+
+        if follow_redirects is None:
+            follow_redirects = self._follow_redirects
+        return Page(self._site, new_title, follow_redirects)
+
+    def get(self, force=False):
+        """Returns page content, which is cached if you try to call get again.
+
+        Use `force` to forcibly reload page content even if we've already
+        loaded some. This is good if you want to edit a page multiple times,
+        and you want to get updated content before you make your second edit.
+
+        Raises InvalidPageError or PageNotFoundError if the page name is
+        invalid or the page does not exist, respectively.
+        """
+        if force or self._exists == 0:
+            # Kill two birds with one stone by doing an API query for both our
+            # attributes and our page content:
+            params = {"action": "query", "rvprop": "content", "rvlimit": 1,
+                      "prop": "info|revisions", "inprop": "protection|url",
+                      "titles": self._title}
+            result = self._site._api_query(params)
+            self._load_attributes(result=result)
+            self._force_existence()
+            self._load_content(result=result)
+
+            # Follow redirects if we're told to:
+            if self._keep_following and self._is_redirect:
+                self._title = self.get_redirect_target()
+                self._keep_following = False  # don't follow double redirects
+                self._content = None  # reset the content we just loaded
+                self.get(force=True)
+
+            return self._content
+
+        # Make sure we're dealing with a real page here. This may be outdated
+        # if the page was deleted since we last called self._load_attributes(),
+        # but self._load_content() can handle that:
+        self._force_existence()
+
+        if self._content is None:
+            self._load_content()
+
         return self._content
+
+    def get_redirect_target(self, force=False):
+        """If the page is a redirect, returns its destination.
+
+        Use `force` to forcibly reload content even if we've already loaded
+        some before. Note that this method calls get() for page content.
+
+        Raises InvalidPageError or PageNotFoundError if the page name is
+        invalid or the page does not exist, respectively. Raises RedirectError
+        if the page is not a redirect.
+        """
+        content = self.get(force)
+        regexp = r"^\s*\#\s*redirect\s*\[\[(.*?)\]\]"  # raw: keep backslashes
+        try:
+            return re.findall(regexp, content, flags=re.IGNORECASE)[0]
+        except IndexError:
+            e = "The page does not appear to have a redirect target."
+            raise RedirectError(e)

From a7367856ee26b4a35c244ea139307bbf67c97dd0 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Sat, 6 Aug 2011 16:34:54 -0400
Subject: [PATCH 19/19] Finished docstrings in wikitools.

---
 wiki/tools/category.py | 16 ++++++++++++++--
 wiki/tools/site.py     | 14 +++++++++++++-
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/wiki/tools/category.py b/wiki/tools/category.py
index 588956a..7ebe342 100644
--- a/wiki/tools/category.py
+++ b/wiki/tools/category.py
@@ -5,11 +5,23 @@ from wiki.tools.page import Page
 class Category(Page):
     """
     EarwigBot's Wiki Toolset: Category Class
+
+    Represents a Category on a given Site, a subclass of Page. Provides
+    additional methods, but Page's own methods should work fine on Category
+    objects. Site.get_page() will return a Category instead of a Page if the
+    given title is in the category namespace; get_category() is shorthand,
+    because it accepts category names without the namespace prefix.
+
+    Public methods:
+    members -- returns a list of titles in the category
     """
 
     def members(self, limit=50):
-        """
-        Docstring needed
+        """Returns a list of titles in the category.
+
+        If `limit` is provided, we will provide this many titles, or less if
+        the category is too small. `limit` defaults to 50; normal users can go
+        up to 500, and bots can go up to 5,000 on a single API query.
         """
         params = {"action": "query", "list": "categorymembers",
             "cmlimit": limit, "cmtitle": self.title}
diff --git a/wiki/tools/site.py b/wiki/tools/site.py
index 933bc43..57b890d 100644
--- a/wiki/tools/site.py
+++ b/wiki/tools/site.py
@@ -24,6 +24,18 @@ class Site(object):
     won't need to call it directly, rather tools.get_site() for returning Site
     instances, tools.add_site() for adding new ones to config, and
     tools.del_site() for removing old ones from config, should suffice.
+
+    Public methods:
+    name                 -- returns our name (or "wikiid"), like "enwiki"
+    project              -- returns our project name, like "wikipedia"
+    lang                 -- returns our language code, like "en"
+    domain               -- returns our web domain, like "en.wikipedia.org"
+    api_query            -- does an API query with the given kwargs as params
+    namespace_id_to_name -- given a namespace ID, returns associated name(s)
+    namespace_name_to_id -- given a namespace name, returns associated id
+    get_page             -- returns a Page object for the given title
+    get_category         -- returns a Category object for the given title
+    get_user             -- returns a User object for the given username
     """
 
     def __init__(self, name=None, project=None, lang=None, base_url=None,
@@ -351,7 +363,7 @@ class Site(object):
         return self._project
 
     def lang(self):
-        """Returns the Site's language, like "en" or "es"."""
+        """Returns the Site's language code, like "en" or "es"."""
         return self._lang
 
     def domain(self):