@@ -1,4 +1,5 @@ | |||
*.pyc | |||
*.so | |||
*.egg | |||
*.egg-info | |||
.DS_Store | |||
@@ -18,7 +18,8 @@ so you can install the latest release with ``pip install mwparserfromhell`` | |||
cd mwparserfromhell | |||
python setup.py install | |||
You can run the comprehensive unit testing suite with ``python setup.py test``. | |||
You can run the comprehensive unit testing suite with | |||
``python setup.py test -q``. | |||
Usage | |||
----- | |||
@@ -124,7 +125,9 @@ following code (via the API_):: | |||
import mwparserfromhell | |||
API_URL = "http://en.wikipedia.org/w/api.php" | |||
def parse(title): | |||
raw = urllib.urlopen(API_URL, data).read() | |||
data = {"action": "query", "prop": "revisions", "rvlimit": 1, | |||
"rvprop": "content", "format": "json", "titles": title} | |||
raw = urllib.urlopen(API_URL, urllib.urlencode(data)).read() | |||
res = json.loads(raw) | |||
text = res["query"]["pages"].values()[0]["revisions"][0]["*"] | |||
return mwparserfromhell.parse(text) | |||
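The ``Wikicode`` object returned above can be explored with the usual filter
methods; a minimal sketch (the page title ``"Test"`` is only an example)::

    code = parse("Test")
    for template in code.filter_templates():
        print(template.name)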
@@ -1,29 +1,29 @@ | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Implements support for both Python 2 and Python 3 by defining common types in | |||
terms of their Python 2/3 variants. For example, :py:class:`str` is set to | |||
:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, | |||
:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These | |||
types are meant to be imported directly from within the parser's modules. | |||
""" | |||
import sys | |||
py3k = sys.version_info[0] == 3 | |||
if py3k: | |||
bytes = bytes | |||
str = str | |||
basestring = str | |||
maxsize = sys.maxsize | |||
import html.entities as htmlentities | |||
else: | |||
bytes = str | |||
str = unicode | |||
basestring = basestring | |||
maxsize = sys.maxint | |||
import htmlentitydefs as htmlentities | |||
del sys | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Implements support for both Python 2 and Python 3 by defining common types in | |||
terms of their Python 2/3 variants. For example, :py:class:`str` is set to | |||
:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, | |||
:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These | |||
types are meant to be imported directly from within the parser's modules. | |||
""" | |||
import sys | |||
py3k = sys.version_info[0] == 3 | |||
if py3k: | |||
bytes = bytes | |||
str = str | |||
basestring = str | |||
maxsize = sys.maxsize | |||
import html.entities as htmlentities | |||
else: | |||
bytes = str | |||
str = unicode | |||
basestring = basestring | |||
maxsize = sys.maxint | |||
import htmlentitydefs as htmlentities | |||
del sys |
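For reference, modules elsewhere in the package pick these names up straight
from ``compat`` instead of branching on the interpreter version themselves; a
minimal sketch (the helper function is hypothetical)::

    from mwparserfromhell.compat import htmlentities, str

    def is_known_entity(name):
        # True for named HTML entities such as "nbsp", on Python 2 and 3 alike
        return str(name) in htmlentities.entitydefs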
@@ -30,6 +30,7 @@ __all__ = ["Argument"] | |||
class Argument(Node): | |||
"""Represents a template argument substitution, like ``{{{foo}}}``.""" | |||
def __init__(self, name, default=None): | |||
super(Argument, self).__init__() | |||
self._name = name | |||
@@ -29,6 +29,7 @@ __all__ = ["Comment"] | |||
class Comment(Node): | |||
"""Represents a hidden HTML comment, like ``<!-- foobar -->``.""" | |||
def __init__(self, contents): | |||
super(Comment, self).__init__() | |||
self._contents = contents | |||
@@ -135,7 +135,10 @@ class HTMLEntity(Node): | |||
@hex_char.setter | |||
def hex_char(self, newval): | |||
self._hex_char = bool(newval) | |||
newval = str(newval) | |||
if newval not in ("x", "X"): | |||
raise ValueError(newval) | |||
self._hex_char = newval | |||
def normalize(self): | |||
"""Return the unicode character represented by the HTML entity.""" | |||
@@ -29,6 +29,7 @@ __all__ = ["Text"] | |||
class Text(Node): | |||
"""Represents ordinary, unformatted text with no special properties.""" | |||
def __init__(self, value): | |||
super(Text, self).__init__() | |||
self._value = value | |||
@@ -30,6 +30,7 @@ __all__ = ["Wikilink"] | |||
class Wikilink(Node): | |||
"""Represents an internal wikilink, like ``[[Foo|Bar]]``.""" | |||
def __init__(self, title, text=None): | |||
super(Wikilink, self).__init__() | |||
self._title = title | |||
@@ -62,6 +62,15 @@ Local (stack-specific) contexts: | |||
* :py:const:`COMMENT` | |||
* :py:const:`SAFETY_CHECK` | |||
* :py:const:`HAS_TEXT` | |||
* :py:const:`FAIL_ON_TEXT` | |||
* :py:const:`FAIL_NEXT` | |||
* :py:const:`FAIL_ON_LBRACE` | |||
* :py:const:`FAIL_ON_RBRACE` | |||
* :py:const:`FAIL_ON_EQUALS` | |||
Global contexts: | |||
* :py:const:`GL_HEADING` | |||
@@ -69,29 +78,36 @@ Global contexts: | |||
# Local contexts: | |||
TEMPLATE = 0b00000000000111 | |||
TEMPLATE_NAME = 0b00000000000001 | |||
TEMPLATE_PARAM_KEY = 0b00000000000010 | |||
TEMPLATE_PARAM_VALUE = 0b00000000000100 | |||
ARGUMENT = 0b00000000011000 | |||
ARGUMENT_NAME = 0b00000000001000 | |||
ARGUMENT_DEFAULT = 0b00000000010000 | |||
WIKILINK = 0b00000001100000 | |||
WIKILINK_TITLE = 0b00000000100000 | |||
WIKILINK_TEXT = 0b00000001000000 | |||
HEADING = 0b01111110000000 | |||
HEADING_LEVEL_1 = 0b00000010000000 | |||
HEADING_LEVEL_2 = 0b00000100000000 | |||
HEADING_LEVEL_3 = 0b00001000000000 | |||
HEADING_LEVEL_4 = 0b00010000000000 | |||
HEADING_LEVEL_5 = 0b00100000000000 | |||
HEADING_LEVEL_6 = 0b01000000000000 | |||
COMMENT = 0b10000000000000 | |||
TEMPLATE = 0b00000000000000000111 | |||
TEMPLATE_NAME = 0b00000000000000000001 | |||
TEMPLATE_PARAM_KEY = 0b00000000000000000010 | |||
TEMPLATE_PARAM_VALUE = 0b00000000000000000100 | |||
ARGUMENT = 0b00000000000000011000 | |||
ARGUMENT_NAME = 0b00000000000000001000 | |||
ARGUMENT_DEFAULT = 0b00000000000000010000 | |||
WIKILINK = 0b00000000000001100000 | |||
WIKILINK_TITLE = 0b00000000000000100000 | |||
WIKILINK_TEXT = 0b00000000000001000000 | |||
HEADING = 0b00000001111110000000 | |||
HEADING_LEVEL_1 = 0b00000000000010000000 | |||
HEADING_LEVEL_2 = 0b00000000000100000000 | |||
HEADING_LEVEL_3 = 0b00000000001000000000 | |||
HEADING_LEVEL_4 = 0b00000000010000000000 | |||
HEADING_LEVEL_5 = 0b00000000100000000000 | |||
HEADING_LEVEL_6 = 0b00000001000000000000 | |||
COMMENT = 0b00000010000000000000 | |||
SAFETY_CHECK = 0b11111100000000000000 | |||
HAS_TEXT = 0b00000100000000000000 | |||
FAIL_ON_TEXT = 0b00001000000000000000 | |||
FAIL_NEXT = 0b00010000000000000000 | |||
FAIL_ON_LBRACE = 0b00100000000000000000 | |||
FAIL_ON_RBRACE = 0b01000000000000000000 | |||
FAIL_ON_EQUALS = 0b10000000000000000000 | |||
# Global contexts: | |||
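Because each context is a single bit, code combines and tests them with plain
bitwise operators, and aggregates such as ``TEMPLATE`` or ``SAFETY_CHECK`` are
just the OR of their members. A small illustration (the variable is made up)::

    from mwparserfromhell.parser import contexts

    ctx = contexts.TEMPLATE_NAME | contexts.HAS_TEXT
    assert ctx & contexts.TEMPLATE       # somewhere inside a template
    assert ctx & contexts.SAFETY_CHECK   # at least one safety flag is set
    ctx ^= contexts.HAS_TEXT             # clear a single flag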
@@ -1,6 +1,6 @@ | |||
/* | |||
Tokenizer for MWParserFromHell | |||
Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
this software and associated documentation files (the "Software"), to deal in | |||
@@ -843,7 +843,8 @@ Tokenizer_handle_heading_end(Tokenizer* self) | |||
self->head++; | |||
} | |||
current = log2(self->topstack->context / LC_HEADING_LEVEL_1) + 1; | |||
level = current > best ? (best > 6 ? 6 : best) : (current > 6 ? 6 : current); | |||
level = current > best ? (best > 6 ? 6 : best) : | |||
(current > 6 ? 6 : current); | |||
after = (HeadingData*) Tokenizer_parse(self, self->topstack->context); | |||
if (BAD_ROUTE) { | |||
RESET_ROUTE(); | |||
@@ -956,11 +957,11 @@ Tokenizer_really_parse_entity(Tokenizer* self) | |||
else | |||
numeric = hexadecimal = 0; | |||
if (hexadecimal) | |||
valid = "0123456789abcdefABCDEF"; | |||
valid = HEXDIGITS; | |||
else if (numeric) | |||
valid = "0123456789"; | |||
valid = DIGITS; | |||
else | |||
valid = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; | |||
valid = ALPHANUM; | |||
text = calloc(MAX_ENTITY_SIZE, sizeof(char)); | |||
if (!text) { | |||
PyErr_NoMemory(); | |||
@@ -1005,7 +1006,7 @@ Tokenizer_really_parse_entity(Tokenizer* self) | |||
i = 0; | |||
while (1) { | |||
def = entitydefs[i]; | |||
if (!def) // We've reached the end of the def list without finding it | |||
if (!def) // We've reached the end of the defs without finding it | |||
FAIL_ROUTE_AND_EXIT() | |||
if (strcmp(text, def) == 0) | |||
break; | |||
@@ -1135,48 +1136,59 @@ Tokenizer_parse_comment(Tokenizer* self) | |||
} | |||
/* | |||
Make sure we are not trying to write an invalid character. | |||
Make sure we are not trying to write an invalid character. Return 0 if | |||
everything is safe, or -1 if the route must be failed. | |||
*/ | |||
static void | |||
static int | |||
Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | |||
{ | |||
if (context & LC_FAIL_NEXT) { | |||
Tokenizer_fail_route(self); | |||
return; | |||
return -1; | |||
} | |||
if (context & LC_WIKILINK_TITLE) { | |||
if (data == *"]" || data == *"{") | |||
self->topstack->context |= LC_FAIL_NEXT; | |||
else if (data == *"\n" || data == *"[" || data == *"}") | |||
Tokenizer_fail_route(self); | |||
return; | |||
return -1; | |||
return 0; | |||
} | |||
if (context & LC_TEMPLATE_NAME) { | |||
if (data == *"{" || data == *"}" || data == *"[") { | |||
self->topstack->context |= LC_FAIL_NEXT; | |||
return; | |||
return 0; | |||
} | |||
if (data == *"]") { | |||
Tokenizer_fail_route(self); | |||
return; | |||
return -1; | |||
} | |||
if (data == *"|") | |||
return; | |||
return 0; | |||
if (context & LC_HAS_TEXT) { | |||
if (context & LC_FAIL_ON_TEXT) { | |||
if (!Py_UNICODE_ISSPACE(data)) | |||
return -1; | |||
} | |||
else { | |||
if (data == *"\n") | |||
self->topstack->context |= LC_FAIL_ON_TEXT; | |||
} | |||
} | |||
else if (!Py_UNICODE_ISSPACE(data)) | |||
self->topstack->context |= LC_HAS_TEXT; | |||
} | |||
else if (context & (LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)) { | |||
else { | |||
if (context & LC_FAIL_ON_EQUALS) { | |||
if (data == *"=") { | |||
Tokenizer_fail_route(self); | |||
return; | |||
return -1; | |||
} | |||
} | |||
else if (context & LC_FAIL_ON_LBRACE) { | |||
if (data == *"{") { | |||
if (data == *"{" || (Tokenizer_READ(self, -1) == *"{" && | |||
Tokenizer_READ(self, -2) == *"{")) { | |||
if (context & LC_TEMPLATE) | |||
self->topstack->context |= LC_FAIL_ON_EQUALS; | |||
else | |||
self->topstack->context |= LC_FAIL_NEXT; | |||
return; | |||
return 0; | |||
} | |||
self->topstack->context ^= LC_FAIL_ON_LBRACE; | |||
} | |||
@@ -1186,7 +1198,7 @@ Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | |||
self->topstack->context |= LC_FAIL_ON_EQUALS; | |||
else | |||
self->topstack->context |= LC_FAIL_NEXT; | |||
return; | |||
return 0; | |||
} | |||
self->topstack->context ^= LC_FAIL_ON_RBRACE; | |||
} | |||
@@ -1195,47 +1207,7 @@ Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | |||
else if (data == *"}") | |||
self->topstack->context |= LC_FAIL_ON_RBRACE; | |||
} | |||
if (context & LC_HAS_TEXT) { | |||
if (context & LC_FAIL_ON_TEXT) { | |||
if (!Py_UNICODE_ISSPACE(data)) { | |||
if (context & LC_TEMPLATE_PARAM_KEY) { | |||
self->topstack->context ^= LC_FAIL_ON_TEXT; | |||
self->topstack->context |= LC_FAIL_ON_EQUALS; | |||
} | |||
else | |||
Tokenizer_fail_route(self); | |||
return; | |||
} | |||
} | |||
else { | |||
if (data == *"\n") | |||
self->topstack->context |= LC_FAIL_ON_TEXT; | |||
} | |||
} | |||
else if (!Py_UNICODE_ISSPACE(data)) | |||
self->topstack->context |= LC_HAS_TEXT; | |||
} | |||
/* | |||
Unset any safety-checking contexts set by Tokenizer_verify_safe(). Used | |||
when we preserve a context but previous data becomes invalid, like when | |||
moving between template parameters. | |||
*/ | |||
static void | |||
Tokenizer_reset_safety_checks(Tokenizer* self) | |||
{ | |||
static int checks[] = { | |||
LC_HAS_TEXT, LC_FAIL_ON_TEXT, LC_FAIL_NEXT, LC_FAIL_ON_LBRACE, | |||
LC_FAIL_ON_RBRACE, LC_FAIL_ON_EQUALS, 0}; | |||
int context = self->topstack->context, i = 0, this; | |||
while (1) { | |||
this = checks[i]; | |||
if (!this) | |||
return; | |||
if (context & this) | |||
self->topstack->context ^= this; | |||
i++; | |||
} | |||
return 0; | |||
} | |||
/* | |||
@@ -1258,12 +1230,12 @@ Tokenizer_parse(Tokenizer* self, int context) | |||
this = Tokenizer_READ(self, 0); | |||
this_context = self->topstack->context; | |||
if (this_context & unsafe_contexts) { | |||
Tokenizer_verify_safe(self, this_context, this); | |||
if (BAD_ROUTE) { | |||
if (Tokenizer_verify_safe(self, this_context, this) < 0) { | |||
if (this_context & LC_TEMPLATE_PARAM_KEY) { | |||
trash = Tokenizer_pop(self); | |||
Py_XDECREF(trash); | |||
} | |||
Tokenizer_fail_route(self); | |||
return NULL; | |||
} | |||
} | |||
@@ -1303,7 +1275,6 @@ Tokenizer_parse(Tokenizer* self, int context) | |||
self->topstack->context ^= LC_FAIL_NEXT; | |||
} | |||
else if (this == *"|" && this_context & LC_TEMPLATE) { | |||
Tokenizer_reset_safety_checks(self); | |||
if (Tokenizer_handle_template_param(self)) | |||
return NULL; | |||
} | |||
@@ -1324,10 +1295,14 @@ Tokenizer_parse(Tokenizer* self, int context) | |||
Tokenizer_write_text(self, this); | |||
} | |||
else if (this == next && next == *"[") { | |||
if (Tokenizer_parse_wikilink(self)) | |||
return NULL; | |||
if (self->topstack->context & LC_FAIL_NEXT) | |||
self->topstack->context ^= LC_FAIL_NEXT; | |||
if (!(this_context & LC_WIKILINK_TITLE)) { | |||
if (Tokenizer_parse_wikilink(self)) | |||
return NULL; | |||
if (self->topstack->context & LC_FAIL_NEXT) | |||
self->topstack->context ^= LC_FAIL_NEXT; | |||
} | |||
else | |||
Tokenizer_write_text(self, this); | |||
} | |||
else if (this == *"|" && this_context & LC_WIKILINK_TITLE) { | |||
if (Tokenizer_handle_wikilink_separator(self)) | |||
@@ -1401,7 +1376,8 @@ Tokenizer_tokenize(Tokenizer* self, PyObject* args) | |||
PyMODINIT_FUNC | |||
init_tokenizer(void) | |||
{ | |||
PyObject *module, *tempmodule, *defmap, *deflist, *globals, *locals, *fromlist, *modname; | |||
PyObject *module, *tempmod, *defmap, *deflist, *globals, *locals, | |||
*fromlist, *modname; | |||
unsigned numdefs, i; | |||
char *name; | |||
@@ -1411,14 +1387,16 @@ init_tokenizer(void) | |||
module = Py_InitModule("_tokenizer", module_methods); | |||
Py_INCREF(&TokenizerType); | |||
PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); | |||
Py_INCREF(Py_True); | |||
PyDict_SetItemString(TokenizerType.tp_dict, "USES_C", Py_True); | |||
tempmodule = PyImport_ImportModule("htmlentitydefs"); | |||
if (!tempmodule) | |||
tempmod = PyImport_ImportModule("htmlentitydefs"); | |||
if (!tempmod) | |||
return; | |||
defmap = PyObject_GetAttrString(tempmodule, "entitydefs"); | |||
defmap = PyObject_GetAttrString(tempmod, "entitydefs"); | |||
if (!defmap) | |||
return; | |||
Py_DECREF(tempmodule); | |||
Py_DECREF(tempmod); | |||
deflist = PyDict_Keys(defmap); | |||
if (!deflist) | |||
return; | |||
@@ -1442,18 +1420,20 @@ init_tokenizer(void) | |||
if (!modname) | |||
return; | |||
PyList_SET_ITEM(fromlist, 0, modname); | |||
tempmodule = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); | |||
tempmod = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); | |||
Py_DECREF(fromlist); | |||
if (!tempmodule) | |||
if (!tempmod) | |||
return; | |||
tokens = PyObject_GetAttrString(tempmodule, "tokens"); | |||
Py_DECREF(tempmodule); | |||
tokens = PyObject_GetAttrString(tempmod, "tokens"); | |||
Py_DECREF(tempmod); | |||
Text = PyObject_GetAttrString(tokens, "Text"); | |||
TemplateOpen = PyObject_GetAttrString(tokens, "TemplateOpen"); | |||
TemplateParamSeparator = PyObject_GetAttrString(tokens, "TemplateParamSeparator"); | |||
TemplateParamEquals = PyObject_GetAttrString(tokens, "TemplateParamEquals"); | |||
TemplateParamSeparator = PyObject_GetAttrString(tokens, | |||
"TemplateParamSeparator"); | |||
TemplateParamEquals = PyObject_GetAttrString(tokens, | |||
"TemplateParamEquals"); | |||
TemplateClose = PyObject_GetAttrString(tokens, "TemplateClose"); | |||
ArgumentOpen = PyObject_GetAttrString(tokens, "ArgumentOpen"); | |||
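The ``USES_C`` attribute set on the type dictionary above (and defined as
``False`` on the pure-Python tokenizer later in this diff) gives a quick way to
tell which implementation is loaded; a sketch, assuming the extension was
compiled (Python 2 only once setup.py starts skipping it on 3)::

    from mwparserfromhell.parser._tokenizer import CTokenizer
    assert CTokenizer.USES_C is True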
@@ -1,6 +1,6 @@ | |||
/* | |||
Tokenizer Header File for MWParserFromHell | |||
Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
this software and associated documentation files (the "Software"), to deal in | |||
@@ -36,6 +36,10 @@ SOFTWARE. | |||
#define malloc PyObject_Malloc | |||
#define free PyObject_Free | |||
#define DIGITS "0123456789" | |||
#define HEXDIGITS "0123456789abcdefABCDEF" | |||
#define ALPHANUM "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |||
static const char* MARKERS[] = { | |||
"{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", "/", "-", | |||
"!", "\n", ""}; | |||
@@ -118,6 +122,7 @@ static PyObject* TagCloseClose; | |||
#define LC_COMMENT 0x02000 | |||
#define LC_SAFETY_CHECK 0xFC000 | |||
#define LC_HAS_TEXT 0x04000 | |||
#define LC_FAIL_ON_TEXT 0x08000 | |||
#define LC_FAIL_NEXT 0x10000 | |||
@@ -205,8 +210,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer*); | |||
static int Tokenizer_really_parse_entity(Tokenizer*); | |||
static int Tokenizer_parse_entity(Tokenizer*); | |||
static int Tokenizer_parse_comment(Tokenizer*); | |||
static void Tokenizer_verify_safe(Tokenizer*, int, Py_UNICODE); | |||
static void Tokenizer_reset_safety_checks(Tokenizer*); | |||
static int Tokenizer_verify_safe(Tokenizer*, int, Py_UNICODE); | |||
static PyObject* Tokenizer_parse(Tokenizer*, int); | |||
static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); | |||
@@ -37,6 +37,7 @@ class BadRoute(Exception): | |||
class Tokenizer(object): | |||
"""Creates a list of tokens from a string of wikicode.""" | |||
USES_C = False | |||
START = object() | |||
END = object() | |||
MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", | |||
@@ -212,28 +213,9 @@ class Tokenizer(object): | |||
self._write_all(argument) | |||
self._write(tokens.ArgumentClose()) | |||
def _verify_safe(self, unsafes, strip=True): | |||
"""Verify that there are no unsafe characters in the current stack. | |||
The route will be failed if the name contains any element of *unsafes* | |||
in it. This is used when parsing template names, parameter keys, and so | |||
on, which cannot contain newlines and some other characters. If *strip* | |||
is ``True``, the text will be stripped of whitespace, since this is | |||
allowed at the ends of certain elements but not between text. | |||
""" | |||
self._push_textbuffer() | |||
if self._stack: | |||
text = [tok for tok in self._stack if isinstance(tok, tokens.Text)] | |||
text = "".join([token.text for token in text]) | |||
if strip: | |||
text = text.strip() | |||
if text and any([unsafe in text for unsafe in unsafes]): | |||
self._fail_route() | |||
def _handle_template_param(self): | |||
"""Handle a template parameter at the head of the string.""" | |||
if self._context & contexts.TEMPLATE_NAME: | |||
self._verify_safe(["\n", "{", "}", "[", "]"]) | |||
self._context ^= contexts.TEMPLATE_NAME | |||
elif self._context & contexts.TEMPLATE_PARAM_VALUE: | |||
self._context ^= contexts.TEMPLATE_PARAM_VALUE | |||
@@ -245,11 +227,6 @@ class Tokenizer(object): | |||
def _handle_template_param_value(self): | |||
"""Handle a template parameter's value at the head of the string.""" | |||
try: | |||
self._verify_safe(["\n", "{{", "}}"]) | |||
except BadRoute: | |||
self._pop() | |||
raise | |||
self._write_all(self._pop(keep_context=True)) | |||
self._context ^= contexts.TEMPLATE_PARAM_KEY | |||
self._context |= contexts.TEMPLATE_PARAM_VALUE | |||
@@ -257,24 +234,19 @@ class Tokenizer(object): | |||
def _handle_template_end(self): | |||
"""Handle the end of a template at the head of the string.""" | |||
if self._context & contexts.TEMPLATE_NAME: | |||
self._verify_safe(["\n", "{", "}", "[", "]"]) | |||
elif self._context & contexts.TEMPLATE_PARAM_KEY: | |||
if self._context & contexts.TEMPLATE_PARAM_KEY: | |||
self._write_all(self._pop(keep_context=True)) | |||
self._head += 1 | |||
return self._pop() | |||
def _handle_argument_separator(self): | |||
"""Handle the separator between an argument's name and default.""" | |||
self._verify_safe(["\n", "{{", "}}"]) | |||
self._context ^= contexts.ARGUMENT_NAME | |||
self._context |= contexts.ARGUMENT_DEFAULT | |||
self._write(tokens.ArgumentSeparator()) | |||
def _handle_argument_end(self): | |||
"""Handle the end of an argument at the head of the string.""" | |||
if self._context & contexts.ARGUMENT_NAME: | |||
self._verify_safe(["\n", "{{", "}}"]) | |||
self._head += 2 | |||
return self._pop() | |||
@@ -294,15 +266,12 @@ class Tokenizer(object): | |||
def _handle_wikilink_separator(self): | |||
"""Handle the separator between a wikilink's title and its text.""" | |||
self._verify_safe(["\n", "{", "}", "[", "]"], strip=False) | |||
self._context ^= contexts.WIKILINK_TITLE | |||
self._context |= contexts.WIKILINK_TEXT | |||
self._write(tokens.WikilinkSeparator()) | |||
def _handle_wikilink_end(self): | |||
"""Handle the end of a wikilink at the head of the string.""" | |||
if self._context & contexts.WIKILINK_TITLE: | |||
self._verify_safe(["\n", "{", "}", "[", "]"], strip=False) | |||
self._head += 1 | |||
return self._pop() | |||
@@ -423,11 +392,73 @@ class Tokenizer(object): | |||
self._write(tokens.CommentEnd()) | |||
self._head += 2 | |||
def _verify_safe(self, this): | |||
"""Make sure we are not trying to write an invalid character.""" | |||
context = self._context | |||
if context & contexts.FAIL_NEXT: | |||
return False | |||
if context & contexts.WIKILINK_TITLE: | |||
if this == "]" or this == "{": | |||
self._context |= contexts.FAIL_NEXT | |||
elif this == "\n" or this == "[" or this == "}": | |||
return False | |||
return True | |||
if context & contexts.TEMPLATE_NAME: | |||
if this == "{" or this == "}" or this == "[": | |||
self._context |= contexts.FAIL_NEXT | |||
return True | |||
if this == "]": | |||
return False | |||
if this == "|": | |||
return True | |||
if context & contexts.HAS_TEXT: | |||
if context & contexts.FAIL_ON_TEXT: | |||
if this is self.END or not this.isspace(): | |||
return False | |||
else: | |||
if this == "\n": | |||
self._context |= contexts.FAIL_ON_TEXT | |||
elif this is self.END or not this.isspace(): | |||
self._context |= contexts.HAS_TEXT | |||
return True | |||
else: | |||
if context & contexts.FAIL_ON_EQUALS: | |||
if this == "=": | |||
return False | |||
elif context & contexts.FAIL_ON_LBRACE: | |||
if this == "{" or (self._read(-1) == self._read(-2) == "{"): | |||
if context & contexts.TEMPLATE: | |||
self._context |= contexts.FAIL_ON_EQUALS | |||
else: | |||
self._context |= contexts.FAIL_NEXT | |||
return True | |||
self._context ^= contexts.FAIL_ON_LBRACE | |||
elif context & contexts.FAIL_ON_RBRACE: | |||
if this == "}": | |||
if context & contexts.TEMPLATE: | |||
self._context |= contexts.FAIL_ON_EQUALS | |||
else: | |||
self._context |= contexts.FAIL_NEXT | |||
return True | |||
self._context ^= contexts.FAIL_ON_RBRACE | |||
elif this == "{": | |||
self._context |= contexts.FAIL_ON_LBRACE | |||
elif this == "}": | |||
self._context |= contexts.FAIL_ON_RBRACE | |||
return True | |||
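The net effect of these checks: spaces are fine inside a template name, but
text on both sides of a newline trips ``FAIL_ON_TEXT`` and fails the route, so
the braces are written back as plain text. A rough illustration of the
expected behaviour (not an exhaustive test)::

    import mwparserfromhell

    mwparserfromhell.parse("{{foo bar}}")   # yields one Template node
    mwparserfromhell.parse("{{foo\nbar}}")  # no Template; kept as plain text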
def _parse(self, context=0): | |||
"""Parse the wikicode string, using *context* for when to stop.""" | |||
self._push(context) | |||
while True: | |||
this = self._read() | |||
unsafe = (contexts.TEMPLATE_NAME | contexts.WIKILINK_TITLE | | |||
contexts.TEMPLATE_PARAM_KEY | contexts.ARGUMENT_NAME) | |||
if self._context & unsafe: | |||
if not self._verify_safe(this): | |||
if self._context & contexts.TEMPLATE_PARAM_KEY: | |||
self._pop() | |||
self._fail_route() | |||
if this not in self.MARKERS: | |||
self._write_text(this) | |||
self._head += 1 | |||
@@ -449,6 +480,8 @@ class Tokenizer(object): | |||
self._write_text(this) | |||
elif this == next == "{": | |||
self._parse_template_or_argument() | |||
if self._context & contexts.FAIL_NEXT: | |||
self._context ^= contexts.FAIL_NEXT | |||
elif this == "|" and self._context & contexts.TEMPLATE: | |||
self._handle_template_param() | |||
elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY: | |||
@@ -465,6 +498,8 @@ class Tokenizer(object): | |||
elif this == next == "[": | |||
if not self._context & contexts.WIKILINK_TITLE: | |||
self._parse_wikilink() | |||
if self._context & contexts.FAIL_NEXT: | |||
self._context ^= contexts.FAIL_NEXT | |||
else: | |||
self._write_text("[") | |||
elif this == "|" and self._context & contexts.WIKILINK_TITLE: | |||
@@ -41,8 +41,23 @@ def inheritdoc(method): | |||
method.__doc__ = getattr(list, method.__name__).__doc__ | |||
return method | |||
class _SliceNormalizerMixIn(object): | |||
"""MixIn that provides a private method to normalize slices.""" | |||
class SmartList(list): | |||
def _normalize_slice(self, key): | |||
"""Return a slice equivalent to the input *key*, standardized.""" | |||
if key.start is not None: | |||
start = (len(self) + key.start) if key.start < 0 else key.start | |||
else: | |||
start = 0 | |||
if key.stop is not None: | |||
stop = (len(self) + key.stop) if key.stop < 0 else key.stop | |||
else: | |||
stop = maxsize | |||
return slice(start, stop, key.step or 1) | |||
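Concretely, negative and open-ended bounds are rewritten into absolute ones,
with ``maxsize`` (from ``compat``) standing in for "up to the end". A rough
illustration, not part of the change itself::

    lst = SmartList([0, 1, 2, 3, 4])
    key = lst._normalize_slice(slice(-2, None))
    # key == slice(3, maxsize, 1): start is resolved against len(lst) and an
    # open stop becomes maxsize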
class SmartList(_SliceNormalizerMixIn, list): | |||
"""Implements the ``list`` interface with special handling of sublists. | |||
When a sublist is created (by ``list[i:j]``), any changes made to this | |||
@@ -76,7 +91,8 @@ class SmartList(list): | |||
def __getitem__(self, key): | |||
if not isinstance(key, slice): | |||
return super(SmartList, self).__getitem__(key) | |||
sliceinfo = [key.start or 0, key.stop or 0, key.step or 1] | |||
key = self._normalize_slice(key) | |||
sliceinfo = [key.start, key.stop, key.step] | |||
child = _ListProxy(self, sliceinfo) | |||
self._children[id(child)] = (child, sliceinfo) | |||
return child | |||
@@ -86,25 +102,28 @@ class SmartList(list): | |||
return super(SmartList, self).__setitem__(key, item) | |||
item = list(item) | |||
super(SmartList, self).__setitem__(key, item) | |||
diff = len(item) - key.stop + key.start | |||
key = self._normalize_slice(key) | |||
diff = len(item) + (key.start - key.stop) // key.step | |||
values = self._children.values if py3k else self._children.itervalues | |||
if diff: | |||
for child, (start, stop, step) in values(): | |||
if start >= key.stop: | |||
if start > key.stop: | |||
self._children[id(child)][1][0] += diff | |||
if stop >= key.stop and stop != maxsize: | |||
self._children[id(child)][1][1] += diff | |||
def __delitem__(self, key): | |||
super(SmartList, self).__delitem__(key) | |||
if not isinstance(key, slice): | |||
key = slice(key, key + 1) | |||
diff = key.stop - key.start | |||
if isinstance(key, slice): | |||
key = self._normalize_slice(key) | |||
else: | |||
key = slice(key, key + 1, 1) | |||
diff = (key.stop - key.start) // key.step | |||
values = self._children.values if py3k else self._children.itervalues | |||
for child, (start, stop, step) in values(): | |||
if start > key.start: | |||
self._children[id(child)][1][0] -= diff | |||
if stop >= key.stop: | |||
if stop >= key.stop and stop != maxsize: | |||
self._children[id(child)][1][1] -= diff | |||
if not py3k: | |||
@@ -160,24 +179,35 @@ class SmartList(list): | |||
child._parent = copy | |||
super(SmartList, self).reverse() | |||
@inheritdoc | |||
def sort(self, cmp=None, key=None, reverse=None): | |||
copy = list(self) | |||
for child in self._children: | |||
child._parent = copy | |||
if cmp is not None: | |||
if py3k: | |||
@inheritdoc | |||
def sort(self, key=None, reverse=None): | |||
copy = list(self) | |||
for child in self._children: | |||
child._parent = copy | |||
kwargs = {} | |||
if key is not None: | |||
if reverse is not None: | |||
super(SmartList, self).sort(cmp, key, reverse) | |||
else: | |||
super(SmartList, self).sort(cmp, key) | |||
else: | |||
super(SmartList, self).sort(cmp) | |||
else: | |||
super(SmartList, self).sort() | |||
kwargs["key"] = key | |||
if reverse is not None: | |||
kwargs["reverse"] = reverse | |||
super(SmartList, self).sort(**kwargs) | |||
else: | |||
@inheritdoc | |||
def sort(self, cmp=None, key=None, reverse=None): | |||
copy = list(self) | |||
for child in self._children: | |||
child._parent = copy | |||
kwargs = {} | |||
if cmp is not None: | |||
kwargs["cmp"] = cmp | |||
if key is not None: | |||
kwargs["key"] = key | |||
if reverse is not None: | |||
kwargs["reverse"] = reverse | |||
super(SmartList, self).sort(**kwargs) | |||
class _ListProxy(list): | |||
class _ListProxy(_SliceNormalizerMixIn, list): | |||
"""Implement the ``list`` interface by getting elements from a parent. | |||
This is created by a :py:class:`~.SmartList` object when slicing. It does | |||
@@ -231,25 +261,52 @@ class _ListProxy(list): | |||
return bool(self._render()) | |||
def __len__(self): | |||
return (self._stop - self._start) / self._step | |||
return (self._stop - self._start) // self._step | |||
def __getitem__(self, key): | |||
return self._render()[key] | |||
if isinstance(key, slice): | |||
key = self._normalize_slice(key) | |||
if key.stop == maxsize: | |||
keystop = self._stop | |||
else: | |||
keystop = key.stop + self._start | |||
adjusted = slice(key.start + self._start, keystop, key.step) | |||
return self._parent[adjusted] | |||
else: | |||
return self._render()[key] | |||
def __setitem__(self, key, item): | |||
if isinstance(key, slice): | |||
adjusted = slice(key.start + self._start, key.stop + self._stop, | |||
key.step) | |||
key = self._normalize_slice(key) | |||
if key.stop == maxsize: | |||
keystop = self._stop | |||
else: | |||
keystop = key.stop + self._start | |||
adjusted = slice(key.start + self._start, keystop, key.step) | |||
self._parent[adjusted] = item | |||
else: | |||
length = len(self) | |||
if key < 0: | |||
key = length + key | |||
if key < 0 or key >= length: | |||
raise IndexError("list assignment index out of range") | |||
self._parent[self._start + key] = item | |||
def __delitem__(self, key): | |||
if isinstance(key, slice): | |||
adjusted = slice(key.start + self._start, key.stop + self._stop, | |||
key.step) | |||
key = self._normalize_slice(key) | |||
if key.stop == maxsize: | |||
keystop = self._stop | |||
else: | |||
keystop = key.stop + self._start | |||
adjusted = slice(key.start + self._start, keystop, key.step) | |||
del self._parent[adjusted] | |||
else: | |||
length = len(self) | |||
if key < 0: | |||
key = length + key | |||
if key < 0 or key >= length: | |||
raise IndexError("list assignment index out of range") | |||
del self._parent[self._start + key] | |||
def __iter__(self): | |||
@@ -287,6 +344,16 @@ class _ListProxy(list): | |||
self.extend(other) | |||
return self | |||
def __mul__(self, other): | |||
return SmartList(list(self) * other) | |||
def __rmul__(self, other): | |||
return SmartList(other * list(self)) | |||
def __imul__(self, other): | |||
self.extend(list(self) * (other - 1)) | |||
return self | |||
@property | |||
def _start(self): | |||
"""The starting index of this list, inclusive.""" | |||
@@ -295,6 +362,8 @@ class _ListProxy(list): | |||
@property | |||
def _stop(self): | |||
"""The ending index of this list, exclusive.""" | |||
if self._sliceinfo[1] == maxsize: | |||
return len(self._parent) | |||
return self._sliceinfo[1] | |||
@property | |||
@@ -328,18 +397,25 @@ class _ListProxy(list): | |||
@inheritdoc | |||
def insert(self, index, item): | |||
if index < 0: | |||
index = len(self) + index | |||
self._parent.insert(self._start + index, item) | |||
@inheritdoc | |||
def pop(self, index=None): | |||
length = len(self) | |||
if index is None: | |||
index = len(self) - 1 | |||
index = length - 1 | |||
elif index < 0: | |||
index = length + index | |||
if index < 0 or index >= length: | |||
raise IndexError("pop index out of range") | |||
return self._parent.pop(self._start + index) | |||
@inheritdoc | |||
def remove(self, item): | |||
index = self.index(item) | |||
del self._parent[index] | |||
del self._parent[self._start + index] | |||
@inheritdoc | |||
def reverse(self): | |||
@@ -347,17 +423,30 @@ class _ListProxy(list): | |||
item.reverse() | |||
self._parent[self._start:self._stop:self._step] = item | |||
@inheritdoc | |||
def sort(self, cmp=None, key=None, reverse=None): | |||
item = self._render() | |||
if cmp is not None: | |||
if py3k: | |||
@inheritdoc | |||
def sort(self, key=None, reverse=None): | |||
item = self._render() | |||
kwargs = {} | |||
if key is not None: | |||
if reverse is not None: | |||
item.sort(cmp, key, reverse) | |||
else: | |||
item.sort(cmp, key) | |||
else: | |||
item.sort(cmp) | |||
else: | |||
item.sort() | |||
self._parent[self._start:self._stop:self._step] = item | |||
kwargs["key"] = key | |||
if reverse is not None: | |||
kwargs["reverse"] = reverse | |||
item.sort(**kwargs) | |||
self._parent[self._start:self._stop:self._step] = item | |||
else: | |||
@inheritdoc | |||
def sort(self, cmp=None, key=None, reverse=None): | |||
item = self._render() | |||
kwargs = {} | |||
if cmp is not None: | |||
kwargs["cmp"] = cmp | |||
if key is not None: | |||
kwargs["key"] = key | |||
if reverse is not None: | |||
kwargs["reverse"] = reverse | |||
item.sort(**kwargs) | |||
self._parent[self._start:self._stop:self._step] = item | |||
del inheritdoc |
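For context, the point of all this slice bookkeeping is that a ``_ListProxy``
returned by slicing stays live: changes made through either the parent or the
sublist are visible from both sides. A short sketch of the intended behaviour
(the values are arbitrary)::

    parent = SmartList([0, 1, 2, 3])
    child = parent[2:]       # a _ListProxy covering index 2 through the end
    child.append(4)          # routed through the parent
    # parent == [0, 1, 2, 3, 4]; child == [2, 3, 4]
    parent.append(5)
    # the open-ended child sees it too: child == [2, 3, 4, 5]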
@@ -114,6 +114,9 @@ class StringMixIn(object): | |||
def __getitem__(self, key): | |||
return self.__unicode__()[key] | |||
def __reversed__(self): | |||
return reversed(self.__unicode__()) | |||
def __contains__(self, item): | |||
if isinstance(item, StringMixIn): | |||
return str(item) in self.__unicode__() | |||
@@ -123,6 +126,11 @@ class StringMixIn(object): | |||
def capitalize(self): | |||
return self.__unicode__().capitalize() | |||
if py3k: | |||
@inheritdoc | |||
def casefold(self): | |||
return self.__unicode__().casefold() | |||
@inheritdoc | |||
def center(self, width, fillchar=None): | |||
if fillchar is None: | |||
@@ -136,19 +144,21 @@ class StringMixIn(object): | |||
if not py3k: | |||
@inheritdoc | |||
def decode(self, encoding=None, errors=None): | |||
if errors is None: | |||
if encoding is None: | |||
return self.__unicode__().decode() | |||
return self.__unicode__().decode(encoding) | |||
return self.__unicode__().decode(encoding, errors) | |||
kwargs = {} | |||
if encoding is not None: | |||
kwargs["encoding"] = encoding | |||
if errors is not None: | |||
kwargs["errors"] = errors | |||
return self.__unicode__().decode(**kwargs) | |||
@inheritdoc | |||
def encode(self, encoding=None, errors=None): | |||
if errors is None: | |||
if encoding is None: | |||
return self.__unicode__().encode() | |||
return self.__unicode__().encode(encoding) | |||
return self.__unicode__().encode(encoding, errors) | |||
kwargs = {} | |||
if encoding is not None: | |||
kwargs["encoding"] = encoding | |||
if errors is not None: | |||
kwargs["errors"] = errors | |||
return self.__unicode__().encode(**kwargs) | |||
@inheritdoc | |||
def endswith(self, prefix, start=None, end=None): | |||
@@ -168,6 +178,11 @@ class StringMixIn(object): | |||
def format(self, *args, **kwargs): | |||
return self.__unicode__().format(*args, **kwargs) | |||
if py3k: | |||
@inheritdoc | |||
def format_map(self, mapping): | |||
return self.__unicode__().format_map(mapping) | |||
@inheritdoc | |||
def index(self, sub, start=None, end=None): | |||
return self.__unicode__().index(sub, start, end) | |||
@@ -188,6 +203,11 @@ class StringMixIn(object): | |||
def isdigit(self): | |||
return self.__unicode__().isdigit() | |||
if py3k: | |||
@inheritdoc | |||
def isidentifier(self): | |||
return self.__unicode__().isidentifier() | |||
@inheritdoc | |||
def islower(self): | |||
return self.__unicode__().islower() | |||
@@ -196,6 +216,11 @@ class StringMixIn(object): | |||
def isnumeric(self): | |||
return self.__unicode__().isnumeric() | |||
if py3k: | |||
@inheritdoc | |||
def isprintable(self): | |||
return self.__unicode__().isprintable() | |||
@inheritdoc | |||
def isspace(self): | |||
return self.__unicode__().isspace() | |||
@@ -226,12 +251,24 @@ class StringMixIn(object): | |||
def lstrip(self, chars=None): | |||
return self.__unicode__().lstrip(chars) | |||
if py3k: | |||
@inheritdoc | |||
def maketrans(self, x, y=None, z=None): | |||
if z is None: | |||
if y is None: | |||
return self.__unicode__().maketrans(x) | |||
return self.__unicode__().maketrans(x, y) | |||
return self.__unicode__().maketrans(x, y, z) | |||
@inheritdoc | |||
def partition(self, sep): | |||
return self.__unicode__().partition(sep) | |||
@inheritdoc | |||
def replace(self, old, new, count): | |||
def replace(self, old, new, count=None): | |||
if count is None: | |||
return self.__unicode__().replace(old, new) | |||
return self.__unicode__().replace(old, new, count) | |||
@inheritdoc | |||
@@ -252,25 +289,45 @@ class StringMixIn(object): | |||
def rpartition(self, sep): | |||
return self.__unicode__().rpartition(sep) | |||
@inheritdoc | |||
def rsplit(self, sep=None, maxsplit=None): | |||
if maxsplit is None: | |||
if sep is None: | |||
return self.__unicode__().rsplit() | |||
return self.__unicode__().rsplit(sep) | |||
return self.__unicode__().rsplit(sep, maxsplit) | |||
if py3k: | |||
@inheritdoc | |||
def rsplit(self, sep=None, maxsplit=None): | |||
kwargs = {} | |||
if sep is not None: | |||
kwargs["sep"] = sep | |||
if maxsplit is not None: | |||
kwargs["maxsplit"] = maxsplit | |||
return self.__unicode__().rsplit(**kwargs) | |||
else: | |||
@inheritdoc | |||
def rsplit(self, sep=None, maxsplit=None): | |||
if maxsplit is None: | |||
if sep is None: | |||
return self.__unicode__().rsplit() | |||
return self.__unicode__().rsplit(sep) | |||
return self.__unicode__().rsplit(sep, maxsplit) | |||
@inheritdoc | |||
def rstrip(self, chars=None): | |||
return self.__unicode__().rstrip(chars) | |||
@inheritdoc | |||
def split(self, sep=None, maxsplit=None): | |||
if maxsplit is None: | |||
if sep is None: | |||
return self.__unicode__().split() | |||
return self.__unicode__().split(sep) | |||
return self.__unicode__().split(sep, maxsplit) | |||
if py3k: | |||
@inheritdoc | |||
def split(self, sep=None, maxsplit=None): | |||
kwargs = {} | |||
if sep is not None: | |||
kwargs["sep"] = sep | |||
if maxsplit is not None: | |||
kwargs["maxsplit"] = maxsplit | |||
return self.__unicode__().split(**kwargs) | |||
else: | |||
@inheritdoc | |||
def split(self, sep=None, maxsplit=None): | |||
if maxsplit is None: | |||
if sep is None: | |||
return self.__unicode__().split() | |||
return self.__unicode__().split(sep) | |||
return self.__unicode__().split(sep, maxsplit) | |||
@inheritdoc | |||
def splitlines(self, keepends=None): | |||
@@ -24,6 +24,7 @@ | |||
from setuptools import setup, find_packages, Extension | |||
from mwparserfromhell import __version__ | |||
from mwparserfromhell.compat import py3k | |||
with open("README.rst") as fp: | |||
long_docs = fp.read() | |||
@@ -37,7 +38,7 @@ tokenizer = Extension("mwparserfromhell.parser._tokenizer", | |||
setup( | |||
name = "mwparserfromhell", | |||
packages = find_packages(exclude=("tests",)), | |||
ext_modules = [tokenizer], | |||
ext_modules = [] if py3k else [tokenizer], | |||
test_suite = "tests", | |||
version = __version__, | |||
author = "Ben Kurtovic", | |||
@@ -0,0 +1,130 @@ | |||
<?xml version="1.0" encoding="UTF-8"?> | |||
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> | |||
<plist version="1.0"> | |||
<dict> | |||
<key>fileTypes</key> | |||
<array> | |||
<string>mwtest</string> | |||
</array> | |||
<key>name</key> | |||
<string>MWParserFromHell Test Case</string> | |||
<key>patterns</key> | |||
<array> | |||
<dict> | |||
<key>match</key> | |||
<string>---</string> | |||
<key>name</key> | |||
<string>markup.heading.divider.mwpfh</string> | |||
</dict> | |||
<dict> | |||
<key>captures</key> | |||
<dict> | |||
<key>1</key> | |||
<dict> | |||
<key>name</key> | |||
<string>keyword.other.name.mwpfh</string> | |||
</dict> | |||
<key>2</key> | |||
<dict> | |||
<key>name</key> | |||
<string>variable.other.name.mwpfh</string> | |||
</dict> | |||
</dict> | |||
<key>match</key> | |||
<string>(name:)\s*(\w*)</string> | |||
<key>name</key> | |||
<string>meta.name.mwpfh</string> | |||
</dict> | |||
<dict> | |||
<key>captures</key> | |||
<dict> | |||
<key>1</key> | |||
<dict> | |||
<key>name</key> | |||
<string>keyword.other.label.mwpfh</string> | |||
</dict> | |||
<key>2</key> | |||
<dict> | |||
<key>name</key> | |||
<string>comment.line.other.label.mwpfh</string> | |||
</dict> | |||
</dict> | |||
<key>match</key> | |||
<string>(label:)\s*(.*)</string> | |||
<key>name</key> | |||
<string>meta.label.mwpfh</string> | |||
</dict> | |||
<dict> | |||
<key>captures</key> | |||
<dict> | |||
<key>1</key> | |||
<dict> | |||
<key>name</key> | |||
<string>keyword.other.input.mwpfh</string> | |||
</dict> | |||
<key>2</key> | |||
<dict> | |||
<key>name</key> | |||
<string>string.quoted.double.input.mwpfh</string> | |||
</dict> | |||
</dict> | |||
<key>match</key> | |||
<string>(input:)\s*(.*)</string> | |||
<key>name</key> | |||
<string>meta.input.mwpfh</string> | |||
</dict> | |||
<dict> | |||
<key>captures</key> | |||
<dict> | |||
<key>1</key> | |||
<dict> | |||
<key>name</key> | |||
<string>keyword.other.output.mwpfh</string> | |||
</dict> | |||
</dict> | |||
<key>match</key> | |||
<string>(output:)</string> | |||
<key>name</key> | |||
<string>meta.output.mwpfh</string> | |||
</dict> | |||
<dict> | |||
<key>captures</key> | |||
<dict> | |||
<key>1</key> | |||
<dict> | |||
<key>name</key> | |||
<string>support.language.token.mwpfh</string> | |||
</dict> | |||
</dict> | |||
<key>match</key> | |||
<string>(\w+)\s*\(</string> | |||
<key>name</key> | |||
<string>meta.name.token.mwpfh</string> | |||
</dict> | |||
<dict> | |||
<key>captures</key> | |||
<dict> | |||
<key>1</key> | |||
<dict> | |||
<key>name</key> | |||
<string>variable.parameter.token.mwpfh</string> | |||
</dict> | |||
</dict> | |||
<key>match</key> | |||
<string>(\w+)\s*(=)</string> | |||
<key>name</key> | |||
<string>meta.name.parameter.token.mwpfh</string> | |||
</dict> | |||
<dict> | |||
<key>match</key> | |||
<string>".*?"</string> | |||
<key>name</key> | |||
<string>string.quoted.double.mwpfh</string> | |||
</dict> | |||
</array> | |||
<key>scopeName</key> | |||
<string>text.mwpfh</string> | |||
<key>uuid</key> | |||
<string>cd3e2ffa-a57d-4c40-954f-1a2e87ffd638</string> | |||
</dict> | |||
</plist> |
@@ -0,0 +1,121 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import print_function, unicode_literals | |||
from os import listdir, path | |||
from mwparserfromhell.compat import py3k | |||
from mwparserfromhell.parser import tokens | |||
class _TestParseError(Exception): | |||
"""Raised internally when a test could not be parsed.""" | |||
pass | |||
class TokenizerTestCase(object): | |||
"""A base test case for tokenizers, whose tests are loaded dynamically. | |||
Subclassed along with unittest.TestCase to form TestPyTokenizer and | |||
TestCTokenizer. Tests are loaded dynamically from files in the 'tokenizer' | |||
directory. | |||
""" | |||
@classmethod | |||
def _build_test_method(cls, funcname, data): | |||
"""Create and return a method to be treated as a test case method. | |||
*data* is a dict containing multiple keys: the *input* text to be | |||
tokenized, the expected list of tokens as *output*, and an optional | |||
*label* for the method's docstring. | |||
""" | |||
def inner(self): | |||
expected = data["output"] | |||
actual = self.tokenizer().tokenize(data["input"]) | |||
self.assertEqual(expected, actual) | |||
if not py3k: | |||
inner.__name__ = funcname.encode("utf8") | |||
inner.__doc__ = data["label"] | |||
return inner | |||
@classmethod | |||
def _load_tests(cls, filename, text): | |||
"""Load all tests in *text* from the file *filename*.""" | |||
tests = text.split("\n---\n") | |||
counter = 1 | |||
digits = len(str(len(tests))) | |||
for test in tests: | |||
data = {"name": None, "label": None, "input": None, "output": None} | |||
try: | |||
for line in test.strip().splitlines(): | |||
if line.startswith("name:"): | |||
data["name"] = line[len("name:"):].strip() | |||
elif line.startswith("label:"): | |||
data["label"] = line[len("label:"):].strip() | |||
elif line.startswith("input:"): | |||
raw = line[len("input:"):].strip() | |||
if raw[0] == '"' and raw[-1] == '"': | |||
raw = raw[1:-1] | |||
raw = raw.encode("raw_unicode_escape") | |||
data["input"] = raw.decode("unicode_escape") | |||
elif line.startswith("output:"): | |||
raw = line[len("output:"):].strip() | |||
try: | |||
data["output"] = eval(raw, vars(tokens)) | |||
except Exception as err: | |||
raise _TestParseError(err) | |||
except _TestParseError as err: | |||
if data["name"]: | |||
error = "Could not parse test '{0}' in '{1}':\n\t{2}" | |||
print(error.format(data["name"], filename, err)) | |||
else: | |||
error = "Could not parse a test in '{0}':\n\t{1}" | |||
print(error.format(filename, err)) | |||
continue | |||
if not data["name"]: | |||
error = "A test in '{0}' was ignored because it lacked a name" | |||
print(error.format(filename)) | |||
continue | |||
if data["input"] is None or data["output"] is None: | |||
error = "Test '{0}' in '{1}' was ignored because it lacked an input or an output" | |||
print(error.format(data["name"], filename)) | |||
continue | |||
number = str(counter).zfill(digits) | |||
fname = "test_{0}{1}_{2}".format(filename, number, data["name"]) | |||
meth = cls._build_test_method(fname, data) | |||
setattr(cls, fname, meth) | |||
counter += 1 | |||
@classmethod | |||
def build(cls): | |||
"""Load and install all tests from the 'tokenizer' directory.""" | |||
directory = path.join(path.dirname(__file__), "tokenizer") | |||
extension = ".mwtest" | |||
for filename in listdir(directory): | |||
if not filename.endswith(extension): | |||
continue | |||
with open(path.join(directory, filename), "r") as fp: | |||
text = fp.read() | |||
if not py3k: | |||
text = text.decode("utf8") | |||
cls._load_tests(filename[:0-len(extension)], text) | |||
TokenizerTestCase.build() |
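The ``.mwtest`` files that ``build()`` discovers use the keyword format parsed
above, with individual cases separated by ``---`` lines. A hypothetical case
might look like::

    name:   basic_template
    label:  a bare template with no parameters
    input:  "{{foo}}"
    output: [TemplateOpen(), Text(text="foo"), TemplateClose()]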
@@ -0,0 +1,113 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from unittest import TestCase | |||
from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, | |||
Tag, Template, Text, Wikilink) | |||
from mwparserfromhell.nodes.extras import Attribute, Parameter | |||
from mwparserfromhell.wikicode import Wikicode | |||
class TreeEqualityTestCase(TestCase): | |||
"""A base test case with support for comparing the equality of node trees. | |||
This adds a number of type equality functions, for Wikicode objects and for | |||
each node type (Argument, Comment, Heading, HTMLEntity, Tag, Template, Text, | |||
and Wikilink). | |||
""" | |||
def assertNodeEqual(self, expected, actual): | |||
"""Assert that two Nodes have the same type and have the same data.""" | |||
registry = { | |||
Argument: self.assertArgumentNodeEqual, | |||
Comment: self.assertCommentNodeEqual, | |||
Heading: self.assertHeadingNodeEqual, | |||
HTMLEntity: self.assertHTMLEntityNodeEqual, | |||
Tag: self.assertTagNodeEqual, | |||
Template: self.assertTemplateNodeEqual, | |||
Text: self.assertTextNodeEqual, | |||
Wikilink: self.assertWikilinkNodeEqual | |||
} | |||
for nodetype in registry: | |||
if isinstance(expected, nodetype): | |||
self.assertIsInstance(actual, nodetype) | |||
registry[nodetype](expected, actual) | |||
def assertArgumentNodeEqual(self, expected, actual): | |||
"""Assert that two Argument nodes have the same data.""" | |||
self.assertWikicodeEqual(expected.name, actual.name) | |||
if expected.default is not None: | |||
self.assertWikicodeEqual(expected.default, actual.default) | |||
else: | |||
self.assertIs(None, actual.default) | |||
def assertCommentNodeEqual(self, expected, actual): | |||
"""Assert that two Comment nodes have the same data.""" | |||
self.assertWikicodeEqual(expected.contents, actual.contents) | |||
def assertHeadingNodeEqual(self, expected, actual): | |||
"""Assert that two Heading nodes have the same data.""" | |||
self.assertWikicodeEqual(expected.title, actual.title) | |||
self.assertEqual(expected.level, actual.level) | |||
def assertHTMLEntityNodeEqual(self, expected, actual): | |||
"""Assert that two HTMLEntity nodes have the same data.""" | |||
self.assertEqual(expected.value, actual.value) | |||
self.assertIs(expected.named, actual.named) | |||
self.assertIs(expected.hexadecimal, actual.hexadecimal) | |||
self.assertEqual(expected.hex_char, actual.hex_char) | |||
def assertTagNodeEqual(self, expected, actual): | |||
"""Assert that two Tag nodes have the same data.""" | |||
self.fail("Holding this until feature/html_tags is ready.") | |||
def assertTemplateNodeEqual(self, expected, actual): | |||
"""Assert that two Template nodes have the same data.""" | |||
self.assertWikicodeEqual(expected.name, actual.name) | |||
length = len(expected.params) | |||
self.assertEqual(length, len(actual.params)) | |||
for i in range(length): | |||
exp_param = expected.params[i] | |||
act_param = actual.params[i] | |||
self.assertWikicodeEqual(exp_param.name, act_param.name) | |||
self.assertWikicodeEqual(exp_param.value, act_param.value) | |||
self.assertIs(exp_param.showkey, act_param.showkey) | |||
def assertTextNodeEqual(self, expected, actual): | |||
"""Assert that two Text nodes have the same data.""" | |||
self.assertEqual(expected.value, actual.value) | |||
def assertWikilinkNodeEqual(self, expected, actual): | |||
"""Assert that two Wikilink nodes have the same data.""" | |||
self.assertWikicodeEqual(expected.title, actual.title) | |||
if expected.text is not None: | |||
self.assertWikicodeEqual(expected.text, actual.text) | |||
else: | |||
self.assertIs(None, actual.text) | |||
def assertWikicodeEqual(self, expected, actual): | |||
"""Assert that two Wikicode objects have the same data.""" | |||
self.assertIsInstance(actual, Wikicode) | |||
length = len(expected.nodes) | |||
self.assertEqual(length, len(actual.nodes)) | |||
for i in range(length): | |||
self.assertNodeEqual(expected.get(i), actual.get(i)) |
@@ -0,0 +1,20 @@ | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Serves the same purpose as mwparserfromhell.compat, but only for objects | |||
required by unit tests. This avoids unnecessary imports (like urllib) within | |||
the main library. | |||
""" | |||
from mwparserfromhell.compat import py3k | |||
if py3k: | |||
range = range | |||
from io import StringIO | |||
from urllib.parse import urlencode | |||
from urllib.request import urlopen | |||
else: | |||
range = xrange | |||
from StringIO import StringIO | |||
from urllib import urlencode, urlopen |
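A test that needs to hit the API can then stay version-agnostic by importing
from this module; a rough sketch along the lines of the README example (the
query values are illustrative)::

    from .compat import urlencode, urlopen

    data = urlencode({"action": "query", "format": "json", "titles": "Test"})
    raw = urlopen("http://en.wikipedia.org/w/api.php", data.encode("utf8")).read()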
@@ -0,0 +1,261 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, | |||
Tag, Template, Text, Wikilink) | |||
from mwparserfromhell.nodes.extras import Attribute, Parameter | |||
from mwparserfromhell.parser import tokens | |||
from mwparserfromhell.parser.builder import Builder | |||
from mwparserfromhell.smart_list import SmartList | |||
from mwparserfromhell.wikicode import Wikicode | |||
from ._test_tree_equality import TreeEqualityTestCase | |||
wrap = lambda L: Wikicode(SmartList(L)) | |||
class TestBuilder(TreeEqualityTestCase): | |||
"""Tests for the builder, which turns tokens into Wikicode objects.""" | |||
def setUp(self): | |||
self.builder = Builder() | |||
def test_text(self): | |||
"""tests for building Text nodes""" | |||
tests = [ | |||
([tokens.Text(text="foobar")], wrap([Text("foobar")])), | |||
([tokens.Text(text="fóóbar")], wrap([Text("fóóbar")])), | |||
([tokens.Text(text="spam"), tokens.Text(text="eggs")], | |||
wrap([Text("spam"), Text("eggs")])), | |||
] | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
def test_template(self): | |||
"""tests for building Template nodes""" | |||
tests = [ | |||
([tokens.TemplateOpen(), tokens.Text(text="foobar"), | |||
tokens.TemplateClose()], | |||
wrap([Template(wrap([Text("foobar")]))])), | |||
([tokens.TemplateOpen(), tokens.Text(text="spam"), | |||
tokens.Text(text="eggs"), tokens.TemplateClose()], | |||
wrap([Template(wrap([Text("spam"), Text("eggs")]))])), | |||
([tokens.TemplateOpen(), tokens.Text(text="foo"), | |||
tokens.TemplateParamSeparator(), tokens.Text(text="bar"), | |||
tokens.TemplateClose()], | |||
wrap([Template(wrap([Text("foo")]), params=[ | |||
Parameter(wrap([Text("1")]), wrap([Text("bar")]), | |||
showkey=False)])])), | |||
([tokens.TemplateOpen(), tokens.Text(text="foo"), | |||
tokens.TemplateParamSeparator(), tokens.Text(text="bar"), | |||
tokens.TemplateParamEquals(), tokens.Text(text="baz"), | |||
tokens.TemplateClose()], | |||
wrap([Template(wrap([Text("foo")]), params=[ | |||
Parameter(wrap([Text("bar")]), wrap([Text("baz")]))])])), | |||
([tokens.TemplateOpen(), tokens.Text(text="foo"), | |||
tokens.TemplateParamSeparator(), tokens.Text(text="bar"), | |||
tokens.TemplateParamEquals(), tokens.Text(text="baz"), | |||
tokens.TemplateParamSeparator(), tokens.Text(text="biz"), | |||
tokens.TemplateParamSeparator(), tokens.Text(text="buzz"), | |||
tokens.TemplateParamSeparator(), tokens.Text(text="3"), | |||
tokens.TemplateParamEquals(), tokens.Text(text="buff"), | |||
tokens.TemplateParamSeparator(), tokens.Text(text="baff"), | |||
tokens.TemplateClose()], | |||
wrap([Template(wrap([Text("foo")]), params=[ | |||
Parameter(wrap([Text("bar")]), wrap([Text("baz")])), | |||
Parameter(wrap([Text("1")]), wrap([Text("biz")]), | |||
showkey=False), | |||
Parameter(wrap([Text("2")]), wrap([Text("buzz")]), | |||
showkey=False), | |||
Parameter(wrap([Text("3")]), wrap([Text("buff")])), | |||
Parameter(wrap([Text("3")]), wrap([Text("baff")]), | |||
showkey=False)])])), | |||
] | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
def test_argument(self): | |||
"""tests for building Argument nodes""" | |||
tests = [ | |||
([tokens.ArgumentOpen(), tokens.Text(text="foobar"), | |||
tokens.ArgumentClose()], | |||
wrap([Argument(wrap([Text("foobar")]))])), | |||
([tokens.ArgumentOpen(), tokens.Text(text="spam"), | |||
tokens.Text(text="eggs"), tokens.ArgumentClose()], | |||
wrap([Argument(wrap([Text("spam"), Text("eggs")]))])), | |||
([tokens.ArgumentOpen(), tokens.Text(text="foo"), | |||
tokens.ArgumentSeparator(), tokens.Text(text="bar"), | |||
tokens.ArgumentClose()], | |||
wrap([Argument(wrap([Text("foo")]), wrap([Text("bar")]))])), | |||
([tokens.ArgumentOpen(), tokens.Text(text="foo"), | |||
tokens.Text(text="bar"), tokens.ArgumentSeparator(), | |||
tokens.Text(text="baz"), tokens.Text(text="biz"), | |||
tokens.ArgumentClose()], | |||
wrap([Argument(wrap([Text("foo"), Text("bar")]), | |||
wrap([Text("baz"), Text("biz")]))])), | |||
] | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
def test_wikilink(self): | |||
"""tests for building Wikilink nodes""" | |||
tests = [ | |||
([tokens.WikilinkOpen(), tokens.Text(text="foobar"), | |||
tokens.WikilinkClose()], | |||
wrap([Wikilink(wrap([Text("foobar")]))])), | |||
([tokens.WikilinkOpen(), tokens.Text(text="spam"), | |||
tokens.Text(text="eggs"), tokens.WikilinkClose()], | |||
wrap([Wikilink(wrap([Text("spam"), Text("eggs")]))])), | |||
([tokens.WikilinkOpen(), tokens.Text(text="foo"), | |||
tokens.WikilinkSeparator(), tokens.Text(text="bar"), | |||
tokens.WikilinkClose()], | |||
wrap([Wikilink(wrap([Text("foo")]), wrap([Text("bar")]))])), | |||
([tokens.WikilinkOpen(), tokens.Text(text="foo"), | |||
tokens.Text(text="bar"), tokens.WikilinkSeparator(), | |||
tokens.Text(text="baz"), tokens.Text(text="biz"), | |||
tokens.WikilinkClose()], | |||
wrap([Wikilink(wrap([Text("foo"), Text("bar")]), | |||
wrap([Text("baz"), Text("biz")]))])), | |||
] | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
def test_html_entity(self): | |||
"""tests for building HTMLEntity nodes""" | |||
tests = [ | |||
([tokens.HTMLEntityStart(), tokens.Text(text="nbsp"), | |||
tokens.HTMLEntityEnd()], | |||
wrap([HTMLEntity("nbsp", named=True, hexadecimal=False)])), | |||
([tokens.HTMLEntityStart(), tokens.HTMLEntityNumeric(), | |||
tokens.Text(text="107"), tokens.HTMLEntityEnd()], | |||
wrap([HTMLEntity("107", named=False, hexadecimal=False)])), | |||
([tokens.HTMLEntityStart(), tokens.HTMLEntityNumeric(), | |||
tokens.HTMLEntityHex(char="X"), tokens.Text(text="6B"), | |||
tokens.HTMLEntityEnd()], | |||
wrap([HTMLEntity("6B", named=False, hexadecimal=True, | |||
hex_char="X")])), | |||
] | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
def test_heading(self): | |||
"""tests for building Heading nodes""" | |||
tests = [ | |||
([tokens.HeadingStart(level=2), tokens.Text(text="foobar"), | |||
tokens.HeadingEnd()], | |||
wrap([Heading(wrap([Text("foobar")]), 2)])), | |||
([tokens.HeadingStart(level=4), tokens.Text(text="spam"), | |||
tokens.Text(text="eggs"), tokens.HeadingEnd()], | |||
wrap([Heading(wrap([Text("spam"), Text("eggs")]), 4)])), | |||
] | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
def test_comment(self): | |||
"""tests for building Comment nodes""" | |||
tests = [ | |||
([tokens.CommentStart(), tokens.Text(text="foobar"), | |||
tokens.CommentEnd()], | |||
wrap([Comment(wrap([Text("foobar")]))])), | |||
([tokens.CommentStart(), tokens.Text(text="spam"), | |||
tokens.Text(text="eggs"), tokens.CommentEnd()], | |||
wrap([Comment(wrap([Text("spam"), Text("eggs")]))])), | |||
] | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
@unittest.skip("holding this until feature/html_tags is ready") | |||
def test_tag(self): | |||
"""tests for building Tag nodes""" | |||
pass | |||
def test_integration(self): | |||
"""a test for building a combination of templates together""" | |||
# {{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}} | |||
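        # (reading outward: {{foo}} -> {{{{foo}}bar|baz=biz}} -> {{...buzz}} -> {{...usr|{{bin}}}})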
test = [tokens.TemplateOpen(), tokens.TemplateOpen(), | |||
tokens.TemplateOpen(), tokens.TemplateOpen(), | |||
tokens.Text(text="foo"), tokens.TemplateClose(), | |||
tokens.Text(text="bar"), tokens.TemplateParamSeparator(), | |||
tokens.Text(text="baz"), tokens.TemplateParamEquals(), | |||
tokens.Text(text="biz"), tokens.TemplateClose(), | |||
tokens.Text(text="buzz"), tokens.TemplateClose(), | |||
tokens.Text(text="usr"), tokens.TemplateParamSeparator(), | |||
tokens.TemplateOpen(), tokens.Text(text="bin"), | |||
tokens.TemplateClose(), tokens.TemplateClose()] | |||
valid = wrap( | |||
[Template(wrap([Template(wrap([Template(wrap([Template(wrap([Text( | |||
"foo")])), Text("bar")]), params=[Parameter(wrap([Text("baz")]), | |||
wrap([Text("biz")]))]), Text("buzz")])), Text("usr")]), params=[ | |||
Parameter(wrap([Text("1")]), wrap([Template(wrap([Text("bin")]))]), | |||
showkey=False)])]) | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
def test_integration2(self): | |||
"""an even more audacious test for building a horrible wikicode mess""" | |||
        # {{a|b|{{c|[[d]]{{{e}}}}}}}[[f|{{{g}}}<!--h-->]]{{i|j=&nbsp;}}
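        # (a template whose parameter holds another template containing a wikilink and an
        # argument; a wikilink containing an argument and a comment; and a template whose
        # parameter value is an &nbsp; entity)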
test = [tokens.TemplateOpen(), tokens.Text(text="a"), | |||
tokens.TemplateParamSeparator(), tokens.Text(text="b"), | |||
tokens.TemplateParamSeparator(), tokens.TemplateOpen(), | |||
tokens.Text(text="c"), tokens.TemplateParamSeparator(), | |||
tokens.WikilinkOpen(), tokens.Text(text="d"), | |||
tokens.WikilinkClose(), tokens.ArgumentOpen(), | |||
tokens.Text(text="e"), tokens.ArgumentClose(), | |||
tokens.TemplateClose(), tokens.TemplateClose(), | |||
tokens.WikilinkOpen(), tokens.Text(text="f"), | |||
tokens.WikilinkSeparator(), tokens.ArgumentOpen(), | |||
tokens.Text(text="g"), tokens.ArgumentClose(), | |||
tokens.CommentStart(), tokens.Text(text="h"), | |||
tokens.CommentEnd(), tokens.WikilinkClose(), | |||
tokens.TemplateOpen(), tokens.Text(text="i"), | |||
tokens.TemplateParamSeparator(), tokens.Text(text="j"), | |||
tokens.TemplateParamEquals(), tokens.HTMLEntityStart(), | |||
tokens.Text(text="nbsp"), tokens.HTMLEntityEnd(), | |||
tokens.TemplateClose()] | |||
valid = wrap( | |||
[Template(wrap([Text("a")]), params=[Parameter(wrap([Text("1")]), | |||
wrap([Text("b")]), showkey=False), Parameter(wrap([Text("2")]), | |||
wrap([Template(wrap([Text("c")]), params=[Parameter(wrap([Text("1") | |||
]), wrap([Wikilink(wrap([Text("d")])), Argument(wrap([Text("e")]))] | |||
), showkey=False)])]), showkey=False)]), Wikilink(wrap([Text("f")] | |||
), wrap([Argument(wrap([Text("g")])), Comment(wrap([Text("h")]))]) | |||
), Template(wrap([Text("i")]), params=[Parameter(wrap([Text("j")]), | |||
wrap([HTMLEntity("nbsp", named=True)]))])]) | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -0,0 +1,47 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
try: | |||
from mwparserfromhell.parser._tokenizer import CTokenizer | |||
except ImportError: | |||
CTokenizer = None | |||
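    # CTokenizer stays None if the C extension was not built; the class below is then skipped.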
from ._test_tokenizer import TokenizerTestCase | |||
@unittest.skipUnless(CTokenizer, "C tokenizer not available") | |||
class TestCTokenizer(TokenizerTestCase, unittest.TestCase): | |||
"""Test cases for the C tokenizer.""" | |||
@classmethod | |||
def setUpClass(cls): | |||
cls.tokenizer = CTokenizer | |||
def test_uses_c(self): | |||
"""make sure the C tokenizer identifies as using a C extension""" | |||
self.assertTrue(CTokenizer.USES_C) | |||
self.assertTrue(CTokenizer().USES_C) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -0,0 +1,131 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import print_function, unicode_literals | |||
import json | |||
import unittest | |||
import mwparserfromhell | |||
from mwparserfromhell.compat import py3k, str | |||
from .compat import StringIO, urlencode, urlopen | |||
class TestDocs(unittest.TestCase): | |||
"""Integration test cases for mwparserfromhell's documentation.""" | |||
def assertPrint(self, input, output): | |||
"""Assertion check that *input*, when printed, produces *output*.""" | |||
buff = StringIO() | |||
print(input, end="", file=buff) | |||
buff.seek(0) | |||
self.assertEqual(output, buff.read()) | |||
def test_readme_1(self): | |||
"""test a block of example code in the README""" | |||
text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" | |||
wikicode = mwparserfromhell.parse(text) | |||
self.assertPrint(wikicode, | |||
"I has a template! {{foo|bar|baz|eggs=spam}} See it?") | |||
templates = wikicode.filter_templates() | |||
if py3k: | |||
self.assertPrint(templates, "['{{foo|bar|baz|eggs=spam}}']") | |||
else: | |||
self.assertPrint(templates, "[u'{{foo|bar|baz|eggs=spam}}']") | |||
template = templates[0] | |||
self.assertPrint(template.name, "foo") | |||
if py3k: | |||
self.assertPrint(template.params, "['bar', 'baz', 'eggs=spam']") | |||
else: | |||
self.assertPrint(template.params, "[u'bar', u'baz', u'eggs=spam']") | |||
self.assertPrint(template.get(1).value, "bar") | |||
self.assertPrint(template.get("eggs").value, "spam") | |||
def test_readme_2(self): | |||
"""test a block of example code in the README""" | |||
code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") | |||
if py3k: | |||
self.assertPrint(code.filter_templates(), | |||
"['{{foo|this {{includes a|template}}}}']") | |||
else: | |||
self.assertPrint(code.filter_templates(), | |||
"[u'{{foo|this {{includes a|template}}}}']") | |||
foo = code.filter_templates()[0] | |||
self.assertPrint(foo.get(1).value, "this {{includes a|template}}") | |||
self.assertPrint(foo.get(1).value.filter_templates()[0], | |||
"{{includes a|template}}") | |||
self.assertPrint(foo.get(1).value.filter_templates()[0].get(1).value, | |||
"template") | |||
def test_readme_3(self): | |||
"""test a block of example code in the README""" | |||
text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" | |||
temps = mwparserfromhell.parse(text).filter_templates(recursive=True) | |||
if py3k: | |||
res = "['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']" | |||
else: | |||
res = "[u'{{foo|{{bar}}={{baz|{{spam}}}}}}', u'{{bar}}', u'{{baz|{{spam}}}}', u'{{spam}}']" | |||
self.assertPrint(temps, res) | |||
def test_readme_4(self): | |||
"""test a block of example code in the README""" | |||
text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}" | |||
code = mwparserfromhell.parse(text) | |||
for template in code.filter_templates(): | |||
if template.name == "cleanup" and not template.has_param("date"): | |||
template.add("date", "July 2012") | |||
res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}}" | |||
self.assertPrint(code, res) | |||
code.replace("{{uncategorized}}", "{{bar-stub}}") | |||
res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}" | |||
self.assertPrint(code, res) | |||
if py3k: | |||
res = "['{{cleanup|date=July 2012}}', '{{bar-stub}}']" | |||
else: | |||
res = "[u'{{cleanup|date=July 2012}}', u'{{bar-stub}}']" | |||
self.assertPrint(code.filter_templates(), res) | |||
text = str(code) | |||
res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}" | |||
self.assertPrint(text, res) | |||
self.assertEqual(text, code) | |||
def test_readme_5(self): | |||
"""test a block of example code in the README; includes a web call""" | |||
url1 = "http://en.wikipedia.org/w/api.php" | |||
url2 = "http://en.wikipedia.org/w/index.php?title={0}&action=raw" | |||
title = "Test" | |||
data = {"action": "query", "prop": "revisions", "rvlimit": 1, | |||
"rvprop": "content", "format": "json", "titles": title} | |||
try: | |||
raw = urlopen(url1, urlencode(data).encode("utf8")).read() | |||
except IOError: | |||
self.skipTest("cannot continue because of unsuccessful web call") | |||
res = json.loads(raw.decode("utf8")) | |||
text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"] | |||
try: | |||
expected = urlopen(url2.format(title)).read().decode("utf8") | |||
except IOError: | |||
self.skipTest("cannot continue because of unsuccessful web call") | |||
actual = mwparserfromhell.parse(text) | |||
self.assertEqual(expected, actual) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -1,119 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import unittest | |||
from mwparserfromhell.parameter import Parameter | |||
from mwparserfromhell.template import Template | |||
class TestParameter(unittest.TestCase): | |||
def setUp(self): | |||
self.name = "foo" | |||
self.value1 = "bar" | |||
self.value2 = "{{spam}}" | |||
self.value3 = "bar{{spam}}" | |||
self.value4 = "embedded {{eggs|spam|baz=buz}} {{goes}} here" | |||
self.templates2 = [Template("spam")] | |||
self.templates3 = [Template("spam")] | |||
self.templates4 = [Template("eggs", [Parameter("1", "spam"), | |||
Parameter("baz", "buz")]), | |||
Template("goes")] | |||
def test_construct(self): | |||
Parameter(self.name, self.value1) | |||
Parameter(self.name, self.value2, self.templates2) | |||
Parameter(name=self.name, value=self.value3) | |||
Parameter(name=self.name, value=self.value4, templates=self.templates4) | |||
def test_name(self): | |||
params = [ | |||
Parameter(self.name, self.value1), | |||
Parameter(self.name, self.value2, self.templates2), | |||
Parameter(name=self.name, value=self.value3), | |||
Parameter(name=self.name, value=self.value4, | |||
templates=self.templates4) | |||
] | |||
for param in params: | |||
self.assertEqual(param.name, self.name) | |||
def test_value(self): | |||
tests = [ | |||
(Parameter(self.name, self.value1), self.value1), | |||
(Parameter(self.name, self.value2, self.templates2), self.value2), | |||
(Parameter(name=self.name, value=self.value3), self.value3), | |||
(Parameter(name=self.name, value=self.value4, | |||
templates=self.templates4), self.value4) | |||
] | |||
for param, correct in tests: | |||
self.assertEqual(param.value, correct) | |||
def test_templates(self): | |||
tests = [ | |||
(Parameter(self.name, self.value3, self.templates3), | |||
self.templates3), | |||
(Parameter(name=self.name, value=self.value4, | |||
templates=self.templates4), self.templates4) | |||
] | |||
for param, correct in tests: | |||
self.assertEqual(param.templates, correct) | |||
def test_magic(self): | |||
params = [Parameter(self.name, self.value1), | |||
Parameter(self.name, self.value2, self.templates2), | |||
Parameter(self.name, self.value3, self.templates3), | |||
Parameter(self.name, self.value4, self.templates4)] | |||
for param in params: | |||
self.assertEqual(repr(param), repr(param.value)) | |||
self.assertEqual(str(param), str(param.value)) | |||
self.assertIs(param < "eggs", param.value < "eggs") | |||
self.assertIs(param <= "bar{{spam}}", param.value <= "bar{{spam}}") | |||
self.assertIs(param == "bar", param.value == "bar") | |||
self.assertIs(param != "bar", param.value != "bar") | |||
self.assertIs(param > "eggs", param.value > "eggs") | |||
self.assertIs(param >= "bar{{spam}}", param.value >= "bar{{spam}}") | |||
self.assertEquals(bool(param), bool(param.value)) | |||
self.assertEquals(len(param), len(param.value)) | |||
self.assertEquals(list(param), list(param.value)) | |||
self.assertEquals(param[2], param.value[2]) | |||
self.assertEquals(list(reversed(param)), | |||
list(reversed(param.value))) | |||
self.assertIs("bar" in param, "bar" in param.value) | |||
self.assertEquals(param + "test", param.value + "test") | |||
self.assertEquals("test" + param, "test" + param.value) | |||
# add param | |||
# add template left | |||
# add template right | |||
self.assertEquals(param * 3, Parameter(param.name, param.value * 3, | |||
param.templates * 3)) | |||
self.assertEquals(3 * param, Parameter(param.name, 3 * param.value, | |||
3 * param.templates)) | |||
# add param inplace | |||
            # add template inplace
# add str inplace | |||
# multiply int inplace | |||
self.assertIsInstance(param, Parameter) | |||
self.assertIsInstance(param.value, str) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -20,44 +20,50 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
from mwparserfromhell.parameter import Parameter | |||
from mwparserfromhell.parser import Parser | |||
from mwparserfromhell.template import Template | |||
from mwparserfromhell import parser | |||
from mwparserfromhell.nodes import Template, Text, Wikilink | |||
from mwparserfromhell.nodes.extras import Parameter | |||
from mwparserfromhell.smart_list import SmartList | |||
from mwparserfromhell.wikicode import Wikicode | |||
TESTS = [ | |||
("", []), | |||
("abcdef ghijhk", []), | |||
("abc{this is not a template}def", []), | |||
("neither is {{this one}nor} {this one {despite}} containing braces", []), | |||
("this is an acceptable {{template}}", [Template("template")]), | |||
("{{multiple}}{{templates}}", [Template("multiple"), | |||
Template("templates")]), | |||
("multiple {{-}} templates {{+}}!", [Template("-"), Template("+")]), | |||
("{{{no templates here}}}", []), | |||
("{ {{templates here}}}", [Template("templates here")]), | |||
("{{{{I do not exist}}}}", []), | |||
("{{foo|bar|baz|eggs=spam}}", | |||
[Template("foo", [Parameter("1", "bar"), Parameter("2", "baz"), | |||
Parameter("eggs", "spam")])]), | |||
("{{abc def|ghi|jk=lmno|pqr|st=uv|wx|yz}}", | |||
[Template("abc def", [Parameter("1", "ghi"), Parameter("jk", "lmno"), | |||
Parameter("2", "pqr"), Parameter("st", "uv"), | |||
Parameter("3", "wx"), Parameter("4", "yz")])]), | |||
("{{this has a|{{template}}|inside of it}}", | |||
[Template("this has a", [Parameter("1", "{{template}}", | |||
[Template("template")]), | |||
Parameter("2", "inside of it")])]), | |||
("{{{{I exist}} }}", [Template("I exist", [] )]), | |||
("{{}}") | |||
] | |||
from ._test_tree_equality import TreeEqualityTestCase | |||
from .compat import range | |||
class TestParser(unittest.TestCase): | |||
def test_parse(self): | |||
parser = Parser() | |||
for unparsed, parsed in TESTS: | |||
self.assertEqual(parser.parse(unparsed), parsed) | |||
class TestParser(TreeEqualityTestCase): | |||
"""Tests for the Parser class itself, which tokenizes and builds nodes.""" | |||
def test_use_c(self): | |||
"""make sure the correct tokenizer is used""" | |||
if parser.use_c: | |||
self.assertTrue(parser.Parser(None)._tokenizer.USES_C) | |||
parser.use_c = False | |||
self.assertFalse(parser.Parser(None)._tokenizer.USES_C) | |||
def test_parsing(self): | |||
"""integration test for parsing overall""" | |||
text = "this is text; {{this|is=a|template={{with|[[links]]|in}}it}}" | |||
wrap = lambda L: Wikicode(SmartList(L)) | |||
expected = wrap([ | |||
Text("this is text; "), | |||
Template(wrap([Text("this")]), [ | |||
Parameter(wrap([Text("is")]), wrap([Text("a")])), | |||
Parameter(wrap([Text("template")]), wrap([ | |||
Template(wrap([Text("with")]), [ | |||
Parameter(wrap([Text("1")]), | |||
wrap([Wikilink(wrap([Text("links")]))]), | |||
showkey=False), | |||
Parameter(wrap([Text("2")]), | |||
wrap([Text("in")]), showkey=False) | |||
]), | |||
Text("it") | |||
])) | |||
]) | |||
]) | |||
actual = parser.Parser(text).parse() | |||
self.assertWikicodeEqual(expected, actual) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -0,0 +1,43 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
from mwparserfromhell.parser.tokenizer import Tokenizer | |||
from ._test_tokenizer import TokenizerTestCase | |||
class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): | |||
"""Test cases for the Python tokenizer.""" | |||
@classmethod | |||
def setUpClass(cls): | |||
cls.tokenizer = Tokenizer | |||
def test_uses_c(self): | |||
"""make sure the Python tokenizer identifies as not using C""" | |||
self.assertFalse(Tokenizer.USES_C) | |||
self.assertFalse(Tokenizer().USES_C) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -0,0 +1,392 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
from mwparserfromhell.compat import py3k | |||
from mwparserfromhell.smart_list import SmartList, _ListProxy | |||
from .compat import range | |||
class TestSmartList(unittest.TestCase): | |||
"""Test cases for the SmartList class and its child, _ListProxy.""" | |||
def _test_get_set_del_item(self, builder): | |||
"""Run tests on __get/set/delitem__ of a list built with *builder*.""" | |||
def assign(L, s1, s2, s3, val): | |||
L[s1:s2:s3] = val | |||
def delete(L, s1): | |||
del L[s1] | |||
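        # assign() and delete() wrap statements in callables so assertRaises can invoke them below.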
list1 = builder([0, 1, 2, 3, "one", "two"]) | |||
list2 = builder(list(range(10))) | |||
self.assertEqual(1, list1[1]) | |||
self.assertEqual("one", list1[-2]) | |||
self.assertEqual([2, 3], list1[2:4]) | |||
self.assertRaises(IndexError, lambda: list1[6]) | |||
self.assertRaises(IndexError, lambda: list1[-7]) | |||
self.assertEqual([0, 1, 2], list1[:3]) | |||
self.assertEqual([0, 1, 2, 3, "one", "two"], list1[:]) | |||
self.assertEqual([3, "one", "two"], list1[3:]) | |||
self.assertEqual(["one", "two"], list1[-2:]) | |||
self.assertEqual([0, 1], list1[:-4]) | |||
self.assertEqual([], list1[6:]) | |||
self.assertEqual([], list1[4:2]) | |||
self.assertEqual([0, 2, "one"], list1[0:5:2]) | |||
self.assertEqual([0, 2], list1[0:-3:2]) | |||
self.assertEqual([0, 1, 2, 3, "one", "two"], list1[::]) | |||
self.assertEqual([2, 3, "one", "two"], list1[2::]) | |||
self.assertEqual([0, 1, 2, 3], list1[:4:]) | |||
self.assertEqual([2, 3], list1[2:4:]) | |||
self.assertEqual([0, 2, 4, 6, 8], list2[::2]) | |||
self.assertEqual([2, 5, 8], list2[2::3]) | |||
self.assertEqual([0, 3], list2[:6:3]) | |||
self.assertEqual([2, 5, 8], list2[-8:9:3]) | |||
self.assertEqual([], list2[100000:1000:-100]) | |||
list1[3] = 100 | |||
self.assertEqual(100, list1[3]) | |||
list1[-3] = 101 | |||
self.assertEqual([0, 1, 2, 101, "one", "two"], list1) | |||
list1[5:] = [6, 7, 8] | |||
self.assertEqual([6, 7, 8], list1[5:]) | |||
self.assertEqual([0, 1, 2, 101, "one", 6, 7, 8], list1) | |||
list1[2:4] = [-1, -2, -3, -4, -5] | |||
self.assertEqual([0, 1, -1, -2, -3, -4, -5, "one", 6, 7, 8], list1) | |||
list1[0:-3] = [99] | |||
self.assertEqual([99, 6, 7, 8], list1) | |||
list2[0:6:2] = [100, 102, 104] | |||
self.assertEqual([100, 1, 102, 3, 104, 5, 6, 7, 8, 9], list2) | |||
list2[::3] = [200, 203, 206, 209] | |||
self.assertEqual([200, 1, 102, 203, 104, 5, 206, 7, 8, 209], list2) | |||
list2[::] = range(7) | |||
self.assertEqual([0, 1, 2, 3, 4, 5, 6], list2) | |||
self.assertRaises(ValueError, assign, list2, 0, 5, 2, | |||
[100, 102, 104, 106]) | |||
del list2[2] | |||
self.assertEqual([0, 1, 3, 4, 5, 6], list2) | |||
del list2[-3] | |||
self.assertEqual([0, 1, 3, 5, 6], list2) | |||
self.assertRaises(IndexError, delete, list2, 100) | |||
self.assertRaises(IndexError, delete, list2, -6) | |||
list2[:] = range(10) | |||
del list2[3:6] | |||
self.assertEqual([0, 1, 2, 6, 7, 8, 9], list2) | |||
del list2[-2:] | |||
self.assertEqual([0, 1, 2, 6, 7], list2) | |||
del list2[:2] | |||
self.assertEqual([2, 6, 7], list2) | |||
list2[:] = range(10) | |||
del list2[2:8:2] | |||
self.assertEqual([0, 1, 3, 5, 7, 8, 9], list2) | |||
def _test_add_radd_iadd(self, builder): | |||
"""Run tests on __r/i/add__ of a list built with *builder*.""" | |||
list1 = builder(range(5)) | |||
list2 = builder(range(5, 10)) | |||
self.assertEqual([0, 1, 2, 3, 4, 5, 6], list1 + [5, 6]) | |||
self.assertEqual([0, 1, 2, 3, 4], list1) | |||
self.assertEqual(list(range(10)), list1 + list2) | |||
self.assertEqual([-2, -1, 0, 1, 2, 3, 4], [-2, -1] + list1) | |||
self.assertEqual([0, 1, 2, 3, 4], list1) | |||
list1 += ["foo", "bar", "baz"] | |||
self.assertEqual([0, 1, 2, 3, 4, "foo", "bar", "baz"], list1) | |||
def _test_other_magic_methods(self, builder): | |||
"""Run tests on other magic methods of a list built with *builder*.""" | |||
list1 = builder([0, 1, 2, 3, "one", "two"]) | |||
list2 = builder([]) | |||
list3 = builder([0, 2, 3, 4]) | |||
list4 = builder([0, 1, 2]) | |||
if py3k: | |||
self.assertEqual("[0, 1, 2, 3, 'one', 'two']", str(list1)) | |||
self.assertEqual(b"\x00\x01\x02", bytes(list4)) | |||
self.assertEqual("[0, 1, 2, 3, 'one', 'two']", repr(list1)) | |||
else: | |||
self.assertEqual("[0, 1, 2, 3, u'one', u'two']", unicode(list1)) | |||
self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", str(list1)) | |||
self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", repr(list1)) | |||
self.assertTrue(list1 < list3) | |||
self.assertTrue(list1 <= list3) | |||
self.assertFalse(list1 == list3) | |||
self.assertTrue(list1 != list3) | |||
self.assertFalse(list1 > list3) | |||
self.assertFalse(list1 >= list3) | |||
other1 = [0, 2, 3, 4] | |||
self.assertTrue(list1 < other1) | |||
self.assertTrue(list1 <= other1) | |||
self.assertFalse(list1 == other1) | |||
self.assertTrue(list1 != other1) | |||
self.assertFalse(list1 > other1) | |||
self.assertFalse(list1 >= other1) | |||
other2 = [0, 0, 1, 2] | |||
self.assertFalse(list1 < other2) | |||
self.assertFalse(list1 <= other2) | |||
self.assertFalse(list1 == other2) | |||
self.assertTrue(list1 != other2) | |||
self.assertTrue(list1 > other2) | |||
self.assertTrue(list1 >= other2) | |||
other3 = [0, 1, 2, 3, "one", "two"] | |||
self.assertFalse(list1 < other3) | |||
self.assertTrue(list1 <= other3) | |||
self.assertTrue(list1 == other3) | |||
self.assertFalse(list1 != other3) | |||
self.assertFalse(list1 > other3) | |||
self.assertTrue(list1 >= other3) | |||
self.assertTrue(bool(list1)) | |||
self.assertFalse(bool(list2)) | |||
self.assertEqual(6, len(list1)) | |||
self.assertEqual(0, len(list2)) | |||
out = [] | |||
for obj in list1: | |||
out.append(obj) | |||
self.assertEqual([0, 1, 2, 3, "one", "two"], out) | |||
out = [] | |||
for ch in list2: | |||
out.append(ch) | |||
self.assertEqual([], out) | |||
gen1 = iter(list1) | |||
out = [] | |||
for i in range(len(list1)): | |||
out.append(next(gen1)) | |||
self.assertRaises(StopIteration, next, gen1) | |||
self.assertEqual([0, 1, 2, 3, "one", "two"], out) | |||
gen2 = iter(list2) | |||
self.assertRaises(StopIteration, next, gen2) | |||
self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1))) | |||
self.assertEqual([], list(reversed(list2))) | |||
self.assertTrue("one" in list1) | |||
self.assertTrue(3 in list1) | |||
self.assertFalse(10 in list1) | |||
self.assertFalse(0 in list2) | |||
self.assertEqual([], list2 * 5) | |||
self.assertEqual([], 5 * list2) | |||
self.assertEqual([0, 1, 2, 0, 1, 2, 0, 1, 2], list4 * 3) | |||
self.assertEqual([0, 1, 2, 0, 1, 2, 0, 1, 2], 3 * list4) | |||
list4 *= 2 | |||
self.assertEqual([0, 1, 2, 0, 1, 2], list4) | |||
def _test_list_methods(self, builder): | |||
"""Run tests on the public methods of a list built with *builder*.""" | |||
list1 = builder(range(5)) | |||
list2 = builder(["foo"]) | |||
list3 = builder([("a", 5), ("d", 2), ("b", 8), ("c", 3)]) | |||
list1.append(5) | |||
list1.append(1) | |||
list1.append(2) | |||
self.assertEqual([0, 1, 2, 3, 4, 5, 1, 2], list1) | |||
self.assertEqual(0, list1.count(6)) | |||
self.assertEqual(2, list1.count(1)) | |||
list1.extend(range(5, 8)) | |||
self.assertEqual([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) | |||
self.assertEqual(1, list1.index(1)) | |||
self.assertEqual(6, list1.index(1, 3)) | |||
self.assertEqual(6, list1.index(1, 3, 7)) | |||
self.assertRaises(ValueError, list1.index, 1, 3, 5) | |||
list1.insert(0, -1) | |||
self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) | |||
list1.insert(-1, 6.5) | |||
self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) | |||
list1.insert(13, 8) | |||
self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1) | |||
self.assertEqual(8, list1.pop()) | |||
self.assertEqual(7, list1.pop()) | |||
self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) | |||
self.assertEqual(-1, list1.pop(0)) | |||
self.assertEqual(5, list1.pop(5)) | |||
self.assertEqual(6.5, list1.pop(-1)) | |||
self.assertEqual([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) | |||
self.assertEqual("foo", list2.pop()) | |||
self.assertRaises(IndexError, list2.pop) | |||
self.assertEqual([], list2) | |||
list1.remove(6) | |||
self.assertEqual([0, 1, 2, 3, 4, 1, 2, 5], list1) | |||
list1.remove(1) | |||
self.assertEqual([0, 2, 3, 4, 1, 2, 5], list1) | |||
list1.remove(1) | |||
self.assertEqual([0, 2, 3, 4, 2, 5], list1) | |||
self.assertRaises(ValueError, list1.remove, 1) | |||
list1.reverse() | |||
self.assertEqual([5, 2, 4, 3, 2, 0], list1) | |||
list1.sort() | |||
self.assertEqual([0, 2, 2, 3, 4, 5], list1) | |||
list1.sort(reverse=True) | |||
self.assertEqual([5, 4, 3, 2, 2, 0], list1) | |||
if not py3k: | |||
func = lambda x, y: abs(3 - x) - abs(3 - y) # Distance from 3 | |||
list1.sort(cmp=func) | |||
self.assertEqual([3, 4, 2, 2, 5, 0], list1) | |||
list1.sort(cmp=func, reverse=True) | |||
self.assertEqual([0, 5, 4, 2, 2, 3], list1) | |||
list3.sort(key=lambda i: i[1]) | |||
self.assertEqual([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) | |||
list3.sort(key=lambda i: i[1], reverse=True) | |||
self.assertEqual([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) | |||
def test_docs(self): | |||
"""make sure the methods of SmartList/_ListProxy have docstrings""" | |||
methods = ["append", "count", "extend", "index", "insert", "pop", | |||
"remove", "reverse", "sort"] | |||
for meth in methods: | |||
expected = getattr(list, meth).__doc__ | |||
smartlist_doc = getattr(SmartList, meth).__doc__ | |||
listproxy_doc = getattr(_ListProxy, meth).__doc__ | |||
self.assertEqual(expected, smartlist_doc) | |||
self.assertEqual(expected, listproxy_doc) | |||
def test_doctest(self): | |||
"""make sure the test embedded in SmartList's docstring passes""" | |||
parent = SmartList([0, 1, 2, 3]) | |||
self.assertEqual([0, 1, 2, 3], parent) | |||
child = parent[2:] | |||
self.assertEqual([2, 3], child) | |||
child.append(4) | |||
self.assertEqual([2, 3, 4], child) | |||
self.assertEqual([0, 1, 2, 3, 4], parent) | |||
def test_parent_get_set_del(self): | |||
"""make sure SmartList's getitem/setitem/delitem work""" | |||
self._test_get_set_del_item(SmartList) | |||
def test_parent_add(self): | |||
"""make sure SmartList's add/radd/iadd work""" | |||
self._test_add_radd_iadd(SmartList) | |||
def test_parent_unaffected_magics(self): | |||
"""sanity checks against SmartList features that were not modified""" | |||
self._test_other_magic_methods(SmartList) | |||
def test_parent_methods(self): | |||
"""make sure SmartList's non-magic methods work, like append()""" | |||
self._test_list_methods(SmartList) | |||
def test_child_get_set_del(self): | |||
"""make sure _ListProxy's getitem/setitem/delitem work""" | |||
self._test_get_set_del_item(lambda L: SmartList(list(L))[:]) | |||
self._test_get_set_del_item(lambda L: SmartList([999] + list(L))[1:]) | |||
self._test_get_set_del_item(lambda L: SmartList(list(L) + [999])[:-1]) | |||
builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] | |||
self._test_get_set_del_item(builder) | |||
def test_child_add(self): | |||
"""make sure _ListProxy's add/radd/iadd work""" | |||
self._test_add_radd_iadd(lambda L: SmartList(list(L))[:]) | |||
self._test_add_radd_iadd(lambda L: SmartList([999] + list(L))[1:]) | |||
self._test_add_radd_iadd(lambda L: SmartList(list(L) + [999])[:-1]) | |||
builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] | |||
self._test_add_radd_iadd(builder) | |||
def test_child_other_magics(self): | |||
"""make sure _ListProxy's other magically implemented features work""" | |||
self._test_other_magic_methods(lambda L: SmartList(list(L))[:]) | |||
self._test_other_magic_methods(lambda L: SmartList([999] + list(L))[1:]) | |||
self._test_other_magic_methods(lambda L: SmartList(list(L) + [999])[:-1]) | |||
builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] | |||
self._test_other_magic_methods(builder) | |||
def test_child_methods(self): | |||
"""make sure _ListProxy's non-magic methods work, like append()""" | |||
self._test_list_methods(lambda L: SmartList(list(L))[:]) | |||
self._test_list_methods(lambda L: SmartList([999] + list(L))[1:]) | |||
self._test_list_methods(lambda L: SmartList(list(L) + [999])[:-1]) | |||
builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] | |||
self._test_list_methods(builder) | |||
def test_influence(self): | |||
"""make sure changes are propagated from parents to children""" | |||
parent = SmartList([0, 1, 2, 3, 4, 5]) | |||
child1 = parent[2:] | |||
child2 = parent[2:5] | |||
parent.append(6) | |||
child1.append(7) | |||
child2.append(4.5) | |||
self.assertEqual([0, 1, 2, 3, 4, 4.5, 5, 6, 7], parent) | |||
self.assertEqual([2, 3, 4, 4.5, 5, 6, 7], child1) | |||
self.assertEqual([2, 3, 4, 4.5], child2) | |||
parent.insert(0, -1) | |||
parent.insert(4, 2.5) | |||
parent.insert(10, 6.5) | |||
self.assertEqual([-1, 0, 1, 2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], parent) | |||
self.assertEqual([2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], child1) | |||
self.assertEqual([2, 2.5, 3, 4, 4.5], child2) | |||
self.assertEqual(7, parent.pop()) | |||
self.assertEqual(6.5, child1.pop()) | |||
self.assertEqual(4.5, child2.pop()) | |||
self.assertEqual([-1, 0, 1, 2, 2.5, 3, 4, 5, 6], parent) | |||
self.assertEqual([2, 2.5, 3, 4, 5, 6], child1) | |||
self.assertEqual([2, 2.5, 3, 4], child2) | |||
parent.remove(-1) | |||
child1.remove(2.5) | |||
self.assertEqual([0, 1, 2, 3, 4, 5, 6], parent) | |||
self.assertEqual([2, 3, 4, 5, 6], child1) | |||
self.assertEqual([2, 3, 4], child2) | |||
self.assertEqual(0, parent.pop(0)) | |||
self.assertEqual([1, 2, 3, 4, 5, 6], parent) | |||
self.assertEqual([2, 3, 4, 5, 6], child1) | |||
self.assertEqual([2, 3, 4], child2) | |||
child2.reverse() | |||
self.assertEqual([1, 4, 3, 2, 5, 6], parent) | |||
self.assertEqual([4, 3, 2, 5, 6], child1) | |||
self.assertEqual([4, 3, 2], child2) | |||
parent.extend([7, 8]) | |||
child1.extend([8.1, 8.2]) | |||
child2.extend([1.9, 1.8]) | |||
self.assertEqual([1, 4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], parent) | |||
self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) | |||
self.assertEqual([4, 3, 2, 1.9, 1.8], child2) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -0,0 +1,435 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from sys import getdefaultencoding | |||
from types import GeneratorType | |||
import unittest | |||
from mwparserfromhell.compat import bytes, py3k, str | |||
from mwparserfromhell.string_mixin import StringMixIn | |||
from .compat import range | |||
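# A minimal StringMixIn subclass: it defines only __unicode__(), which the mix-in
# uses to provide the full string interface being tested here.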
class _FakeString(StringMixIn): | |||
def __init__(self, data): | |||
self._data = data | |||
def __unicode__(self): | |||
return self._data | |||
class TestStringMixIn(unittest.TestCase): | |||
"""Test cases for the StringMixIn class.""" | |||
def test_docs(self): | |||
"""make sure the various methods of StringMixIn have docstrings""" | |||
methods = [ | |||
"capitalize", "center", "count", "encode", "endswith", | |||
"expandtabs", "find", "format", "index", "isalnum", "isalpha", | |||
"isdecimal", "isdigit", "islower", "isnumeric", "isspace", | |||
"istitle", "isupper", "join", "ljust", "lower", "lstrip", | |||
"partition", "replace", "rfind", "rindex", "rjust", "rpartition", | |||
"rsplit", "rstrip", "split", "splitlines", "startswith", "strip", | |||
"swapcase", "title", "translate", "upper", "zfill"] | |||
if py3k: | |||
methods.extend(["casefold", "format_map", "isidentifier", | |||
"isprintable", "maketrans"]) | |||
else: | |||
methods.append("decode") | |||
for meth in methods: | |||
expected = getattr(str, meth).__doc__ | |||
actual = getattr(StringMixIn, meth).__doc__ | |||
self.assertEqual(expected, actual) | |||
def test_types(self): | |||
"""make sure StringMixIns convert to different types correctly""" | |||
fstr = _FakeString("fake string") | |||
self.assertEqual(str(fstr), "fake string") | |||
self.assertEqual(bytes(fstr), b"fake string") | |||
if py3k: | |||
self.assertEqual(repr(fstr), "'fake string'") | |||
else: | |||
self.assertEqual(repr(fstr), b"u'fake string'") | |||
self.assertIsInstance(str(fstr), str) | |||
self.assertIsInstance(bytes(fstr), bytes) | |||
if py3k: | |||
self.assertIsInstance(repr(fstr), str) | |||
else: | |||
self.assertIsInstance(repr(fstr), bytes) | |||
def test_comparisons(self): | |||
"""make sure comparison operators work""" | |||
str1 = _FakeString("this is a fake string") | |||
str2 = _FakeString("this is a fake string") | |||
str3 = _FakeString("fake string, this is") | |||
str4 = "this is a fake string" | |||
str5 = "fake string, this is" | |||
self.assertFalse(str1 > str2) | |||
self.assertTrue(str1 >= str2) | |||
self.assertTrue(str1 == str2) | |||
self.assertFalse(str1 != str2) | |||
self.assertFalse(str1 < str2) | |||
self.assertTrue(str1 <= str2) | |||
self.assertTrue(str1 > str3) | |||
self.assertTrue(str1 >= str3) | |||
self.assertFalse(str1 == str3) | |||
self.assertTrue(str1 != str3) | |||
self.assertFalse(str1 < str3) | |||
self.assertFalse(str1 <= str3) | |||
self.assertFalse(str1 > str4) | |||
self.assertTrue(str1 >= str4) | |||
self.assertTrue(str1 == str4) | |||
self.assertFalse(str1 != str4) | |||
self.assertFalse(str1 < str4) | |||
self.assertTrue(str1 <= str4) | |||
self.assertTrue(str1 > str5) | |||
self.assertTrue(str1 >= str5) | |||
self.assertFalse(str1 == str5) | |||
self.assertTrue(str1 != str5) | |||
self.assertFalse(str1 < str5) | |||
self.assertFalse(str1 <= str5) | |||
def test_other_magics(self): | |||
"""test other magically implemented features, like len() and iter()""" | |||
str1 = _FakeString("fake string") | |||
str2 = _FakeString("") | |||
expected = ["f", "a", "k", "e", " ", "s", "t", "r", "i", "n", "g"] | |||
self.assertTrue(str1) | |||
self.assertFalse(str2) | |||
self.assertEqual(11, len(str1)) | |||
self.assertEqual(0, len(str2)) | |||
out = [] | |||
for ch in str1: | |||
out.append(ch) | |||
self.assertEqual(expected, out) | |||
out = [] | |||
for ch in str2: | |||
out.append(ch) | |||
self.assertEqual([], out) | |||
gen1 = iter(str1) | |||
gen2 = iter(str2) | |||
self.assertIsInstance(gen1, GeneratorType) | |||
self.assertIsInstance(gen2, GeneratorType) | |||
out = [] | |||
for i in range(len(str1)): | |||
out.append(next(gen1)) | |||
self.assertRaises(StopIteration, next, gen1) | |||
self.assertEqual(expected, out) | |||
self.assertRaises(StopIteration, next, gen2) | |||
self.assertEqual("gnirts ekaf", "".join(list(reversed(str1)))) | |||
self.assertEqual([], list(reversed(str2))) | |||
self.assertEqual("f", str1[0]) | |||
self.assertEqual(" ", str1[4]) | |||
self.assertEqual("g", str1[10]) | |||
self.assertEqual("n", str1[-2]) | |||
self.assertRaises(IndexError, lambda: str1[11]) | |||
self.assertRaises(IndexError, lambda: str2[0]) | |||
self.assertTrue("k" in str1) | |||
self.assertTrue("fake" in str1) | |||
self.assertTrue("str" in str1) | |||
self.assertTrue("" in str1) | |||
self.assertTrue("" in str2) | |||
self.assertFalse("real" in str1) | |||
self.assertFalse("s" in str2) | |||
def test_other_methods(self): | |||
"""test the remaining non-magic methods of StringMixIn""" | |||
str1 = _FakeString("fake string") | |||
self.assertEqual("Fake string", str1.capitalize()) | |||
self.assertEqual(" fake string ", str1.center(15)) | |||
self.assertEqual(" fake string ", str1.center(16)) | |||
self.assertEqual("qqfake stringqq", str1.center(15, "q")) | |||
self.assertEqual(1, str1.count("e")) | |||
self.assertEqual(0, str1.count("z")) | |||
self.assertEqual(1, str1.count("r", 7)) | |||
self.assertEqual(0, str1.count("r", 8)) | |||
self.assertEqual(1, str1.count("r", 5, 9)) | |||
self.assertEqual(0, str1.count("r", 5, 7)) | |||
if not py3k: | |||
str2 = _FakeString("fo") | |||
self.assertEqual(str1, str1.decode()) | |||
actual = _FakeString("\\U00010332\\U0001033f\\U00010344") | |||
self.assertEqual("𐌲𐌿𐍄", actual.decode("unicode_escape")) | |||
self.assertRaises(UnicodeError, str2.decode, "punycode") | |||
self.assertEqual("", str2.decode("punycode", "ignore")) | |||
str3 = _FakeString("𐌲𐌿𐍄") | |||
actual = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84" | |||
self.assertEqual(b"fake string", str1.encode()) | |||
self.assertEqual(actual, str3.encode("utf-8")) | |||
self.assertEqual(actual, str3.encode(encoding="utf-8")) | |||
if getdefaultencoding() == "ascii": | |||
self.assertRaises(UnicodeEncodeError, str3.encode) | |||
elif getdefaultencoding() == "utf-8": | |||
self.assertEqual(actual, str3.encode()) | |||
self.assertRaises(UnicodeEncodeError, str3.encode, "ascii") | |||
self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict") | |||
if getdefaultencoding() == "ascii": | |||
self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") | |||
elif getdefaultencoding() == "utf-8": | |||
self.assertEqual(actual, str3.encode(errors="strict")) | |||
self.assertEqual(b"", str3.encode("ascii", "ignore")) | |||
if getdefaultencoding() == "ascii": | |||
self.assertEqual(b"", str3.encode(errors="ignore")) | |||
elif getdefaultencoding() == "utf-8": | |||
self.assertEqual(actual, str3.encode(errors="ignore")) | |||
self.assertTrue(str1.endswith("ing")) | |||
self.assertFalse(str1.endswith("ingh")) | |||
str4 = _FakeString("\tfoobar") | |||
self.assertEqual("fake string", str1) | |||
self.assertEqual(" foobar", str4.expandtabs()) | |||
self.assertEqual(" foobar", str4.expandtabs(4)) | |||
self.assertEqual(3, str1.find("e")) | |||
self.assertEqual(-1, str1.find("z")) | |||
self.assertEqual(7, str1.find("r", 7)) | |||
self.assertEqual(-1, str1.find("r", 8)) | |||
self.assertEqual(7, str1.find("r", 5, 9)) | |||
self.assertEqual(-1, str1.find("r", 5, 7)) | |||
str5 = _FakeString("foo{0}baz") | |||
str6 = _FakeString("foo{abc}baz") | |||
str7 = _FakeString("foo{0}{abc}buzz") | |||
str8 = _FakeString("{0}{1}") | |||
self.assertEqual("fake string", str1.format()) | |||
self.assertEqual("foobarbaz", str5.format("bar")) | |||
self.assertEqual("foobarbaz", str6.format(abc="bar")) | |||
self.assertEqual("foobarbazbuzz", str7.format("bar", abc="baz")) | |||
self.assertRaises(IndexError, str8.format, "abc") | |||
if py3k: | |||
self.assertEqual("fake string", str1.format_map({})) | |||
self.assertEqual("foobarbaz", str6.format_map({"abc": "bar"})) | |||
self.assertRaises(ValueError, str5.format_map, {0: "abc"}) | |||
self.assertEqual(3, str1.index("e")) | |||
self.assertRaises(ValueError, str1.index, "z") | |||
self.assertEqual(7, str1.index("r", 7)) | |||
self.assertRaises(ValueError, str1.index, "r", 8) | |||
self.assertEqual(7, str1.index("r", 5, 9)) | |||
self.assertRaises(ValueError, str1.index, "r", 5, 7) | |||
str9 = _FakeString("foobar") | |||
str10 = _FakeString("foobar123") | |||
str11 = _FakeString("foo bar") | |||
self.assertTrue(str9.isalnum()) | |||
self.assertTrue(str10.isalnum()) | |||
self.assertFalse(str11.isalnum()) | |||
self.assertTrue(str9.isalpha()) | |||
self.assertFalse(str10.isalpha()) | |||
self.assertFalse(str11.isalpha()) | |||
str12 = _FakeString("123") | |||
str13 = _FakeString("\u2155") | |||
str14 = _FakeString("\u00B2") | |||
self.assertFalse(str9.isdecimal()) | |||
self.assertTrue(str12.isdecimal()) | |||
self.assertFalse(str13.isdecimal()) | |||
self.assertFalse(str14.isdecimal()) | |||
self.assertFalse(str9.isdigit()) | |||
self.assertTrue(str12.isdigit()) | |||
self.assertFalse(str13.isdigit()) | |||
self.assertTrue(str14.isdigit()) | |||
if py3k: | |||
self.assertTrue(str9.isidentifier()) | |||
self.assertTrue(str10.isidentifier()) | |||
self.assertFalse(str11.isidentifier()) | |||
self.assertFalse(str12.isidentifier()) | |||
str15 = _FakeString("") | |||
str16 = _FakeString("FooBar") | |||
self.assertTrue(str9.islower()) | |||
self.assertFalse(str15.islower()) | |||
self.assertFalse(str16.islower()) | |||
self.assertFalse(str9.isnumeric()) | |||
self.assertTrue(str12.isnumeric()) | |||
self.assertTrue(str13.isnumeric()) | |||
self.assertTrue(str14.isnumeric()) | |||
if py3k: | |||
str16B = _FakeString("\x01\x02") | |||
self.assertTrue(str9.isprintable()) | |||
self.assertTrue(str13.isprintable()) | |||
self.assertTrue(str14.isprintable()) | |||
self.assertTrue(str15.isprintable()) | |||
self.assertFalse(str16B.isprintable()) | |||
str17 = _FakeString(" ") | |||
str18 = _FakeString("\t \t \r\n") | |||
self.assertFalse(str1.isspace()) | |||
self.assertFalse(str9.isspace()) | |||
self.assertTrue(str17.isspace()) | |||
self.assertTrue(str18.isspace()) | |||
str19 = _FakeString("This Sentence Looks Like A Title") | |||
str20 = _FakeString("This sentence doesn't LookLikeATitle") | |||
self.assertFalse(str15.istitle()) | |||
self.assertTrue(str19.istitle()) | |||
self.assertFalse(str20.istitle()) | |||
str21 = _FakeString("FOOBAR") | |||
self.assertFalse(str9.isupper()) | |||
self.assertFalse(str15.isupper()) | |||
self.assertTrue(str21.isupper()) | |||
self.assertEqual("foobar", str15.join(["foo", "bar"])) | |||
self.assertEqual("foo123bar123baz", str12.join(("foo", "bar", "baz"))) | |||
self.assertEqual("fake string ", str1.ljust(15)) | |||
self.assertEqual("fake string ", str1.ljust(16)) | |||
self.assertEqual("fake stringqqqq", str1.ljust(15, "q")) | |||
str22 = _FakeString("ß") | |||
self.assertEqual("", str15.lower()) | |||
self.assertEqual("foobar", str16.lower()) | |||
self.assertEqual("ß", str22.lower()) | |||
if py3k: | |||
self.assertEqual("", str15.casefold()) | |||
self.assertEqual("foobar", str16.casefold()) | |||
self.assertEqual("ss", str22.casefold()) | |||
str23 = _FakeString(" fake string ") | |||
self.assertEqual("fake string", str1.lstrip()) | |||
self.assertEqual("fake string ", str23.lstrip()) | |||
self.assertEqual("ke string", str1.lstrip("abcdef")) | |||
self.assertEqual(("fa", "ke", " string"), str1.partition("ke")) | |||
self.assertEqual(("fake string", "", ""), str1.partition("asdf")) | |||
str24 = _FakeString("boo foo moo") | |||
self.assertEqual("real string", str1.replace("fake", "real")) | |||
self.assertEqual("bu fu moo", str24.replace("oo", "u", 2)) | |||
self.assertEqual(3, str1.rfind("e")) | |||
self.assertEqual(-1, str1.rfind("z")) | |||
self.assertEqual(7, str1.rfind("r", 7)) | |||
self.assertEqual(-1, str1.rfind("r", 8)) | |||
self.assertEqual(7, str1.rfind("r", 5, 9)) | |||
self.assertEqual(-1, str1.rfind("r", 5, 7)) | |||
self.assertEqual(3, str1.rindex("e")) | |||
self.assertRaises(ValueError, str1.rindex, "z") | |||
self.assertEqual(7, str1.rindex("r", 7)) | |||
self.assertRaises(ValueError, str1.rindex, "r", 8) | |||
self.assertEqual(7, str1.rindex("r", 5, 9)) | |||
self.assertRaises(ValueError, str1.rindex, "r", 5, 7) | |||
self.assertEqual(" fake string", str1.rjust(15)) | |||
self.assertEqual(" fake string", str1.rjust(16)) | |||
self.assertEqual("qqqqfake string", str1.rjust(15, "q")) | |||
self.assertEqual(("fa", "ke", " string"), str1.rpartition("ke")) | |||
self.assertEqual(("", "", "fake string"), str1.rpartition("asdf")) | |||
str25 = _FakeString(" this is a sentence with whitespace ") | |||
actual = ["this", "is", "a", "sentence", "with", "whitespace"] | |||
self.assertEqual(actual, str25.rsplit()) | |||
self.assertEqual(actual, str25.rsplit(None)) | |||
actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", | |||
"", "whitespace", ""] | |||
self.assertEqual(actual, str25.rsplit(" ")) | |||
actual = [" this is a", "sentence", "with", "whitespace"] | |||
self.assertEqual(actual, str25.rsplit(None, 3)) | |||
actual = [" this is a sentence with", "", "whitespace", ""] | |||
self.assertEqual(actual, str25.rsplit(" ", 3)) | |||
if py3k: | |||
actual = [" this is a", "sentence", "with", "whitespace"] | |||
self.assertEqual(actual, str25.rsplit(maxsplit=3)) | |||
self.assertEqual("fake string", str1.rstrip()) | |||
self.assertEqual(" fake string", str23.rstrip()) | |||
self.assertEqual("fake stri", str1.rstrip("ngr")) | |||
actual = ["this", "is", "a", "sentence", "with", "whitespace"] | |||
self.assertEqual(actual, str25.split()) | |||
self.assertEqual(actual, str25.split(None)) | |||
actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", | |||
"", "whitespace", ""] | |||
self.assertEqual(actual, str25.split(" ")) | |||
actual = ["this", "is", "a", "sentence with whitespace "] | |||
self.assertEqual(actual, str25.split(None, 3)) | |||
actual = ["", "", "", "this is a sentence with whitespace "] | |||
self.assertEqual(actual, str25.split(" ", 3)) | |||
if py3k: | |||
actual = ["this", "is", "a", "sentence with whitespace "] | |||
self.assertEqual(actual, str25.split(maxsplit=3)) | |||
str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") | |||
self.assertEqual(["lines", "of", "text", "are", "presented", "here"], | |||
str26.splitlines()) | |||
self.assertEqual(["lines\n", "of\n", "text\r\n", "are\r\n", | |||
"presented\n", "here"], str26.splitlines(True)) | |||
self.assertTrue(str1.startswith("fake")) | |||
self.assertFalse(str1.startswith("faker")) | |||
self.assertEqual("fake string", str1.strip()) | |||
self.assertEqual("fake string", str23.strip()) | |||
self.assertEqual("ke stri", str1.strip("abcdefngr")) | |||
self.assertEqual("fOObAR", str16.swapcase()) | |||
self.assertEqual("Fake String", str1.title()) | |||
if py3k: | |||
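# str.maketrans() (Python 3) builds a translation table from a dict of ordinals, from two equal-length strings, or with a third string of characters to delete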
table1 = str.maketrans({97: "1", 101: "2", 105: "3", 111: "4", | |||
117: "5"}) | |||
table2 = str.maketrans("aeiou", "12345") | |||
table3 = str.maketrans("aeiou", "12345", "rts") | |||
self.assertEqual("f1k2 str3ng", str1.translate(table1)) | |||
self.assertEqual("f1k2 str3ng", str1.translate(table2)) | |||
self.assertEqual("f1k2 3ng", str1.translate(table3)) | |||
else: | |||
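# Python 2's unicode.translate() takes a mapping of character ordinals to replacement strings directly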
table = {97: "1", 101: "2", 105: "3", 111: "4", 117: "5"} | |||
self.assertEqual("f1k2 str3ng", str1.translate(table)) | |||
self.assertEqual("", str15.upper()) | |||
self.assertEqual("FOOBAR", str16.upper()) | |||
self.assertEqual("123", str12.zfill(3)) | |||
self.assertEqual("000123", str12.zfill(6)) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -1,106 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from itertools import permutations | |||
import unittest | |||
from mwparserfromhell.parameter import Parameter | |||
from mwparserfromhell.template import Template | |||
class TestTemplate(unittest.TestCase): | |||
def setUp(self): | |||
self.name = "foo" | |||
self.bar = Parameter("1", "bar") | |||
self.baz = Parameter("2", "baz") | |||
self.eggs = Parameter("eggs", "spam") | |||
self.params = [self.bar, self.baz, self.eggs] | |||
def test_construct(self): | |||
Template(self.name) | |||
Template(self.name, self.params) | |||
Template(name=self.name) | |||
Template(name=self.name, params=self.params) | |||
def test_name(self): | |||
templates = [ | |||
Template(self.name), | |||
Template(self.name, self.params), | |||
Template(name=self.name), | |||
Template(name=self.name, params=self.params) | |||
] | |||
for template in templates: | |||
self.assertEqual(template.name, self.name) | |||
def test_params(self): | |||
for template in (Template(self.name), Template(name=self.name)): | |||
self.assertEqual(template.params, []) | |||
for template in (Template(self.name, self.params), | |||
Template(name=self.name, params=self.params)): | |||
self.assertEqual(template.params, self.params) | |||
def test_getitem(self): | |||
template = Template(name=self.name, params=self.params) | |||
self.assertIs(template[0], self.bar) | |||
self.assertIs(template[1], self.baz) | |||
self.assertIs(template[2], self.eggs) | |||
self.assertIs(template["1"], self.bar) | |||
self.assertIs(template["2"], self.baz) | |||
self.assertIs(template["eggs"], self.eggs) | |||
def test_render(self): | |||
tests = [ | |||
(Template(self.name), "{{foo}}"), | |||
(Template(self.name, self.params), "{{foo|bar|baz|eggs=spam}}") | |||
] | |||
for template, rendered in tests: | |||
self.assertEqual(template.render(), rendered) | |||
def test_repr(self): | |||
correct1= 'Template(name=foo, params={})' | |||
correct2 = 'Template(name=foo, params={"1": "bar", "2": "baz", "eggs": "spam"})' | |||
tests = [(Template(self.name), correct1), | |||
(Template(self.name, self.params), correct2)] | |||
for template, correct in tests: | |||
self.assertEqual(repr(template), correct) | |||
self.assertEqual(str(template), correct) | |||
def test_cmp(self): | |||
tmp1 = Template(self.name) | |||
tmp2 = Template(name=self.name) | |||
tmp3 = Template(self.name, []) | |||
tmp4 = Template(name=self.name, params=[]) | |||
tmp5 = Template(self.name, self.params) | |||
tmp6 = Template(name=self.name, params=self.params) | |||
for tmpA, tmpB in permutations((tmp1, tmp2, tmp3, tmp4), 2): | |||
self.assertEqual(tmpA, tmpB) | |||
for tmpA, tmpB in permutations((tmp5, tmp6), 2): | |||
self.assertEqual(tmpA, tmpB) | |||
for tmpA in (tmp5, tmp6): | |||
for tmpB in (tmp1, tmp2, tmp3, tmp4): | |||
self.assertNotEqual(tmpA, tmpB) | |||
self.assertNotEqual(tmpB, tmpA) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -0,0 +1,108 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
from mwparserfromhell.compat import py3k | |||
from mwparserfromhell.parser import tokens | |||
class TestTokens(unittest.TestCase): | |||
"""Test cases for the Token class and its subclasses.""" | |||
def test_issubclass(self): | |||
"""check that all classes within the tokens module are really Tokens""" | |||
for name in tokens.__all__: | |||
klass = getattr(tokens, name) | |||
self.assertTrue(issubclass(klass, tokens.Token)) | |||
self.assertIsInstance(klass(), klass) | |||
self.assertIsInstance(klass(), tokens.Token) | |||
def test_attributes(self): | |||
"""check that Token attributes can be managed properly""" | |||
token1 = tokens.Token() | |||
token2 = tokens.Token(foo="bar", baz=123) | |||
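# tokens accept arbitrary keyword arguments as attributes; missing ones raise KeyError rather than AttributeError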
self.assertEqual("bar", token2.foo) | |||
self.assertEqual(123, token2.baz) | |||
self.assertRaises(KeyError, lambda: token1.foo) | |||
self.assertRaises(KeyError, lambda: token2.bar) | |||
token1.spam = "eggs" | |||
token2.foo = "ham" | |||
del token2.baz | |||
self.assertEqual("eggs", token1.spam) | |||
self.assertEqual("ham", token2.foo) | |||
self.assertRaises(KeyError, lambda: token2.baz) | |||
self.assertRaises(KeyError, delattr, token2, "baz") | |||
def test_repr(self): | |||
"""check that repr() on a Token works as expected""" | |||
token1 = tokens.Token() | |||
token2 = tokens.Token(foo="bar", baz=123) | |||
token3 = tokens.Text(text="earwig" * 100) | |||
hundredchars = ("earwig" * 100)[:97] + "..." | |||
self.assertEqual("Token()", repr(token1)) | |||
if py3k: | |||
token2repr1 = "Token(foo='bar', baz=123)" | |||
token2repr2 = "Token(baz=123, foo='bar')" | |||
token3repr = "Text(text='" + hundredchars + "')" | |||
else: | |||
token2repr1 = "Token(foo=u'bar', baz=123)" | |||
token2repr2 = "Token(baz=123, foo=u'bar')" | |||
token3repr = "Text(text=u'" + hundredchars + "')" | |||
token2repr = repr(token2) | |||
self.assertTrue(token2repr == token2repr1 or token2repr == token2repr2) | |||
self.assertEqual(token3repr, repr(token3)) | |||
def test_equality(self): | |||
"""check that equivalent tokens are considered equal""" | |||
token1 = tokens.Token() | |||
token2 = tokens.Token() | |||
token3 = tokens.Token(foo="bar", baz=123) | |||
token4 = tokens.Text(text="asdf") | |||
token5 = tokens.Text(text="asdf") | |||
token6 = tokens.TemplateOpen(text="asdf") | |||
self.assertEqual(token1, token2) | |||
self.assertEqual(token2, token1) | |||
self.assertEqual(token4, token5) | |||
self.assertEqual(token5, token4) | |||
self.assertNotEqual(token1, token3) | |||
self.assertNotEqual(token2, token3) | |||
self.assertNotEqual(token4, token6) | |||
self.assertNotEqual(token5, token6) | |||
def test_repr_equality(self): | |||
"check that eval(repr(token)) == token" | |||
tests = [ | |||
tokens.Token(), | |||
tokens.Token(foo="bar", baz=123), | |||
tokens.Text(text="earwig") | |||
] | |||
for token in tests: | |||
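# vars(tokens) supplies the token classes as eval()'s namespace, so each repr can be rebuilt into an equal token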
self.assertEqual(token, eval(repr(token), vars(tokens))) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -0,0 +1,67 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
from mwparserfromhell.nodes import Template, Text | |||
from mwparserfromhell.smart_list import SmartList | |||
from mwparserfromhell.utils import parse_anything | |||
from mwparserfromhell.wikicode import Wikicode | |||
from ._test_tree_equality import TreeEqualityTestCase | |||
class TestUtils(TreeEqualityTestCase): | |||
"""Tests for the utils module, which provides parse_anything().""" | |||
def test_parse_anything_valid(self): | |||
"""tests for valid input to utils.parse_anything()""" | |||
wrap = lambda L: Wikicode(SmartList(L)) | |||
textify = lambda L: wrap([Text(item) for item in L]) | |||
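# wrap() builds a Wikicode around a list of nodes; textify() first converts plain strings into Text nodes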
tests = [ | |||
(wrap([Text("foobar")]), textify(["foobar"])), | |||
(Template(wrap([Text("spam")])), | |||
wrap([Template(textify(["spam"]))])), | |||
("fóóbar", textify(["fóóbar"])), | |||
(b"foob\xc3\xa1r", textify(["foobár"])), | |||
(123, textify(["123"])), | |||
(True, textify(["True"])), | |||
(None, wrap([])), | |||
([Text("foo"), Text("bar"), Text("baz")], | |||
textify(["foo", "bar", "baz"])), | |||
([wrap([Text("foo")]), Text("bar"), "baz", 123, 456], | |||
textify(["foo", "bar", "baz", "123", "456"])), | |||
([[[([[((("foo",),),)], "bar"],)]]], textify(["foo", "bar"])) | |||
] | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, parse_anything(test)) | |||
def test_parse_anything_invalid(self): | |||
"""tests for invalid input to utils.parse_anything()""" | |||
self.assertRaises(ValueError, parse_anything, Ellipsis) | |||
self.assertRaises(ValueError, parse_anything, object) | |||
self.assertRaises(ValueError, parse_anything, object()) | |||
self.assertRaises(ValueError, parse_anything, type) | |||
self.assertRaises(ValueError, parse_anything, ["foo", [object]]) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -0,0 +1,599 @@ | |||
name: no_params | |||
label: simplest type of template | |||
input: "{{template}}" | |||
output: [TemplateOpen(), Text(text="template"), TemplateClose()] | |||
--- | |||
name: one_param_unnamed | |||
label: basic template with one unnamed parameter | |||
input: "{{foo|bar}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateClose()] | |||
--- | |||
name: one_param_named | |||
label: basic template with one named parameter | |||
input: "{{foo|bar=baz}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: multiple_unnamed_params | |||
label: basic template with multiple unnamed parameters | |||
input: "{{foo|bar|baz|biz|buzz}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateClose()] | |||
--- | |||
name: multiple_named_params | |||
label: basic template with multiple named parameters | |||
input: "{{foo|bar=baz|biz=buzz|buff=baff|usr=bin}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), Text(text="buzz"), TemplateParamSeparator(), Text(text="buff"), TemplateParamEquals(), Text(text="baff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamEquals(), Text(text="bin"), TemplateClose()] | |||
--- | |||
name: multiple_mixed_params | |||
label: basic template with multiple unnamed/named parameters | |||
input: "{{foo|bar=baz|biz|buzz=buff|usr|bin}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateParamEquals(), Text(text="buff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamSeparator(), Text(text="bin"), TemplateClose()] | |||
--- | |||
name: multiple_mixed_params2 | |||
label: basic template with multiple unnamed/named parameters in another order | |||
input: "{{foo|bar|baz|biz=buzz|buff=baff|usr=bin}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), Text(text="buzz"), TemplateParamSeparator(), Text(text="buff"), TemplateParamEquals(), Text(text="baff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamEquals(), Text(text="bin"), TemplateClose()] | |||
--- | |||
name: nested_unnamed_param | |||
label: nested template as an unnamed parameter | |||
input: "{{foo|{{bar}}}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_named_param_value | |||
label: nested template as a parameter value with a named parameter | |||
input: "{{foo|bar={{baz}}}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_named_param_name_and_value | |||
label: nested templates as a parameter name and value | |||
input: "{{foo|{{bar}}={{baz}}}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_start | |||
label: nested template at the beginning of a template name | |||
input: "{{{{foo}}bar}}" | |||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose()] | |||
--- | |||
name: nested_name_start_unnamed_param | |||
label: nested template at the beginning of a template name and as an unnamed parameter | |||
input: "{{{{foo}}bar|{{baz}}}}" | |||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_start_named_param_value | |||
label: nested template at the beginning of a template name and as a parameter value with a named parameter | |||
input: "{{{{foo}}bar|baz={{biz}}}}" | |||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_start_named_param_name_and_value | |||
label: nested template at the beginning of a template name and as a parameter name and value | |||
input: "{{{{foo}}bar|{{baz}}={{biz}}}}" | |||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_end | |||
label: nested template at the end of a template name | |||
input: "{{foo{{bar}}}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_end_unnamed_param | |||
label: nested template at the end of a template name and as an unnamed parameter | |||
input: "{{foo{{bar}}|{{baz}}}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_end_named_param_value | |||
label: nested template at the end of a template name and as a parameter value with a named parameter | |||
input: "{{foo{{bar}}|baz={{biz}}}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_end_named_param_name_and_value | |||
label: nested template at the end of a template name and as a parameter name and value | |||
input: "{{foo{{bar}}|{{baz}}={{biz}}}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_mid | |||
label: nested template in the middle of a template name | |||
input: "{{foo{{bar}}baz}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: nested_name_mid_unnamed_param | |||
label: nested template in the middle of a template name and as an unnamed parameter | |||
input: "{{foo{{bar}}baz|{{biz}}}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_mid_named_param_value | |||
label: nested template in the middle of a template name and as a parameter value with a named parameter | |||
input: "{{foo{{bar}}baz|biz={{buzz}}}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_mid_named_param_name_and_value | |||
label: nested template in the middle of a template name and as a parameter name and value | |||
input: "{{foo{{bar}}baz|{{biz}}={{buzz}}}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_start_end | |||
label: nested template at the beginning and end of a template name | |||
input: "{{{{foo}}{{bar}}}}" | |||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_start_end_unnamed_param | |||
label: nested template at the beginning and end of a template name and as an unnamed parameter | |||
input: "{{{{foo}}{{bar}}|{{baz}}}}" | |||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_start_end_named_param_value | |||
label: nested template at the beginning and end of a template name and as a parameter value with a named parameter | |||
input: "{{{{foo}}{{bar}}|baz={{biz}}}}" | |||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_start_end_named_param_name_and_value | |||
label: nested template at the beginning and end of a template name and as a parameter name and value | |||
input: "{{{{foo}}{{bar}}|{{baz}}={{biz}}}}" | |||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_names_multiple | |||
label: multiple nested templates within nested templates | |||
input: "{{{{{{{{foo}}bar}}baz}}biz}}" | |||
output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateClose()] | |||
--- | |||
name: nested_names_multiple_unnamed_param | |||
label: multiple nested templates within nested templates with a nested unnamed parameter | |||
input: "{{{{{{{{foo}}bar}}baz}}biz|{{buzz}}}}" | |||
output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_names_multiple_named_param_value | |||
label: multiple nested templates within nested templates with a nested parameter value in a named parameter | |||
input: "{{{{{{{{foo}}bar}}baz}}biz|buzz={{bin}}}}" | |||
output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateParamEquals(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_names_multiple_named_param_name_and_value | |||
label: multiple nested templates within nested templates with a nested parameter name and value | |||
input: "{{{{{{{{foo}}bar}}baz}}biz|{{buzz}}={{bin}}}}" | |||
output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: mixed_nested_templates | |||
label: mixed assortment of nested templates within template names, parameter names, and values | |||
input: "{{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}" | |||
output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), Text(text="biz"), TemplateClose(), Text(text="buzz"), TemplateClose(), Text(text="usr"), TemplateParamSeparator(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: newlines_start | |||
label: a newline at the start of a template name | |||
input: "{{\nfoobar}}" | |||
output: [TemplateOpen(), Text(text="\nfoobar"), TemplateClose()] | |||
--- | |||
name: newlines_end | |||
label: a newline at the end of a template name | |||
input: "{{foobar\n}}" | |||
output: [TemplateOpen(), Text(text="foobar\n"), TemplateClose()] | |||
--- | |||
name: newlines_start_end | |||
label: a newline at the start and end of a template name | |||
input: "{{\nfoobar\n}}" | |||
output: [TemplateOpen(), Text(text="\nfoobar\n"), TemplateClose()] | |||
--- | |||
name: newlines_mid | |||
label: a newline at the middle of a template name | |||
input: "{{foo\nbar}}" | |||
output: [Text(text="{{foo\nbar}}")] | |||
--- | |||
name: newlines_start_mid | |||
label: a newline at the start and middle of a template name | |||
input: "{{\nfoo\nbar}}" | |||
output: [Text(text="{{\nfoo\nbar}}")] | |||
--- | |||
name: newlines_mid_end | |||
label: a newline at the middle and end of a template name | |||
input: "{{foo\nbar\n}}" | |||
output: [Text(text="{{foo\nbar\n}}")] | |||
--- | |||
name: newlines_start_mid_end | |||
label: a newline at the start, middle, and end of a template name | |||
input: "{{\nfoo\nbar\n}}" | |||
output: [Text(text="{{\nfoo\nbar\n}}")] | |||
--- | |||
name: newlines_unnamed_param | |||
label: newlines within an unnamed template parameter | |||
input: "{{foo|\nb\nar\n}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] | |||
--- | |||
name: newlines_enclose_template_name_unnamed_param | |||
label: newlines enclosing a template name and within an unnamed template parameter | |||
input: "{{\nfoo\n|\nb\nar\n}}" | |||
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] | |||
--- | |||
name: newlines_within_template_name_unnamed_param | |||
label: newlines within a template name and within an unnamed template parameter | |||
input: "{{\nfo\no\n|\nb\nar\n}}" | |||
output: [Text(text="{{\nfo\no\n|\nb\nar\n}}")] | |||
--- | |||
name: newlines_enclose_template_name_named_param_value | |||
label: newlines enclosing a template name and within a named parameter value | |||
input: "{{\nfoo\n|1=\nb\nar\n}}" | |||
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nb\nar\n"), TemplateClose()] | |||
--- | |||
name: newlines_within_template_name_named_param_value | |||
label: newlines within a template name and within a named parameter value | |||
input: "{{\nf\noo\n|1=\nb\nar\n}}" | |||
output: [Text(text="{{\nf\noo\n|1=\nb\nar\n}}")] | |||
--- | |||
name: newlines_named_param_name | |||
label: newlines within a parameter name | |||
input: "{{foo|\nb\nar\n=baz}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: newlines_named_param_name_param_value | |||
label: newlines within a parameter name and within a parameter value | |||
input: "{{foo|\nb\nar\n=\nba\nz\n}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nba\nz\n"), TemplateClose()] | |||
--- | |||
name: newlines_enclose_template_name_named_param_name | |||
label: newlines enclosing a template name and within a parameter name | |||
input: "{{\nfoo\n|\nb\nar\n=baz}}" | |||
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: newlines_enclose_template_name_named_param_name_param_value | |||
label: newlines enclosing a template name and within a parameter name and within a parameter value | |||
input: "{{\nfoo\n|\nb\nar\n=\nba\nz\n}}" | |||
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nba\nz\n"), TemplateClose()] | |||
--- | |||
name: newlines_within_template_name_named_param_name | |||
label: newlines within a template name and within a parameter name | |||
input: "{{\nfo\no\n|\nb\nar\n=baz}}" | |||
output: [Text(text="{{\nfo\no\n|\nb\nar\n=baz}}")] | |||
--- | |||
name: newlines_within_template_name_named_param_name_param_value | |||
label: newlines within a template name and within a parameter name and within a parameter value | |||
input: "{{\nf\noo\n|\nb\nar\n=\nba\nz\n}}" | |||
output: [Text(text="{{\nf\noo\n|\nb\nar\n=\nba\nz\n}}")] | |||
--- | |||
name: newlines_wildcard | |||
label: a random, complex assortment of templates and newlines | |||
input: "{{\nfoo\n|\nb\nar\n=\nb\naz\n|\nb\nuz\n}}" | |||
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nb\naz\n"), TemplateParamSeparator(), Text(text="\nb\nuz\n"), TemplateClose()] | |||
--- | |||
name: newlines_wildcard_redux | |||
label: an even more random and complex assortment of templates and newlines | |||
input: "{{\nfoo\n|\n{{\nbar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}" | |||
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\n"), TemplateOpen(), Text(text="\nbar\n"), TemplateParamSeparator(), Text(text="\nb\naz\n"), TemplateParamEquals(), Text(text="\nb\niz\n"), TemplateClose(), Text(text="\n"), TemplateParamEquals(), Text(text="\nb\nuzz\n"), TemplateClose()] | |||
--- | |||
name: newlines_wildcard_redux_invalid | |||
label: a variation of the newlines_wildcard_redux test that is invalid | |||
input: "{{\nfoo\n|\n{{\nb\nar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}" | |||
output: [Text(text="{{\nfoo\n|\n{{\nb\nar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}")] | |||
--- | |||
name: invalid_name_left_brace_middle | |||
label: invalid characters in template name: left brace in middle | |||
input: "{{foo{bar}}" | |||
output: [Text(text="{{foo{bar}}")] | |||
--- | |||
name: invalid_name_right_brace_middle | |||
label: invalid characters in template name: right brace in middle | |||
input: "{{foo}bar}}" | |||
output: [Text(text="{{foo}bar}}")] | |||
--- | |||
name: invalid_name_left_braces | |||
label: invalid characters in template name: two left braces in middle | |||
input: "{{foo{b{ar}}" | |||
output: [Text(text="{{foo{b{ar}}")] | |||
--- | |||
name: invalid_name_left_bracket_middle | |||
label: invalid characters in template name: left bracket in middle | |||
input: "{{foo[bar}}" | |||
output: [Text(text="{{foo[bar}}")] | |||
--- | |||
name: invalid_name_right_bracket_middle | |||
label: invalid characters in template name: right bracket in middle | |||
input: "{{foo]bar}}" | |||
output: [Text(text="{{foo]bar}}")] | |||
--- | |||
name: invalid_name_left_bracket_start | |||
label: invalid characters in template name: left bracket at start | |||
input: "{{[foobar}}" | |||
output: [Text(text="{{[foobar}}")] | |||
--- | |||
name: invalid_name_right_bracket_end
label: invalid characters in template name: right bracket at end | |||
input: "{{foobar]}}" | |||
output: [Text(text="{{foobar]}}")] | |||
--- | |||
name: valid_name_left_brace_start | |||
label: valid characters in template name: left brace at start | |||
input: "{{{foobar}}" | |||
output: [Text(text="{"), TemplateOpen(), Text(text="foobar"), TemplateClose()] | |||
--- | |||
name: valid_unnamed_param_left_brace | |||
label: valid characters in unnamed template parameter: left brace | |||
input: "{{foo|ba{r}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r"), TemplateClose()] | |||
--- | |||
name: valid_unnamed_param_braces | |||
label: valid characters in unnamed template parameter: left and right braces | |||
input: "{{foo|ba{r}}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r"), TemplateClose(), Text(text="}")] | |||
--- | |||
name: valid_param_name_braces | |||
label: valid characters in template parameter name: left and right braces | |||
input: "{{foo|ba{r}=baz}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r}"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: valid_param_name_brackets | |||
label: valid characters in template parameter name: left and right brackets
input: "{{foo|ba[r]=baz}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba[r]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: valid_param_name_double_left_brackets | |||
label: valid characters in template parameter name: double left brackets
input: "{{foo|bar[[in\nvalid=baz}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar[[in\nvalid"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: valid_param_name_double_right_brackets | |||
label: valid characters in template parameter name: double right brackets
input: "{{foo|bar]]=baz}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar]]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: valid_param_name_double_brackets | |||
label: valid characters in template parameter name: double left and right brackets
input: "{{foo|bar[[in\nvalid]]=baz}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar[[in\nvalid]]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: invalid_param_name_double_left_braces | |||
label: invalid characters in template parameter name: double left braces | |||
input: "{{foo|bar{{in\nvalid=baz}}" | |||
output: [Text(text="{{foo|bar{{in\nvalid=baz}}")] | |||
--- | |||
name: invalid_param_name_double_braces | |||
label: invalid characters in template parameter name: double left and right braces | |||
input: "{{foo|bar{{in\nvalid}}=baz}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar{{in\nvalid"), TemplateClose(), Text(text="=baz}}")] | |||
--- | |||
name: incomplete_plain | |||
label: incomplete templates that should fail gracefully: no close whatsoever | |||
input: "{{stuff}} {{foobar" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar")] | |||
--- | |||
name: incomplete_right_brace | |||
label: incomplete templates that should fail gracefully: only one right brace | |||
input: "{{stuff}} {{foobar}" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar}")] | |||
--- | |||
name: incomplete_pipe | |||
label: incomplete templates that should fail gracefully: a pipe | |||
input: "{{stuff}} {{foobar|" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar|")] | |||
--- | |||
name: incomplete_unnamed_param | |||
label: incomplete templates that should fail gracefully: an unnamed parameter | |||
input: "{{stuff}} {{foo|bar" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar")] | |||
--- | |||
name: incomplete_unnamed_param_pipe | |||
label: incomplete templates that should fail gracefully: an unnamed parameter, then a pipe | |||
input: "{{stuff}} {{foo|bar|" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|")] | |||
--- | |||
name: incomplete_valueless_param | |||
label: incomplete templates that should fail gracefully: a named parameter with no value
input: "{{stuff}} {{foo|bar=" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=")] | |||
--- | |||
name: incomplete_valueless_param_pipe | |||
label: incomplete templates that should fail gracefully: a named parameter with no value, then a pipe | |||
input: "{{stuff}} {{foo|bar=|" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=|")] | |||
--- | |||
name: incomplete_named_param | |||
label: incomplete templates that should fail gracefully: a named parameter with a value | |||
input: "{{stuff}} {{foo|bar=baz" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz")] | |||
--- | |||
name: incomplete_named_param_pipe | |||
label: incomplete templates that should fail gracefully: a named parameter with a value, then a pipe
input: "{{stuff}} {{foo|bar=baz|" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|")] | |||
--- | |||
name: incomplete_two_unnamed_params | |||
label: incomplete templates that should fail gracefully: two unnamed parameters | |||
input: "{{stuff}} {{foo|bar|baz" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz")] | |||
--- | |||
name: incomplete_unnamed_param_valueless_param | |||
label: incomplete templates that should fail gracefully: an unnamed parameter, then a named parameter with no value | |||
input: "{{stuff}} {{foo|bar|baz=" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz=")] | |||
--- | |||
name: incomplete_unnamed_param_named_param | |||
label: incomplete templates that should fail gracefully: an unnamed parameter, then a named parameter with a value | |||
input: "{{stuff}} {{foo|bar|baz=biz" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz=biz")] | |||
--- | |||
name: incomplete_named_param_unnamed_param | |||
label: incomplete templates that should fail gracefully: a named parameter with a value, then an unnamed parameter | |||
input: "{{stuff}} {{foo|bar=baz|biz" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz")] | |||
--- | |||
name: incomplete_named_param_valueless_param | |||
label: incomplete templates that should fail gracefully: a named parameter with a value, then a named parameter with no value | |||
input: "{{stuff}} {{foo|bar=baz|biz=" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz=")] | |||
--- | |||
name: incomplete_two_named_params | |||
label: incomplete templates that should fail gracefully: two named parameters with values | |||
input: "{{stuff}} {{foo|bar=baz|biz=buzz" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz=buzz")] | |||
--- | |||
name: incomplete_nested_template_as_unnamed_param | |||
label: incomplete templates that should fail gracefully: a valid nested template as an unnamed parameter | |||
input: "{{stuff}} {{foo|{{bar}}" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|"), TemplateOpen(), Text(text="bar"), TemplateClose()] | |||
--- | |||
name: incomplete_nested_template_as_param_value | |||
label: incomplete templates that should fail gracefully: a valid nested template as a parameter value | |||
input: "{{stuff}} {{foo|bar={{baz}}" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar="), TemplateOpen(), Text(text="baz"), TemplateClose()] |
@@ -0,0 +1,25 @@ | |||
name: basic | |||
label: sanity check for basic text parsing, no gimmicks | |||
input: "foobar" | |||
output: [Text(text="foobar")] | |||
--- | |||
name: newlines | |||
label: slightly more complex text parsing, with newlines | |||
input: "This is a line of text.\nThis is another line of text.\nThis is another." | |||
output: [Text(text="This is a line of text.\nThis is another line of text.\nThis is another.")] | |||
--- | |||
name: unicode | |||
label: ensure unicode data is handled properly | |||
input: "Thís ís å sëñtënce with diœcritiçs." | |||
output: [Text(text="Thís ís å sëñtënce with diœcritiçs.")] | |||
--- | |||
name: unicode2 | |||
label: additional unicode check for non-BMP codepoints | |||
input: "𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰" | |||
output: [Text(text="𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰")] |