From 03120bc97e01674fc00f815304e7b644a3681e32 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 21 Sep 2012 23:21:41 -0400 Subject: [PATCH 001/180] Version bump for 0.2.dev; fix a documentation bug. --- docs/conf.py | 4 ++-- docs/index.rst | 2 +- mwparserfromhell/__init__.py | 2 +- mwparserfromhell/parser/tokenizer.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index c537d37..6cc3664 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -48,9 +48,9 @@ copyright = u'2012 Ben Kurtovic' # built documents. # # The short X.Y version. -version = '0.1' +version = '0.2' # The full version, including alpha/beta/rc tags. -release = '0.1.1' +release = '0.2.dev' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/index.rst b/docs/index.rst index e198783..24f42f2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,4 +1,4 @@ -MWParserFromHell v0.1 Documentation +MWParserFromHell v0.2 Documentation =================================== :py:mod:`mwparserfromhell` (the *MediaWiki Parser from Hell*) is a Python diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index bdf5712..4f73a0e 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -31,7 +31,7 @@ from __future__ import unicode_literals __author__ = "Ben Kurtovic" __copyright__ = "Copyright (C) 2012 Ben Kurtovic" __license__ = "MIT License" -__version__ = "0.1.1" +__version__ = "0.2.dev" __email__ = "ben.kurtovic@verizon.net" from . import nodes, parser, smart_list, string_mixin, wikicode diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index a8ce88f..ca645b0 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -86,7 +86,7 @@ class Tokenizer(object): def _pop(self, keep_context=False): """Pop the current stack/context/textbuffer, returing the stack. - If *keep_context is ``True``, then we will replace the underlying + If *keep_context* is ``True``, then we will replace the underlying stack's context with the current stack's. """ self._push_textbuffer() From e0660f8bc31a00c3119d13d2d37bcf18042b3102 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 22 Sep 2012 22:47:05 -0400 Subject: [PATCH 002/180] Committing this C work for now. --- docs/conf.py | 5 +- mwparserfromhell/parser/builder.c | 24 +++ mwparserfromhell/parser/tokenizer.c | 322 ++++++++++++++++++++++++++++++++++++ setup.py | 9 +- 4 files changed, 357 insertions(+), 3 deletions(-) create mode 100644 mwparserfromhell/parser/builder.c create mode 100644 mwparserfromhell/parser/tokenizer.c diff --git a/docs/conf.py b/docs/conf.py index 6cc3664..cff089b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -17,6 +17,7 @@ import sys, os # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.insert(0, os.path.abspath('..')) +import mwparserfromhell # -- General configuration ----------------------------------------------------- @@ -48,9 +49,9 @@ copyright = u'2012 Ben Kurtovic' # built documents. # # The short X.Y version. -version = '0.2' +version = ".".join(mwparserfromhell.__version__.split(".", 2)[:2]) # The full version, including alpha/beta/rc tags. -release = '0.2.dev' +release = mwparserfromhell.__version__ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
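The conf.py hunk above stops hard-coding the Sphinx version strings: both are now derived from mwparserfromhell.__version__, so future bumps only need to touch mwparserfromhell/__init__.py. A minimal sketch of the derivation, assuming the project's "major.minor[.suffix]" version form:

# Sketch of the docs/conf.py derivation; "0.2.dev" stands in for the
# imported mwparserfromhell.__version__.
full = "0.2.dev"
short = ".".join(full.split(".", 2)[:2])  # keep at most "major.minor"
assert short == "0.2"     # what Sphinx uses as `version`
assert full == "0.2.dev"  # what Sphinx uses as `release`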
diff --git a/mwparserfromhell/parser/builder.c b/mwparserfromhell/parser/builder.c
new file mode 100644
index 0000000..7cbe236
--- /dev/null
+++ b/mwparserfromhell/parser/builder.c
@@ -0,0 +1,24 @@
+/*
+Builder for MWParserFromHell
+Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+#include <Python.h>
diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c
new file mode 100644
index 0000000..3fdc370
--- /dev/null
+++ b/mwparserfromhell/parser/tokenizer.c
@@ -0,0 +1,322 @@
+/*
+Tokenizer for MWParserFromHell
+Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/ + +#ifndef PY_SSIZE_T_CLEAN +#define PY_SSIZE_T_CLEAN +#endif + +#include +#include "structmember.h" + +static const Py_UNICODE* OUT_OF_BOUNDS = ""; +static const Py_UNICODE* MARKERS[] = {"{", "}", "[", "]", "<", ">", "|", "=", + "&", "#", "*", ";", ":", "/", "-", "!", + "\n", OUT_OF_BOUNDS}; + +static PyMethodDef +module_methods[] = { + {NULL} +}; + +typedef struct { + PyObject_HEAD + PyObject* text; /* text to tokenize */ + PyObject* stacks; /* token stacks */ + PyObject* topstack; /* topmost stack */ + Py_ssize_t head; /* current position in text */ + Py_ssize_t length; /* length of text */ + Py_ssize_t global; /* global context */ +} Tokenizer; + +static PyObject* +Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds) +{ + Tokenizer *self; + + self = (Tokenizer*) type->tp_alloc(type, 0); + if (self != NULL) { + + self->text = Py_None; + Py_INCREF(Py_None); + + self->stacks = PyList_New(0); + if (self->stacks == NULL) { + Py_DECREF(self); + return NULL; + } + + self->head = 0; + self->length = 0; + self->global = 0; + } + + return (PyObject*) self; +} + +static void +Tokenizer_dealloc(Tokenizer* self) +{ + Py_XDECREF(self->text); + Py_XDECREF(self->stacks); + Py_XDECREF(self->topstack); + self->ob_type->tp_free((PyObject*) self); +} + +static int +Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds) +{ + static char* kwlist[] = {NULL}; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "", kwlist)) + return -1; + return 0; +} + +#define Tokenizer_STACK(self) PyList_GET_ITEM(self->topstack, 0) +#define Tokenizer_CONTEXT(self) PyList_GET_ITEM(self->topstack, 1) +#define Tokenizer_TEXTBUFFER(self) PyList_GET_ITEM(self->topstack, 2) + +static int +Tokenizer_set_context(Tokenizer* self, Py_ssize_t value) +{ + if (PyList_SetItem(self->topstack, 1, PyInt_FromSsize_t(value))) + return -1; + return 0; +} + +static int +Tokenizer_set_textbuffer(Tokenizer* self, PyObject* value) +{ + if (PyList_SetItem(self->topstack, 2, value)) + return -1; + return 0; +} + +/* + Add a new token stack, context, and textbuffer to the list. +*/ +static int +Tokenizer_push(Tokenizer* self, int context) +{ + PyObject* top = PyList_New(3); + PyList_SET_ITEM(top, 0, PyList_New(0)); + PyList_SET_ITEM(top, 1, PyInt_FromSsize_t(0)); + PyList_SET_ITEM(top, 2, PyList_New(0)); + + Py_XDECREF(self->topstack); + self->topstack = top; + + if (PyList_Append(self->stacks, top)) + return -1; + return 0; +} + +/* + Push the textbuffer onto the stack as a Text node and clear it. +*/ +static int +Tokenizer_push_textbuffer(Tokenizer* self) +{ + if (PyList_GET_SIZE(Tokenizer_TEXTBUFFER(self)) > 0) { + + PyObject* text; + // tokens.Text(text="".join(self._textbuffer)) + + if (PyList_Append(Tokenizer_STACK(self), text) + return -1; + + if (Tokenizer_set_textbuffer(self, PyList_New(0))) + return -1; + + return 0; + } +} + +/* + Pop the current stack/context/textbuffer, returing the stack. +*/ +static PyObject* +Tokenizer_pop(Tokenizer* self) +{ + if (Tokenizer_push_textbuffer(self)) + return NULL; + + self->stacks // POP!? +} + +/* + Pop the current stack/context/textbuffer, returing the stack. We will also + replace the underlying stack's context with the current stack's. +*/ +static PyObject* +Tokenizer_pop_keeping_context(Tokenizer* self) +{ + if (Tokenizer_push_textbuffer(self)) + return NULL; +} + +/* + Read the value at a relative point in the wikicode. 
+*/ +static Py_UNICODE* +Tokenizer_read(Tokenizer* self, Py_ssize_t delta) +{ + Py_ssize_t index = self->head + delta; + + if (index >= self->length) { + return OUT_OF_BOUNDS; + } + + PyObject* item = PySequence_Fast_GET_ITEM(self->text, index); + return PyUnicode_AS_UNICODE(item); +} + +/* + Parse the wikicode string, using *context* for when to stop. +*/ +static PyObject* +Tokenizer_parse(Tokenizer* self, int context) +{ + Py_UNICODE* this; + + Tokenizer_push(self, context); + + while (1) { + this = Tokenizer_read(self, 0); + if (this not in MARKERS) { + WRITE TEXT + } + if (this == OUT_OF_BOUNDS) { + return Tokenizer_push(self); + } + printf("%p %i %c\n", this, *this, *this); + self->head++; + } +} + +/* + Build a list of tokens from a string of wikicode and return it. +*/ +static PyObject* +Tokenizer_tokenize(Tokenizer* self, PyObject *args) +{ + PyObject* text; + + if (!PyArg_ParseTuple(args, "U", &text)) { + /* Failed to parse a Unicode object; try a string instead. */ + PyErr_Clear(); + const char* encoded; + Py_ssize_t size; + + if (!PyArg_ParseTuple(args, "s#", &encoded, &size)) { + return NULL; + } + + PyObject* temp; + temp = PyUnicode_FromStringAndSize(encoded, size); + if (text == NULL) + return NULL; + + Py_XDECREF(self->text); + text = PySequence_Fast(temp, "expected a sequence"); + Py_XDECREF(temp); + self->text = text; + } + else { + Py_XDECREF(self->text); + self->text = PySequence_Fast(text, "expected a sequence"); + } + + self->length = PySequence_Length(self->text); + + return Tokenizer_parse(self, 0); +} + +static PyMethodDef +Tokenizer_methods[] = { + {"tokenize", (PyCFunction) Tokenizer_tokenize, METH_VARARGS, + "Build a list of tokens from a string of wikicode and return it."}, + {NULL} +}; + +static PyMemberDef +Tokenizer_members[] = { + {NULL} +}; + +static PyTypeObject +TokenizerType = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "_tokenizer.CTokenizer", /* tp_name */ + sizeof(Tokenizer), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor) Tokenizer_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + "Creates a list of tokens from a string of wikicode.", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + Tokenizer_methods, /* tp_methods */ + Tokenizer_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc) Tokenizer_init, /* tp_init */ + 0, /* tp_alloc */ + Tokenizer_new, /* tp_new */ +}; + +PyMODINIT_FUNC +init_tokenizer(void) +{ + PyObject* module; + + TokenizerType.tp_new = PyType_GenericNew; + if (PyType_Ready(&TokenizerType) < 0) + return; + + module = Py_InitModule("_tokenizer", module_methods); + + Py_INCREF(&TokenizerType); + PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); +} diff --git a/setup.py b/setup.py index 9faa56c..3664626 100644 --- a/setup.py +++ b/setup.py @@ -21,16 +21,23 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from setuptools import setup, find_packages +from setuptools import setup, find_packages, Extension from mwparserfromhell import __version__ with open("README.rst") as fp: long_docs = fp.read() +builder = Extension("mwparserfromhell.parser._builder", + sources = ["mwparserfromhell/parser/builder.c"]) + +tokenizer = Extension("mwparserfromhell.parser._tokenizer", + sources = ["mwparserfromhell/parser/tokenizer.c"]) + setup( name = "mwparserfromhell", packages = find_packages(exclude=("tests",)), + ext_modules = [builder, tokenizer], test_suite = "tests", version = __version__, author = "Ben Kurtovic", From 4cc4791d4871b833454ade8d9f52ee35e8bca742 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 23 Sep 2012 01:29:27 -0400 Subject: [PATCH 003/180] Adding a bunch more, and implementing Tokenizer_push_textbuffer. --- mwparserfromhell/parser/tokenizer.c | 116 ++++++++++++++++++++++++++++++------ setup.py | 2 +- 2 files changed, 98 insertions(+), 20 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 3fdc370..aec7b1d 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -28,10 +28,15 @@ SOFTWARE. #include #include "structmember.h" -static const Py_UNICODE* OUT_OF_BOUNDS = ""; -static const Py_UNICODE* MARKERS[] = {"{", "}", "[", "]", "<", ">", "|", "=", - "&", "#", "*", ";", ":", "/", "-", "!", - "\n", OUT_OF_BOUNDS}; +#define PU (Py_UNICODE*) +static const Py_UNICODE* OUT_OF_BOUNDS = PU""; +static const Py_UNICODE* MARKERS[] = {PU"{", PU"}", PU"[", PU"]", PU"<", PU">", + PU"|", PU"=", PU"&", PU"#", PU"*", PU";", + PU":", PU"/", PU"-", PU"!", PU"\n", PU""}; +#undef PU + +static PyObject* contexts; +static PyObject* tokens; static PyMethodDef module_methods[] = { @@ -60,7 +65,7 @@ Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds) Py_INCREF(Py_None); self->stacks = PyList_New(0); - if (self->stacks == NULL) { + if (!self->stacks) { Py_DECREF(self); return NULL; } @@ -91,9 +96,9 @@ Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds) return 0; } -#define Tokenizer_STACK(self) PyList_GET_ITEM(self->topstack, 0) -#define Tokenizer_CONTEXT(self) PyList_GET_ITEM(self->topstack, 1) -#define Tokenizer_TEXTBUFFER(self) PyList_GET_ITEM(self->topstack, 2) +#define Tokenizer_STACK(self) PySequence_Fast_GET_ITEM(self->topstack, 0) +#define Tokenizer_CONTEXT(self) PySequence_Fast_GET_ITEM(self->topstack, 1) +#define Tokenizer_TEXTBUFFER(self) PySequence_Fast_GET_ITEM(self->topstack, 2) static int Tokenizer_set_context(Tokenizer* self, Py_ssize_t value) @@ -136,19 +141,65 @@ Tokenizer_push(Tokenizer* self, int context) static int Tokenizer_push_textbuffer(Tokenizer* self) { - if (PyList_GET_SIZE(Tokenizer_TEXTBUFFER(self)) > 0) { + if (PySequence_Fast_GET_SIZE(Tokenizer_TEXTBUFFER(self)) > 0) { + PyObject* sep = PyUnicode_FromString(""); + if (!sep) return -1; + PyObject* text = PyUnicode_Join(sep, Tokenizer_TEXTBUFFER(self)); + Py_DECREF(sep); + if (!text) return -1; + + PyObject* klass = PyObject_GetAttrString(tokens, "Text"); + if (!klass) return -1; + PyObject* args = PyTuple_New(0); + if (!args) return -1; + PyObject* kwargs = PyDict_New(); + if (!kwargs) return -1; + PyDict_SetItemString(kwargs, "text", text); + Py_DECREF(text); + + PyObject* token = PyInstance_New(klass, args, kwargs); + if (!token) { + Py_DECREF(klass); + Py_DECREF(args); + Py_DECREF(kwargs); + return -1; + } - PyObject* text; - // tokens.Text(text="".join(self._textbuffer)) + Py_DECREF(klass); + 
Py_DECREF(args); + Py_DECREF(kwargs); - if (PyList_Append(Tokenizer_STACK(self), text) + if (PyList_Append(Tokenizer_STACK(self), token)) { + Py_XDECREF(token); return -1; + } + + Py_XDECREF(token); if (Tokenizer_set_textbuffer(self, PyList_New(0))) return -1; + } + return 0; +} - return 0; +static int +Tokenizer_delete_top_of_stack(Tokenizer* self) +{ + if (PySequence_DelItem(self->stacks, -1)) + return -1; + Py_DECREF(self->topstack); + + Py_ssize_t size = PySequence_Fast_GET_SIZE(self->stacks); + if (size > 0) { + PyObject* top = PySequence_Fast_GET_ITEM(self->stacks, size - 1); + self->topstack = top; + Py_INCREF(top); + } + else { + self->topstack = NULL; } + + return 0; } /* @@ -160,7 +211,13 @@ Tokenizer_pop(Tokenizer* self) if (Tokenizer_push_textbuffer(self)) return NULL; - self->stacks // POP!? + PyObject* stack = Tokenizer_STACK(self); + Py_INCREF(stack); + + if (Tokenizer_delete_top_of_stack(self)) + return NULL; + + return stack; } /* @@ -172,6 +229,19 @@ Tokenizer_pop_keeping_context(Tokenizer* self) { if (Tokenizer_push_textbuffer(self)) return NULL; + + PyObject* stack = Tokenizer_STACK(self); + PyObject* context = Tokenizer_CONTEXT(self); + Py_INCREF(stack); + Py_INCREF(context); + + if (Tokenizer_delete_top_of_stack(self)) + return NULL; + + if (PyList_SetItem(self->topstack, 1, context)) + return NULL; + + return stack; } /* @@ -183,7 +253,7 @@ Tokenizer_read(Tokenizer* self, Py_ssize_t delta) Py_ssize_t index = self->head + delta; if (index >= self->length) { - return OUT_OF_BOUNDS; + return (Py_UNICODE*) OUT_OF_BOUNDS; } PyObject* item = PySequence_Fast_GET_ITEM(self->text, index); @@ -202,11 +272,11 @@ Tokenizer_parse(Tokenizer* self, int context) while (1) { this = Tokenizer_read(self, 0); - if (this not in MARKERS) { + /* if (this not in MARKERS) { WRITE TEXT - } + } */ if (this == OUT_OF_BOUNDS) { - return Tokenizer_push(self); + return Tokenizer_pop(self); } printf("%p %i %c\n", this, *this, *this); self->head++; @@ -233,7 +303,7 @@ Tokenizer_tokenize(Tokenizer* self, PyObject *args) PyObject* temp; temp = PyUnicode_FromStringAndSize(encoded, size); - if (text == NULL) + if (!text) return NULL; Py_XDECREF(self->text); @@ -319,4 +389,12 @@ init_tokenizer(void) Py_INCREF(&TokenizerType); PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); + + PyObject* globals = PyEval_GetGlobals(); + PyObject* locals = PyEval_GetLocals(); + PyObject* fromlist = PyList_New(0); + + contexts = PyImport_ImportModuleLevel("contexts", globals, locals, fromlist, 1); + tokens = PyImport_ImportModuleLevel("tokens", globals, locals, fromlist, 1); + Py_DECREF(fromlist); } diff --git a/setup.py b/setup.py index 3664626..e348ce5 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ builder = Extension("mwparserfromhell.parser._builder", sources = ["mwparserfromhell/parser/builder.c"]) tokenizer = Extension("mwparserfromhell.parser._tokenizer", - sources = ["mwparserfromhell/parser/tokenizer.c"]) + sources = ["mwparserfromhell/parser/tokenizer.c"]) setup( name = "mwparserfromhell", From 9c4aba13912c9d5b274a61a5f7c6d9945f72c0b6 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 23 Sep 2012 03:40:19 -0400 Subject: [PATCH 004/180] Adding a few more functions. 
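Among the new functions is Tokenizer_fail_route(), which pops the current stack and then longjmp()s back to the most recent setjmp(): the C stand-in for the BadRoute exception that the pure-Python tokenizer raises when a parse route turns out to be invalid. Roughly, the control flow being emulated looks like this Python sketch (names are illustrative, not the library's exact API):

class BadRoute(Exception):
    """Signals that the current tokenization route is invalid."""

class Tokenizer:
    def __init__(self, text):
        self.text = text
        self.head = 0     # current position in text
        self.stacks = []  # one stack/context/textbuffer triple per route

    def _fail_route(self):
        self.stacks.pop()  # discard the failed route's stack
        raise BadRoute()

    def _parse_template(self):
        reset = self.head  # remember where this route began
        self.stacks.append(([], 0, []))
        try:
            ...  # parse the template; may call _fail_route() deep inside
        except BadRoute:
            self.head = reset  # rewind so the caller can try another route
            raise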
--- mwparserfromhell/parser/tokenizer.c | 114 ++++++++++++++++++++++++++++++++---- 1 file changed, 101 insertions(+), 13 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index aec7b1d..99c9bfc 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -26,15 +26,20 @@ SOFTWARE. #endif #include +#include "setjmp.h" #include "structmember.h" +static PyObject* EMPTY; + #define PU (Py_UNICODE*) -static const Py_UNICODE* OUT_OF_BOUNDS = PU""; static const Py_UNICODE* MARKERS[] = {PU"{", PU"}", PU"[", PU"]", PU"<", PU">", PU"|", PU"=", PU"&", PU"#", PU"*", PU";", PU":", PU"/", PU"-", PU"!", PU"\n", PU""}; #undef PU +static jmp_buf exception_env; +static const int BAD_ROUTE = 1; + static PyObject* contexts; static PyObject* tokens; @@ -142,10 +147,7 @@ static int Tokenizer_push_textbuffer(Tokenizer* self) { if (PySequence_Fast_GET_SIZE(Tokenizer_TEXTBUFFER(self)) > 0) { - PyObject* sep = PyUnicode_FromString(""); - if (!sep) return -1; - PyObject* text = PyUnicode_Join(sep, Tokenizer_TEXTBUFFER(self)); - Py_DECREF(sep); + PyObject* text = PyUnicode_Join(EMPTY, Tokenizer_TEXTBUFFER(self)); if (!text) return -1; PyObject* klass = PyObject_GetAttrString(tokens, "Text"); @@ -174,7 +176,7 @@ Tokenizer_push_textbuffer(Tokenizer* self) return -1; } - Py_XDECREF(token); + Py_DECREF(token); if (Tokenizer_set_textbuffer(self, PyList_New(0))) return -1; @@ -245,19 +247,104 @@ Tokenizer_pop_keeping_context(Tokenizer* self) } /* + Fail the current tokenization route. + + Discards the current stack/context/textbuffer and "raises a BAD_ROUTE + exception", which is implemented using longjmp(). +*/ +static void +Tokenizer_fail_route(Tokenizer* self) +{ + Tokenizer_pop(self); + longjmp(exception_env, BAD_ROUTE); +} + +/* + Write a token to the end of the current token stack. +*/ +static int +Tokenizer_write(Tokenizer* self, PyObject* token) +{ + if (Tokenizer_push_textbuffer(self)) + return -1; + + if (PyList_Append(Tokenizer_STACK(self), token)) { + Py_XDECREF(token); + return -1; + } + + Py_XDECREF(token); + return 0; +} + +/* + Write a token to the beginning of the current token stack. +*/ +static int +Tokenizer_write_first(Tokenizer* self, PyObject* token) +{ + if (Tokenizer_push_textbuffer(self)) + return -1; + + if (PyList_Insert(Tokenizer_STACK(self), 0, token)) { + Py_XDECREF(token); + return -1; + } + + Py_XDECREF(token); + return 0; +} + +/* + Write text to the current textbuffer. +*/ +static int +Tokenizer_write_text(Tokenizer* self, PyObject* text) +{ + if (PyList_Append(Tokenizer_TEXTBUFFER(self), text)) { + Py_XDECREF(text); + return -1; + } + + Py_XDECREF(text); + return 0; +} + +/* + Write a series of tokens to the current stack at once. +*/ +static int +Tokenizer_write_all(Tokenizer* self, PyObject* tokenlist) +{ + if (Tokenizer_push_textbuffer(self)) + Py_XDECREF(tokenlist); + return -1; + + PyObject* stack = Tokenizer_STACK(self); + Py_ssize_t size = PySequence_Fast_GET_SIZE(stack); + + if (PyList_SetSlice(stack, size, size, tokenlist)) { + Py_XDECREF(tokenlist); + return -1; + } + + Py_XDECREF(tokenlist); + return 0; +} + +/* Read the value at a relative point in the wikicode. 
*/ -static Py_UNICODE* +static PyObject* Tokenizer_read(Tokenizer* self, Py_ssize_t delta) { Py_ssize_t index = self->head + delta; if (index >= self->length) { - return (Py_UNICODE*) OUT_OF_BOUNDS; + return EMPTY; } - PyObject* item = PySequence_Fast_GET_ITEM(self->text, index); - return PyUnicode_AS_UNICODE(item); + return PySequence_Fast_GET_ITEM(self->text, index); } /* @@ -266,7 +353,7 @@ Tokenizer_read(Tokenizer* self, Py_ssize_t delta) static PyObject* Tokenizer_parse(Tokenizer* self, int context) { - Py_UNICODE* this; + PyObject* this; Tokenizer_push(self, context); @@ -275,10 +362,9 @@ Tokenizer_parse(Tokenizer* self, int context) /* if (this not in MARKERS) { WRITE TEXT } */ - if (this == OUT_OF_BOUNDS) { + if (this == EMPTY) { return Tokenizer_pop(self); } - printf("%p %i %c\n", this, *this, *this); self->head++; } } @@ -390,6 +476,8 @@ init_tokenizer(void) Py_INCREF(&TokenizerType); PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); + EMPTY = PyUnicode_FromString(""); + PyObject* globals = PyEval_GetGlobals(); PyObject* locals = PyEval_GetLocals(); PyObject* fromlist = PyList_New(0); From 5267c30cf60b9c03cdf908112f8bffc390a87ac1 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 23 Sep 2012 03:57:04 -0400 Subject: [PATCH 005/180] Fix refcount handling; implement Tokenizer_write_text_then_stack. --- mwparserfromhell/parser/tokenizer.c | 52 +++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 99c9bfc..3f7e84e 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -247,10 +247,9 @@ Tokenizer_pop_keeping_context(Tokenizer* self) } /* - Fail the current tokenization route. - - Discards the current stack/context/textbuffer and "raises a BAD_ROUTE - exception", which is implemented using longjmp(). + Fail the current tokenization route. Discards the current + stack/context/textbuffer and "raises a BAD_ROUTE exception", which is + implemented using longjmp(). */ static void Tokenizer_fail_route(Tokenizer* self) @@ -268,12 +267,9 @@ Tokenizer_write(Tokenizer* self, PyObject* token) if (Tokenizer_push_textbuffer(self)) return -1; - if (PyList_Append(Tokenizer_STACK(self), token)) { - Py_XDECREF(token); + if (PyList_Append(Tokenizer_STACK(self), token)) return -1; - } - Py_XDECREF(token); return 0; } @@ -286,12 +282,9 @@ Tokenizer_write_first(Tokenizer* self, PyObject* token) if (Tokenizer_push_textbuffer(self)) return -1; - if (PyList_Insert(Tokenizer_STACK(self), 0, token)) { - Py_XDECREF(token); + if (PyList_Insert(Tokenizer_STACK(self), 0, token)) return -1; - } - Py_XDECREF(token); return 0; } @@ -301,12 +294,9 @@ Tokenizer_write_first(Tokenizer* self, PyObject* token) static int Tokenizer_write_text(Tokenizer* self, PyObject* text) { - if (PyList_Append(Tokenizer_TEXTBUFFER(self), text)) { - Py_XDECREF(text); + if (PyList_Append(Tokenizer_TEXTBUFFER(self), text)) return -1; - } - Py_XDECREF(text); return 0; } @@ -317,18 +307,40 @@ static int Tokenizer_write_all(Tokenizer* self, PyObject* tokenlist) { if (Tokenizer_push_textbuffer(self)) - Py_XDECREF(tokenlist); return -1; PyObject* stack = Tokenizer_STACK(self); Py_ssize_t size = PySequence_Fast_GET_SIZE(stack); - if (PyList_SetSlice(stack, size, size, tokenlist)) { - Py_XDECREF(tokenlist); + if (PyList_SetSlice(stack, size, size, tokenlist)) + return -1; + + return 0; +} + +/* + Pop the current stack, write text, and then write the stack. 
+*/ +static int +Tokenizer_write_text_then_stack(Tokenizer* self, PyObject* text) +{ + PyObject* stack = Tokenizer_pop(self); + if (Tokenizer_write_text(self, text)) { + Py_XDECREF(stack); return -1; } - Py_XDECREF(tokenlist); + if (stack) { + if (PySequence_Fast_GET_SIZE(stack) > 0) { + if (Tokenizer_write_all(self, stack)) { + Py_DECREF(stack); + return -1; + } + } + Py_DECREF(stack); + } + + self->head--; return 0; } From 8729d20f078df40c50a70ee7cbd392b534173a88 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 23 Sep 2012 17:40:46 -0400 Subject: [PATCH 006/180] Fill out Tokenizer_parse(); build a bunch of empty function definitions. --- mwparserfromhell/parser/tokenizer.c | 252 ++++++++++++++++++++++++++++++++++-- 1 file changed, 240 insertions(+), 12 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 3f7e84e..0d18473 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -26,8 +26,8 @@ SOFTWARE. #endif #include -#include "setjmp.h" -#include "structmember.h" +#include +#include static PyObject* EMPTY; @@ -35,7 +35,10 @@ static PyObject* EMPTY; static const Py_UNICODE* MARKERS[] = {PU"{", PU"}", PU"[", PU"]", PU"<", PU">", PU"|", PU"=", PU"&", PU"#", PU"*", PU";", PU":", PU"/", PU"-", PU"!", PU"\n", PU""}; -#undef PU +static const int NUM_MARKERS = 17; + +#define CONTEXT(name) PyInt_AsSsize_t((PyIntObject*) \ + PyObject_GetAttrString(contexts, name)) static jmp_buf exception_env; static const int BAD_ROUTE = 1; @@ -103,6 +106,7 @@ Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds) #define Tokenizer_STACK(self) PySequence_Fast_GET_ITEM(self->topstack, 0) #define Tokenizer_CONTEXT(self) PySequence_Fast_GET_ITEM(self->topstack, 1) +#define Tokenizer_CONTEXT_VAL(self) PyInt_AsSsize_t((PyIntObject*) Tokenizer_CONTEXT(self)) #define Tokenizer_TEXTBUFFER(self) PySequence_Fast_GET_ITEM(self->topstack, 2) static int @@ -125,11 +129,11 @@ Tokenizer_set_textbuffer(Tokenizer* self, PyObject* value) Add a new token stack, context, and textbuffer to the list. */ static int -Tokenizer_push(Tokenizer* self, int context) +Tokenizer_push(Tokenizer* self, Py_ssize_t context) { PyObject* top = PyList_New(3); PyList_SET_ITEM(top, 0, PyList_New(0)); - PyList_SET_ITEM(top, 1, PyInt_FromSsize_t(0)); + PyList_SET_ITEM(top, 1, PyInt_FromSsize_t(context)); PyList_SET_ITEM(top, 2, PyList_New(0)); Py_XDECREF(self->topstack); @@ -345,7 +349,7 @@ Tokenizer_write_text_then_stack(Tokenizer* self, PyObject* text) } /* - Read the value at a relative point in the wikicode. + Read the value at a relative point in the wikicode, forwards. */ static PyObject* Tokenizer_read(Tokenizer* self, Py_ssize_t delta) @@ -360,23 +364,247 @@ Tokenizer_read(Tokenizer* self, Py_ssize_t delta) } /* - Parse the wikicode string, using *context* for when to stop. + Read the value at a relative point in the wikicode, backwards. 
*/ static PyObject* -Tokenizer_parse(Tokenizer* self, int context) +Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) +{ + if (delta > self->head) { + return EMPTY; + } + + Py_ssize_t index = self->head - delta; + return PySequence_Fast_GET_ITEM(self->text, index); +} + +static int +Tokenizer_parse_template_or_argument(Tokenizer* self) +{ + +} + +static int +Tokenizer_parse_template(Tokenizer* self) +{ + +} + +static int +Tokenizer_parse_argument(Tokenizer* self) +{ + +} + +static int +Tokenizer_verify_safe(Tokenizer* self) +{ + +} + +static int +Tokenizer_handle_template_param(Tokenizer* self) +{ + +} + +static int +Tokenizer_handle_template_param_value(Tokenizer* self) { - PyObject* this; + +} + +static PyObject* +Tokenizer_handle_template_end(Tokenizer* self) +{ + +} + +static int +Tokenizer_handle_argument_separator(Tokenizer* self) +{ + +} + +static PyObject* +Tokenizer_handle_argument_end(Tokenizer* self) +{ + +} + +static int +Tokenizer_parse_wikilink(Tokenizer* self) +{ + +} + +static int +Tokenizer_handle_wikilink_separator(Tokenizer* self) +{ + +} + +static PyObject* +Tokenizer_handle_wikilink_end(Tokenizer* self) +{ + +} + +static int +Tokenizer_parse_heading(Tokenizer* self) +{ + +} + +static PyObject* +Tokenizer_handle_heading_end(Tokenizer* self) +{ + +} + +static int +Tokenizer_really_parse_entity(Tokenizer* self) +{ + +} + +static int +Tokenizer_parse_entity(Tokenizer* self) +{ + +} + +static int +Tokenizer_parse_comment(Tokenizer* self) +{ + +} + + +/* + Parse the wikicode string, using context for when to stop. +*/ +static PyObject* +Tokenizer_parse(Tokenizer* self, Py_ssize_t context) +{ + Py_ssize_t fail_contexts = ( + CONTEXT("TEMPLATE") | CONTEXT("ARGUMENT") | CONTEXT("HEADING") | + CONTEXT("COMMENT")); + + PyObject *this, *next; + Py_UNICODE *this_data, *next_data, *next_next_data, *last_data; + Py_ssize_t this_context; + int is_marker, i; Tokenizer_push(self, context); while (1) { this = Tokenizer_read(self, 0); - /* if (this not in MARKERS) { - WRITE TEXT - } */ + this_data = PyUnicode_AS_UNICODE(this); + + is_marker = 0; + for (i = 0; i < NUM_MARKERS; i++) { + if (MARKERS[i] == this_data) { + is_marker = 1; + break; + } + } + + if (!is_marker) { + Tokenizer_write_text(self, this); + self->head++; + continue; + } + + this_context = Tokenizer_CONTEXT_VAL(self); + if (this == EMPTY) { + if (this_context & fail_contexts) { + Tokenizer_fail_route(self); + } return Tokenizer_pop(self); } + + next = Tokenizer_read(self, 1); + next_data = PyUnicode_AS_UNICODE(next); + + if (this_context & CONTEXT("COMMENT")) { + if (this_data == next_data && next_data == PU "-") { + if (PyUnicode_AS_UNICODE(Tokenizer_read(self, 2)) == PU ">") { + return Tokenizer_pop(self); + } + } + Tokenizer_write_text(self, this); + } + else if (this_data == next_data && next_data == PU "{") { + Tokenizer_parse_template_or_argument(self); + } + else if (this_data == PU "|" && this_context & CONTEXT("TEMPLATE")) { + Tokenizer_handle_template_param(self); + } + else if (this_data == PU "=" && this_context & CONTEXT("TEMPLATE_PARAM_KEY")) { + Tokenizer_handle_template_param_value(self); + } + else if (this_data == next_data && next_data == PU "}" && + this_context & CONTEXT("TEMPLATE")) { + Tokenizer_handle_template_end(self); + } + else if (this_data == PU "|" && this_context & CONTEXT("ARGUMENT_NAME")) { + Tokenizer_handle_argument_separator(self); + } + else if (this_data == next_data && next_data == PU "}" && + this_context & CONTEXT("ARGUMENT")) { + if 
(PyUnicode_AS_UNICODE(Tokenizer_read(self, 2)) == PU "}") { + return Tokenizer_handle_argument_end(self); + } + Tokenizer_write_text(self, this); + } + else if (this_data == next_data && next_data == PU "[") { + if (!(this_context & CONTEXT("WIKILINK_TITLE"))) { + Tokenizer_parse_wikilink(self); + } + else { + Tokenizer_write_text(self, this); + } + } + else if (this_data == PU "|" && this_context & CONTEXT("WIKILINK_TITLE")) { + Tokenizer_handle_wikilink_separator(self); + } + else if (this_data == next_data && next_data == PU "]" && + this_context & CONTEXT("WIKILINK")) { + return Tokenizer_handle_wikilink_end(self); + } + else if (this_data == PU "=" && !(self->global & CONTEXT("GL_HEADING"))) { + last_data = PyUnicode_AS_UNICODE(Tokenizer_read_backwards(self, 1)); + if (last_data == PU "\n" || last_data == PU "") { + Tokenizer_parse_heading(self); + } + else { + Tokenizer_write_text(self, this); + } + } + else if (this_data == PU "=" && this_context & CONTEXT("HEADING")) { + return Tokenizer_handle_heading_end(self); + } + else if (this_data == PU "\n" && this_context & CONTEXT("HEADING")) { + Tokenizer_fail_route(self); + } + else if (this_data == PU "&") { + Tokenizer_parse_entity(self); + } + else if (this_data == PU "<" && next_data == PU "!") { + next_next_data = PyUnicode_AS_UNICODE(Tokenizer_read(self, 2)); + if (next_next_data == PyUnicode_AS_UNICODE(Tokenizer_read(self, 3)) && + next_next_data == PU "-") { + Tokenizer_parse_comment(self); + } + else { + Tokenizer_write_text(self, this); + } + } + else { + Tokenizer_write_text(self, this); + } + self->head++; } } From 1ecb0e0d4485e71f9d49555d114df56ac9f0acff Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 23 Sep 2012 17:48:57 -0400 Subject: [PATCH 007/180] Fix Tokenizer_verify_safe()'s prototype; add documentation. --- mwparserfromhell/parser/tokenizer.c | 57 +++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 0d18473..ad013cb 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -377,109 +377,162 @@ Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) return PySequence_Fast_GET_ITEM(self->text, index); } +/* + Parse a template or argument at the head of the wikicode string. +*/ static int Tokenizer_parse_template_or_argument(Tokenizer* self) { } +/* + Parse a template at the head of the wikicode string. +*/ static int Tokenizer_parse_template(Tokenizer* self) { } +/* + Parse an argument at the head of the wikicode string. +*/ static int Tokenizer_parse_argument(Tokenizer* self) { } +/* + Verify that there are no unsafe characters in the current stack. The route + will be failed if the name contains any element of unsafes in it (not + merely at the beginning or end). This is used when parsing a template name + or parameter key, which cannot contain newlines. +*/ static int -Tokenizer_verify_safe(Tokenizer* self) +Tokenizer_verify_safe(Tokenizer* self, Py_UNICODE* unsafes[]) { } +/* + Handle a template parameter at the head of the string. +*/ static int Tokenizer_handle_template_param(Tokenizer* self) { } +/* + Handle a template parameter's value at the head of the string. +*/ static int Tokenizer_handle_template_param_value(Tokenizer* self) { } +/* + Handle the end of a template at the head of the string. +*/ static PyObject* Tokenizer_handle_template_end(Tokenizer* self) { } +/* + Handle the separator between an argument's name and default. 
+*/ static int Tokenizer_handle_argument_separator(Tokenizer* self) { } +/* + Handle the end of an argument at the head of the string. +*/ static PyObject* Tokenizer_handle_argument_end(Tokenizer* self) { } +/* + Parse an internal wikilink at the head of the wikicode string. +*/ static int Tokenizer_parse_wikilink(Tokenizer* self) { } +/* + Handle the separator between a wikilink's title and its text. +*/ static int Tokenizer_handle_wikilink_separator(Tokenizer* self) { } +/* + Handle the end of a wikilink at the head of the string. +*/ static PyObject* Tokenizer_handle_wikilink_end(Tokenizer* self) { } +/* + Parse a section heading at the head of the wikicode string. +*/ static int Tokenizer_parse_heading(Tokenizer* self) { } +/* + Handle the end of a section heading at the head of the string. +*/ static PyObject* Tokenizer_handle_heading_end(Tokenizer* self) { } +/* + Actually parse an HTML entity and ensure that it is valid. +*/ static int Tokenizer_really_parse_entity(Tokenizer* self) { } +/* + Parse an HTML entity at the head of the wikicode string. +*/ static int Tokenizer_parse_entity(Tokenizer* self) { } +/* + Parse an HTML comment at the head of the wikicode string. +*/ static int Tokenizer_parse_comment(Tokenizer* self) { } - /* Parse the wikicode string, using context for when to stop. */ From 7fc45783b78772b5b689f3b724481997e23cd4ca Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 23 Sep 2012 18:30:04 -0400 Subject: [PATCH 008/180] Add a header file; improve context handling. --- mwparserfromhell/parser/tokenizer.c | 131 +++--------------------- mwparserfromhell/parser/tokenizer.h | 199 ++++++++++++++++++++++++++++++++++++ 2 files changed, 213 insertions(+), 117 deletions(-) create mode 100644 mwparserfromhell/parser/tokenizer.h diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index ad013cb..41713e2 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -21,45 +21,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef PY_SSIZE_T_CLEAN -#define PY_SSIZE_T_CLEAN -#endif - -#include -#include -#include - -static PyObject* EMPTY; - -#define PU (Py_UNICODE*) -static const Py_UNICODE* MARKERS[] = {PU"{", PU"}", PU"[", PU"]", PU"<", PU">", - PU"|", PU"=", PU"&", PU"#", PU"*", PU";", - PU":", PU"/", PU"-", PU"!", PU"\n", PU""}; -static const int NUM_MARKERS = 17; - -#define CONTEXT(name) PyInt_AsSsize_t((PyIntObject*) \ - PyObject_GetAttrString(contexts, name)) - -static jmp_buf exception_env; -static const int BAD_ROUTE = 1; - -static PyObject* contexts; -static PyObject* tokens; - -static PyMethodDef -module_methods[] = { - {NULL} -}; - -typedef struct { - PyObject_HEAD - PyObject* text; /* text to tokenize */ - PyObject* stacks; /* token stacks */ - PyObject* topstack; /* topmost stack */ - Py_ssize_t head; /* current position in text */ - Py_ssize_t length; /* length of text */ - Py_ssize_t global; /* global context */ -} Tokenizer; +#include "tokenizer.h" static PyObject* Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds) @@ -104,11 +66,6 @@ Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds) return 0; } -#define Tokenizer_STACK(self) PySequence_Fast_GET_ITEM(self->topstack, 0) -#define Tokenizer_CONTEXT(self) PySequence_Fast_GET_ITEM(self->topstack, 1) -#define Tokenizer_CONTEXT_VAL(self) PyInt_AsSsize_t((PyIntObject*) Tokenizer_CONTEXT(self)) -#define Tokenizer_TEXTBUFFER(self) PySequence_Fast_GET_ITEM(self->topstack, 2) - static int Tokenizer_set_context(Tokenizer* self, Py_ssize_t value) { @@ -539,9 +496,7 @@ Tokenizer_parse_comment(Tokenizer* self) static PyObject* Tokenizer_parse(Tokenizer* self, Py_ssize_t context) { - Py_ssize_t fail_contexts = ( - CONTEXT("TEMPLATE") | CONTEXT("ARGUMENT") | CONTEXT("HEADING") | - CONTEXT("COMMENT")); + Py_ssize_t fail_contexts = LC_TEMPLATE | LC_ARGUMENT | LC_HEADING | LC_COMMENT; PyObject *this, *next; Py_UNICODE *this_data, *next_data, *next_next_data, *last_data; @@ -580,7 +535,7 @@ Tokenizer_parse(Tokenizer* self, Py_ssize_t context) next = Tokenizer_read(self, 1); next_data = PyUnicode_AS_UNICODE(next); - if (this_context & CONTEXT("COMMENT")) { + if (this_context & LC_COMMENT) { if (this_data == next_data && next_data == PU "-") { if (PyUnicode_AS_UNICODE(Tokenizer_read(self, 2)) == PU ">") { return Tokenizer_pop(self); @@ -591,42 +546,40 @@ Tokenizer_parse(Tokenizer* self, Py_ssize_t context) else if (this_data == next_data && next_data == PU "{") { Tokenizer_parse_template_or_argument(self); } - else if (this_data == PU "|" && this_context & CONTEXT("TEMPLATE")) { + else if (this_data == PU "|" && this_context & LC_TEMPLATE) { Tokenizer_handle_template_param(self); } - else if (this_data == PU "=" && this_context & CONTEXT("TEMPLATE_PARAM_KEY")) { + else if (this_data == PU "=" && this_context & LC_TEMPLATE_PARAM_KEY) { Tokenizer_handle_template_param_value(self); } - else if (this_data == next_data && next_data == PU "}" && - this_context & CONTEXT("TEMPLATE")) { + else if (this_data == next_data && next_data == PU "}" && this_context & LC_TEMPLATE) { Tokenizer_handle_template_end(self); } - else if (this_data == PU "|" && this_context & CONTEXT("ARGUMENT_NAME")) { + else if (this_data == PU "|" && this_context & LC_ARGUMENT_NAME) { Tokenizer_handle_argument_separator(self); } - else if (this_data == next_data && next_data == PU "}" && - this_context & CONTEXT("ARGUMENT")) { + else if (this_data == next_data && next_data == PU "}" && this_context & LC_ARGUMENT) { if (PyUnicode_AS_UNICODE(Tokenizer_read(self, 2)) == 
PU "}") { return Tokenizer_handle_argument_end(self); } Tokenizer_write_text(self, this); } else if (this_data == next_data && next_data == PU "[") { - if (!(this_context & CONTEXT("WIKILINK_TITLE"))) { + if (!(this_context & LC_WIKILINK_TITLE)) { Tokenizer_parse_wikilink(self); } else { Tokenizer_write_text(self, this); } } - else if (this_data == PU "|" && this_context & CONTEXT("WIKILINK_TITLE")) { + else if (this_data == PU "|" && this_context & LC_WIKILINK_TITLE) { Tokenizer_handle_wikilink_separator(self); } else if (this_data == next_data && next_data == PU "]" && - this_context & CONTEXT("WIKILINK")) { + this_context & LC_WIKILINK) { return Tokenizer_handle_wikilink_end(self); } - else if (this_data == PU "=" && !(self->global & CONTEXT("GL_HEADING"))) { + else if (this_data == PU "=" && !(self->global & GL_HEADING)) { last_data = PyUnicode_AS_UNICODE(Tokenizer_read_backwards(self, 1)); if (last_data == PU "\n" || last_data == PU "") { Tokenizer_parse_heading(self); @@ -635,10 +588,10 @@ Tokenizer_parse(Tokenizer* self, Py_ssize_t context) Tokenizer_write_text(self, this); } } - else if (this_data == PU "=" && this_context & CONTEXT("HEADING")) { + else if (this_data == PU "=" && this_context & LC_HEADING) { return Tokenizer_handle_heading_end(self); } - else if (this_data == PU "\n" && this_context & CONTEXT("HEADING")) { + else if (this_data == PU "\n" && this_context & LC_HEADING) { Tokenizer_fail_route(self); } else if (this_data == PU "&") { @@ -700,61 +653,6 @@ Tokenizer_tokenize(Tokenizer* self, PyObject *args) return Tokenizer_parse(self, 0); } -static PyMethodDef -Tokenizer_methods[] = { - {"tokenize", (PyCFunction) Tokenizer_tokenize, METH_VARARGS, - "Build a list of tokens from a string of wikicode and return it."}, - {NULL} -}; - -static PyMemberDef -Tokenizer_members[] = { - {NULL} -}; - -static PyTypeObject -TokenizerType = { - PyObject_HEAD_INIT(NULL) - 0, /* ob_size */ - "_tokenizer.CTokenizer", /* tp_name */ - sizeof(Tokenizer), /* tp_basicsize */ - 0, /* tp_itemsize */ - (destructor) Tokenizer_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ - "Creates a list of tokens from a string of wikicode.", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - Tokenizer_methods, /* tp_methods */ - Tokenizer_members, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc) Tokenizer_init, /* tp_init */ - 0, /* tp_alloc */ - Tokenizer_new, /* tp_new */ -}; - PyMODINIT_FUNC init_tokenizer(void) { @@ -775,7 +673,6 @@ init_tokenizer(void) PyObject* locals = PyEval_GetLocals(); PyObject* fromlist = PyList_New(0); - contexts = PyImport_ImportModuleLevel("contexts", globals, locals, fromlist, 1); tokens = PyImport_ImportModuleLevel("tokens", globals, locals, fromlist, 1); Py_DECREF(fromlist); } diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h new file mode 100644 index 0000000..c504dd8 --- /dev/null +++ b/mwparserfromhell/parser/tokenizer.h @@ -0,0 +1,199 @@ +/* +Tokenizer Header File for MWParserFromHell +Copyright (C) 2012 Ben 
Kurtovic + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#ifndef PY_SSIZE_T_CLEAN +#define PY_SSIZE_T_CLEAN +#endif + +#include +#include +#include + +#define PU (Py_UNICODE*) + +static const Py_UNICODE* MARKERS[] = { + PU "{", PU "}", PU "[", PU "]", PU "<", PU ">", PU "|", PU "=", PU "&", + PU "#", PU "*", PU ";", PU ":", PU "/", PU "-", PU "!", PU "\n", PU ""}; +static const int NUM_MARKERS = 17; + +static jmp_buf exception_env; +static const int BAD_ROUTE = 1; + +static PyObject* EMPTY; +static PyObject* tokens; + + +/* Local contexts: */ + +static const Py_ssize_t LC_TEMPLATE = 0x0007; +static const Py_ssize_t LC_TEMPLATE_NAME = 0x0001; +static const Py_ssize_t LC_TEMPLATE_PARAM_KEY = 0x0002; +static const Py_ssize_t LC_TEMPLATE_PARAM_VALUE = 0x0004; + +static const Py_ssize_t LC_ARGUMENT = 0x0018; +static const Py_ssize_t LC_ARGUMENT_NAME = 0x0008; +static const Py_ssize_t LC_ARGUMENT_DEFAULT = 0x0010; + +static const Py_ssize_t LC_WIKILINK = 0x0060; +static const Py_ssize_t LC_WIKILINK_TITLE = 0x0020; +static const Py_ssize_t LC_WIKILINK_TEXT = 0x0040; + +static const Py_ssize_t LC_HEADING = 0x1f80; +static const Py_ssize_t LC_HEADING_LEVEL_1 = 0x0080; +static const Py_ssize_t LC_HEADING_LEVEL_2 = 0x0100; +static const Py_ssize_t LC_HEADING_LEVEL_3 = 0x0200; +static const Py_ssize_t LC_HEADING_LEVEL_4 = 0x0400; +static const Py_ssize_t LC_HEADING_LEVEL_5 = 0x0800; +static const Py_ssize_t LC_HEADING_LEVEL_6 = 0x1000; + +static const Py_ssize_t LC_COMMENT = 0x2000; + + +/* Global contexts: */ + +static const Py_ssize_t GL_HEADING = 0x1; + + +/* Tokenizer object definition: */ + +typedef struct { + PyObject_HEAD + PyObject* text; /* text to tokenize */ + PyObject* stacks; /* token stacks */ + PyObject* topstack; /* topmost stack */ + Py_ssize_t head; /* current position in text */ + Py_ssize_t length; /* length of text */ + Py_ssize_t global; /* global context */ +} Tokenizer; + + +/* Some macros for accessing Tokenizer data: */ + +#define Tokenizer_STACK(self) PySequence_Fast_GET_ITEM(self->topstack, 0) +#define Tokenizer_CONTEXT(self) PySequence_Fast_GET_ITEM(self->topstack, 1) +#define Tokenizer_CONTEXT_VAL(self) PyInt_AsSsize_t(Tokenizer_CONTEXT(self)) +#define Tokenizer_TEXTBUFFER(self) PySequence_Fast_GET_ITEM(self->topstack, 2) + + +/* Tokenizer function prototypes: */ + +static PyObject* Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds); +static void Tokenizer_dealloc(Tokenizer* self); +static int Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds); +static int 
Tokenizer_set_context(Tokenizer* self, Py_ssize_t value); +static int Tokenizer_set_textbuffer(Tokenizer* self, PyObject* value); +static int Tokenizer_push(Tokenizer* self, Py_ssize_t context); +static int Tokenizer_push_textbuffer(Tokenizer* self); +static int Tokenizer_delete_top_of_stack(Tokenizer* self); +static PyObject* Tokenizer_pop(Tokenizer* self); +static PyObject* Tokenizer_pop_keeping_context(Tokenizer* self); +static void Tokenizer_fail_route(Tokenizer* self); +static int Tokenizer_write(Tokenizer* self, PyObject* token); +static int Tokenizer_write_first(Tokenizer* self, PyObject* token); +static int Tokenizer_write_text(Tokenizer* self, PyObject* text); +static int Tokenizer_write_all(Tokenizer* self, PyObject* tokenlist); +static int Tokenizer_write_text_then_stack(Tokenizer* self, PyObject* text); +static PyObject* Tokenizer_read(Tokenizer* self, Py_ssize_t delta); +static PyObject* Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta); +static int Tokenizer_parse_template_or_argument(Tokenizer* self); +static int Tokenizer_parse_template(Tokenizer* self); +static int Tokenizer_parse_argument(Tokenizer* self); +static int Tokenizer_verify_safe(Tokenizer* self, Py_UNICODE* unsafes[]); +static int Tokenizer_handle_template_param(Tokenizer* self); +static int Tokenizer_handle_template_param_value(Tokenizer* self); +static PyObject* Tokenizer_handle_template_end(Tokenizer* self); +static int Tokenizer_handle_argument_separator(Tokenizer* self); +static PyObject* Tokenizer_handle_argument_end(Tokenizer* self); +static int Tokenizer_parse_wikilink(Tokenizer* self); +static int Tokenizer_handle_wikilink_separator(Tokenizer* self); +static PyObject* Tokenizer_handle_wikilink_end(Tokenizer* self); +static int Tokenizer_parse_heading(Tokenizer* self); +static PyObject* Tokenizer_handle_heading_end(Tokenizer* self); +static int Tokenizer_really_parse_entity(Tokenizer* self); +static int Tokenizer_parse_entity(Tokenizer* self); +static int Tokenizer_parse_comment(Tokenizer* self); +static PyObject* Tokenizer_parse(Tokenizer* self, Py_ssize_t context); +static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject *args); + + +/* More structs for creating the Tokenizer type: */ + +static PyMethodDef +Tokenizer_methods[] = { + {"tokenize", (PyCFunction) Tokenizer_tokenize, METH_VARARGS, + "Build a list of tokens from a string of wikicode and return it."}, + {NULL} +}; + +static PyMemberDef +Tokenizer_members[] = { + {NULL} +}; + +static PyMethodDef +module_methods[] = { + {NULL} +}; + +static PyTypeObject +TokenizerType = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "_tokenizer.CTokenizer", /* tp_name */ + sizeof(Tokenizer), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor) Tokenizer_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + "Creates a list of tokens from a string of wikicode.", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + Tokenizer_methods, /* tp_methods */ + Tokenizer_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc) 
Tokenizer_init, /* tp_init */ + 0, /* tp_alloc */ + Tokenizer_new, /* tp_new */ +}; From 6edc24037eff257e82cfe3d86d3d2b253d2b5fa5 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 23 Sep 2012 19:14:23 -0400 Subject: [PATCH 009/180] Implement Tokenizer_parse_template_or_argument(). --- mwparserfromhell/parser/tokenizer.c | 118 ++++++++++++++++++++++++++++------- mwparserfromhell/parser/tokenizer.h | 3 +- mwparserfromhell/parser/tokenizer.py | 2 +- 3 files changed, 98 insertions(+), 25 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 41713e2..4877773 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -313,9 +313,8 @@ Tokenizer_read(Tokenizer* self, Py_ssize_t delta) { Py_ssize_t index = self->head + delta; - if (index >= self->length) { + if (index >= self->length) return EMPTY; - } return PySequence_Fast_GET_ITEM(self->text, index); } @@ -326,9 +325,8 @@ Tokenizer_read(Tokenizer* self, Py_ssize_t delta) static PyObject* Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) { - if (delta > self->head) { + if (delta > self->head) return EMPTY; - } Py_ssize_t index = self->head - delta; return PySequence_Fast_GET_ITEM(self->text, index); @@ -340,7 +338,84 @@ Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) static int Tokenizer_parse_template_or_argument(Tokenizer* self) { + self->head += 2; + unsigned int braces = 2, i; + + while (Tokenizer_READ(self, 0) == PU "{") { + self->head++; + braces++; + } + Tokenizer_push(self, 0); + + while (braces) { + if (braces == 1) { + PyObject* text = PyUnicode_FromString("{"); + + if (Tokenizer_write_text_then_stack(self, text)) { + Py_XDECREF(text); + return -1; + } + + Py_XDECREF(text); + return 0; + } + + if (braces == 2) { + if (setjmp(exception_env) == BAD_ROUTE) { + PyObject* text = PyUnicode_FromString("{{"); + + if (Tokenizer_write_text_then_stack(self, text)) { + Py_XDECREF(text); + return -1; + } + + Py_XDECREF(text); + return 0; + } else { + Tokenizer_parse_template(self); + } + break; + } + + if (setjmp(exception_env) == BAD_ROUTE) { + if (setjmp(exception_env) == BAD_ROUTE) { + char bracestr[braces]; + for (i = 0; i < braces; i++) { + bracestr[i] = *"{"; + } + PyObject* text = PyUnicode_FromString(bracestr); + + if (Tokenizer_write_text_then_stack(self, text)) { + Py_XDECREF(text); + return -1; + } + + Py_XDECREF(text); + return 0; + } + else { + Tokenizer_parse_template(self); + braces -= 2; + } + } + else { + Tokenizer_parse_argument(self); + braces -= 3; + } + + if (braces) { + self->head++; + } + } + PyObject* tokenlist = Tokenizer_pop(self); + if (Tokenizer_write_all(self, tokenlist)) { + Py_DECREF(tokenlist); + return -1; + } + + Py_DECREF(tokenlist); + return 0; } /* @@ -498,8 +573,8 @@ Tokenizer_parse(Tokenizer* self, Py_ssize_t context) { Py_ssize_t fail_contexts = LC_TEMPLATE | LC_ARGUMENT | LC_HEADING | LC_COMMENT; - PyObject *this, *next; - Py_UNICODE *this_data, *next_data, *next_next_data, *last_data; + PyObject *this; + Py_UNICODE *this_data, *next, *next_next, *last; Py_ssize_t this_context; int is_marker, i; @@ -532,18 +607,17 @@ Tokenizer_parse(Tokenizer* self, Py_ssize_t context) return Tokenizer_pop(self); } - next = Tokenizer_read(self, 1); - next_data = PyUnicode_AS_UNICODE(next); + next = Tokenizer_READ(self, 1); if (this_context & LC_COMMENT) { - if (this_data == next_data && next_data == PU "-") { - if (PyUnicode_AS_UNICODE(Tokenizer_read(self, 2)) == PU ">") { + if (this_data == next && next == PU 
"-") { + if (Tokenizer_READ(self, 2) == PU ">") { return Tokenizer_pop(self); } } Tokenizer_write_text(self, this); } - else if (this_data == next_data && next_data == PU "{") { + else if (this_data == next && next == PU "{") { Tokenizer_parse_template_or_argument(self); } else if (this_data == PU "|" && this_context & LC_TEMPLATE) { @@ -552,19 +626,19 @@ Tokenizer_parse(Tokenizer* self, Py_ssize_t context) else if (this_data == PU "=" && this_context & LC_TEMPLATE_PARAM_KEY) { Tokenizer_handle_template_param_value(self); } - else if (this_data == next_data && next_data == PU "}" && this_context & LC_TEMPLATE) { + else if (this_data == next && next == PU "}" && this_context & LC_TEMPLATE) { Tokenizer_handle_template_end(self); } else if (this_data == PU "|" && this_context & LC_ARGUMENT_NAME) { Tokenizer_handle_argument_separator(self); } - else if (this_data == next_data && next_data == PU "}" && this_context & LC_ARGUMENT) { - if (PyUnicode_AS_UNICODE(Tokenizer_read(self, 2)) == PU "}") { + else if (this_data == next && next == PU "}" && this_context & LC_ARGUMENT) { + if (Tokenizer_READ(self, 2) == PU "}") { return Tokenizer_handle_argument_end(self); } Tokenizer_write_text(self, this); } - else if (this_data == next_data && next_data == PU "[") { + else if (this_data == next && next == PU "[") { if (!(this_context & LC_WIKILINK_TITLE)) { Tokenizer_parse_wikilink(self); } @@ -575,13 +649,12 @@ Tokenizer_parse(Tokenizer* self, Py_ssize_t context) else if (this_data == PU "|" && this_context & LC_WIKILINK_TITLE) { Tokenizer_handle_wikilink_separator(self); } - else if (this_data == next_data && next_data == PU "]" && - this_context & LC_WIKILINK) { + else if (this_data == next && next == PU "]" && this_context & LC_WIKILINK) { return Tokenizer_handle_wikilink_end(self); } else if (this_data == PU "=" && !(self->global & GL_HEADING)) { - last_data = PyUnicode_AS_UNICODE(Tokenizer_read_backwards(self, 1)); - if (last_data == PU "\n" || last_data == PU "") { + last = PyUnicode_AS_UNICODE(Tokenizer_read_backwards(self, 1)); + if (last == PU "\n" || last == PU "") { Tokenizer_parse_heading(self); } else { @@ -597,10 +670,9 @@ Tokenizer_parse(Tokenizer* self, Py_ssize_t context) else if (this_data == PU "&") { Tokenizer_parse_entity(self); } - else if (this_data == PU "<" && next_data == PU "!") { - next_next_data = PyUnicode_AS_UNICODE(Tokenizer_read(self, 2)); - if (next_next_data == PyUnicode_AS_UNICODE(Tokenizer_read(self, 3)) && - next_next_data == PU "-") { + else if (this_data == PU "<" && next == PU "!") { + next_next = Tokenizer_READ(self, 2); + if (next_next == Tokenizer_READ(self, 3) && next_next == PU "-") { Tokenizer_parse_comment(self); } else { diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index c504dd8..3f7dfdf 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -87,12 +87,13 @@ typedef struct { } Tokenizer; -/* Some macros for accessing Tokenizer data: */ +/* Macros for accessing Tokenizer data: */ #define Tokenizer_STACK(self) PySequence_Fast_GET_ITEM(self->topstack, 0) #define Tokenizer_CONTEXT(self) PySequence_Fast_GET_ITEM(self->topstack, 1) #define Tokenizer_CONTEXT_VAL(self) PyInt_AsSsize_t(Tokenizer_CONTEXT(self)) #define Tokenizer_TEXTBUFFER(self) PySequence_Fast_GET_ITEM(self->topstack, 2) +#define Tokenizer_READ(self, num) PyUnicode_AS_UNICODE(Tokenizer_read(self, num)) /* Tokenizer function prototypes: */ diff --git a/mwparserfromhell/parser/tokenizer.py 
b/mwparserfromhell/parser/tokenizer.py index ca645b0..364455d 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -162,8 +162,8 @@ class Tokenizer(object): self._head += 2 braces = 2 while self._read() == "{": - braces += 1 self._head += 1 + braces += 1 self._push() while braces: From 0d720a7ef13e7e377dd0d47c88d1e68c717e8b2c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 23 Sep 2012 20:35:49 -0400 Subject: [PATCH 010/180] Implement Tokenizer_parse_template(); NOARGS and NOKWARGS. --- mwparserfromhell/parser/tokenizer.c | 50 +++++++++++++++++++++++++++---------- mwparserfromhell/parser/tokenizer.h | 2 ++ 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 4877773..d9b953b 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -111,29 +111,22 @@ Tokenizer_push_textbuffer(Tokenizer* self) PyObject* text = PyUnicode_Join(EMPTY, Tokenizer_TEXTBUFFER(self)); if (!text) return -1; - PyObject* klass = PyObject_GetAttrString(tokens, "Text"); - if (!klass) return -1; - PyObject* args = PyTuple_New(0); - if (!args) return -1; + PyObject* class = PyObject_GetAttrString(tokens, "Text"); + if (!class) return -1; PyObject* kwargs = PyDict_New(); if (!kwargs) return -1; PyDict_SetItemString(kwargs, "text", text); Py_DECREF(text); - PyObject* token = PyInstance_New(klass, args, kwargs); + PyObject* token = PyInstance_New(class, NOARGS, kwargs); + Py_DECREF(class); + Py_DECREF(kwargs); if (!token) { - Py_DECREF(klass); - Py_DECREF(args); - Py_DECREF(kwargs); return -1; } - Py_DECREF(klass); - Py_DECREF(args); - Py_DECREF(kwargs); - if (PyList_Append(Tokenizer_STACK(self), token)) { - Py_XDECREF(token); + Py_DECREF(token); return -1; } @@ -424,7 +417,36 @@ Tokenizer_parse_template_or_argument(Tokenizer* self) static int Tokenizer_parse_template(Tokenizer* self) { + Py_ssize_t reset = self->head; + if (setjmp(exception_env) == BAD_ROUTE) { + self->head = reset; + longjmp(exception_env, BAD_ROUTE); + } + else { + PyObject* template = Tokenizer_parse(self, LC_TEMPLATE_NAME); + if (!template) return -1; + + PyObject* class = PyObject_GetAttrString(tokens, "TemplateOpen"); + if (!class) return -1; + PyObject* token = PyInstance_New(class, NOARGS, NOKWARGS); + Py_DECREF(class); + if (!token) return -1; + Tokenizer_write_first(self, token); + Py_DECREF(token); + + Tokenizer_write_all(self, template); + Py_DECREF(template); + + class = PyObject_GetAttrString(tokens, "TemplateClose"); + if (!class) return -1; + token = PyInstance_New(class, NOARGS, NOKWARGS); + Py_DECREF(class); + if (!token) return -1; + + Tokenizer_write(self, token); + Py_DECREF(token); + } } /* @@ -740,6 +762,8 @@ init_tokenizer(void) PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); EMPTY = PyUnicode_FromString(""); + NOARGS = PyTuple_New(0); + NOKWARGS = PyDict_New(); PyObject* globals = PyEval_GetGlobals(); PyObject* locals = PyEval_GetLocals(); diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index 3f7dfdf..d6c97c8 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -40,6 +40,8 @@ static jmp_buf exception_env; static const int BAD_ROUTE = 1; static PyObject* EMPTY; +static PyObject* NOARGS; +static PyObject* NOKWARGS; static PyObject* tokens; From 849016f73488eb4eee51fb8c0b16f49231e2dc3b Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 23 Sep 2012 21:27:08 
-0400 Subject: [PATCH 011/180] Implement Tokenizer_verify_safe() and some others. --- mwparserfromhell/parser/tokenizer.c | 164 +++++++++++++++++++++++++++++++++--- mwparserfromhell/parser/tokenizer.h | 2 +- 2 files changed, 155 insertions(+), 11 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index d9b953b..3d3b95f 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -121,9 +121,7 @@ Tokenizer_push_textbuffer(Tokenizer* self) PyObject* token = PyInstance_New(class, NOARGS, kwargs); Py_DECREF(class); Py_DECREF(kwargs); - if (!token) { - return -1; - } + if (!token) return -1; if (PyList_Append(Tokenizer_STACK(self), token)) { Py_DECREF(token); @@ -417,25 +415,34 @@ Tokenizer_parse_template_or_argument(Tokenizer* self) static int Tokenizer_parse_template(Tokenizer* self) { + PyObject *template, *class, *token; Py_ssize_t reset = self->head; + if (setjmp(exception_env) == BAD_ROUTE) { self->head = reset; longjmp(exception_env, BAD_ROUTE); } + else { - PyObject* template = Tokenizer_parse(self, LC_TEMPLATE_NAME); + template = Tokenizer_parse(self, LC_TEMPLATE_NAME); if (!template) return -1; - PyObject* class = PyObject_GetAttrString(tokens, "TemplateOpen"); + class = PyObject_GetAttrString(tokens, "TemplateOpen"); if (!class) return -1; - PyObject* token = PyInstance_New(class, NOARGS, NOKWARGS); + token = PyInstance_New(class, NOARGS, NOKWARGS); Py_DECREF(class); if (!token) return -1; - Tokenizer_write_first(self, token); + if (Tokenizer_write_first(self, token)) { + Py_DECREF(token); + return -1; + } Py_DECREF(token); - Tokenizer_write_all(self, template); + if (Tokenizer_write_all(self, template)) { + Py_DECREF(template); + return -1; + } Py_DECREF(template); class = PyObject_GetAttrString(tokens, "TemplateClose"); @@ -444,9 +451,14 @@ Tokenizer_parse_template(Tokenizer* self) Py_DECREF(class); if (!token) return -1; - Tokenizer_write(self, token); + if (Tokenizer_write(self, token)) { + Py_DECREF(token); + return -1; + } Py_DECREF(token); } + + return 0; } /* @@ -455,7 +467,50 @@ Tokenizer_parse_template(Tokenizer* self) static int Tokenizer_parse_argument(Tokenizer* self) { + PyObject *argument, *class, *token; + Py_ssize_t reset = self->head; + + if (setjmp(exception_env) == BAD_ROUTE) { + self->head = reset; + longjmp(exception_env, BAD_ROUTE); + } + + else { + argument = Tokenizer_parse(self, LC_ARGUMENT_NAME); + if (!argument) return -1; + + class = PyObject_GetAttrString(tokens, "ArgumentOpen"); + if (!class) return -1; + token = PyInstance_New(class, NOARGS, NOKWARGS); + Py_DECREF(class); + if (!token) return -1; + + if (Tokenizer_write_first(self, token)) { + Py_DECREF(token); + return -1; + } + Py_DECREF(token); + + if (Tokenizer_write_all(self, argument)) { + Py_DECREF(argument); + return -1; + } + Py_DECREF(argument); + + class = PyObject_GetAttrString(tokens, "ArgumentClose"); + if (!class) return -1; + token = PyInstance_New(class, NOARGS, NOKWARGS); + Py_DECREF(class); + if (!token) return -1; + if (Tokenizer_write(self, token)) { + Py_DECREF(token); + return -1; + } + Py_DECREF(token); + } + + return 0; } /* @@ -465,9 +520,98 @@ Tokenizer_parse_argument(Tokenizer* self) or parameter key, which cannot contain newlines. 
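 The unsafes array is expected to be NULL-terminated: each entry is matched
 as a substring against the stripped text of the stack's Text tokens, and
 any match fails the route.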
*/ static int -Tokenizer_verify_safe(Tokenizer* self, Py_UNICODE* unsafes[]) +Tokenizer_verify_safe(Tokenizer* self, const char* unsafes[]) { + if (Tokenizer_push_textbuffer(self)) + return -1; + PyObject* stack = Tokenizer_STACK(self); + if (stack) { + PyObject* textlist = PyList_New(0); + if (!textlist) return -1; + + PyObject* class = PyObject_GetAttrString(tokens, "Text"); + if (!class) { + Py_DECREF(textlist); + return -1; + } + + int i; + Py_ssize_t length = PySequence_Fast_GET_SIZE(stack); + PyObject *token, *textdata; + + for (i = 0; i < length; i++) { + token = PySequence_Fast_GET_ITEM(stack, i); + switch (PyObject_IsInstance(token, class)) { + case -1: + Py_DECREF(textlist); + Py_DECREF(class); + return -1; + case 0: + break; + case 1: + textdata = PyObject_GetAttrString(token, "text"); + if (!textdata) { + Py_DECREF(textlist); + Py_DECREF(class); + return -1; + } + if (PyList_Append(textlist, textdata)) { + Py_DECREF(textlist); + Py_DECREF(class); + Py_DECREF(textdata); + return -1; + } + Py_DECREF(textdata); + } + } + Py_DECREF(class); + + PyObject* text = PyUnicode_Join(EMPTY, textlist); + if (!text) { + Py_DECREF(textlist); + return -1; + } + Py_DECREF(textlist); + + PyObject* stripped = PyObject_CallMethod(text, "strip", NULL); + if (!stripped) { + Py_DECREF(text); + return -1; + } + Py_DECREF(text); + + const char* unsafe_char; + PyObject* unsafe; + i = 0; + while (1) { + unsafe_char = unsafes[i]; + if (!unsafe_char) break; + + unsafe = PyUnicode_FromString(unsafe_char); + + if (!unsafe) { + Py_DECREF(stripped); + return -1; + } + + switch (PyUnicode_Contains(stripped, unsafe)) { + case -1: + Py_DECREF(stripped); + Py_DECREF(unsafe); + return -1; + case 0: + break; + case 1: + Py_DECREF(stripped); + Py_DECREF(unsafe); + Tokenizer_fail_route(self); + } + i++; + } + } + + return 0; } /* diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index d6c97c8..951e238 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -121,7 +121,7 @@ static PyObject* Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta); static int Tokenizer_parse_template_or_argument(Tokenizer* self); static int Tokenizer_parse_template(Tokenizer* self); static int Tokenizer_parse_argument(Tokenizer* self); -static int Tokenizer_verify_safe(Tokenizer* self, Py_UNICODE* unsafes[]); +static int Tokenizer_verify_safe(Tokenizer* self, const char* unsafes[]); static int Tokenizer_handle_template_param(Tokenizer* self); static int Tokenizer_handle_template_param_value(Tokenizer* self); static PyObject* Tokenizer_handle_template_end(Tokenizer* self); From 17af353fb652e01eb61584c0f5c6248edd17e9be Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 24 Sep 2012 21:18:44 -0400 Subject: [PATCH 012/180] Implement Tokenizer_handle_template_param(). 
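
The handler below repeats the usual four-step emission dance already seen in
Tokenizer_parse_template() and Tokenizer_parse_argument(): fetch the token
class off the tokens module, instantiate it, write it to the stack, and drop
both references. A hypothetical helper along these lines (not part of this
patch; it assumes the tokens, NOARGS, and NOKWARGS globals plus the
Tokenizer_write() prototype from tokenizer.h) would collapse that pattern:

    /* Hypothetical helper: emit a no-argument token of the named class,
       e.g. Tokenizer_emit_token(self, "TemplateParamSeparator"). */
    static int
    Tokenizer_emit_token(Tokenizer* self, const char* name)
    {
        PyObject* class = PyObject_GetAttrString(tokens, name);
        if (!class) return -1;
        PyObject* token = PyInstance_New(class, NOARGS, NOKWARGS);
        Py_DECREF(class);
        if (!token) return -1;
        if (Tokenizer_write(self, token)) {
            /* Tokenizer_write() does not steal the reference */
            Py_DECREF(token);
            return -1;
        }
        Py_DECREF(token);
        return 0;
    }
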
--- mwparserfromhell/parser/tokenizer.c | 62 +++++++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 9 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 3d3b95f..3ab2437 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -206,7 +206,8 @@ Tokenizer_pop_keeping_context(Tokenizer* self) static void Tokenizer_fail_route(Tokenizer* self) { - Tokenizer_pop(self); + PyObject* stack = Tokenizer_pop(self); + Py_XDECREF(stack); longjmp(exception_env, BAD_ROUTE); } @@ -400,6 +401,7 @@ Tokenizer_parse_template_or_argument(Tokenizer* self) } PyObject* tokenlist = Tokenizer_pop(self); + if (!tokenlist) return -1; if (Tokenizer_write_all(self, tokenlist)) { Py_DECREF(tokenlist); return -1; @@ -543,10 +545,6 @@ Tokenizer_verify_safe(Tokenizer* self, const char* unsafes[]) for (i = 0; i < length; i++) { token = PySequence_Fast_GET_ITEM(stack, i); switch (PyObject_IsInstance(token, class)) { - case -1: - Py_DECREF(textlist); - Py_DECREF(class); - return -1; case 0: break; case 1: @@ -563,6 +561,11 @@ Tokenizer_verify_safe(Tokenizer* self, const char* unsafes[]) return -1; } Py_DECREF(textdata); + break; + case -1: + Py_DECREF(textlist); + Py_DECREF(class); + return -1; } } Py_DECREF(class); @@ -596,16 +599,17 @@ Tokenizer_verify_safe(Tokenizer* self, const char* unsafes[]) } switch (PyUnicode_Contains(stripped, unsafe)) { - case -1: - Py_DECREF(stripped); - Py_DECREF(unsafe); - return -1; case 0: break; case 1: Py_DECREF(stripped); Py_DECREF(unsafe); Tokenizer_fail_route(self); + break; + case -1: + Py_DECREF(stripped); + Py_DECREF(unsafe); + return -1; } i++; } @@ -620,7 +624,47 @@ Tokenizer_verify_safe(Tokenizer* self, const char* unsafes[]) static int Tokenizer_handle_template_param(Tokenizer* self) { + Py_ssize_t context = Tokenizer_CONTEXT_VAL(self); + if (context & LC_TEMPLATE_NAME) { + if (Tokenizer_verify_safe(self, {"\n", "{", "}", "[", "]"})) + return -1; + if (Tokenizer_set_context(self, context ^ LC_TEMPLATE_NAME)) + return -1; + } + else if (context & LC_TEMPLATE_PARAM_VALUE) { + if (Tokenizer_set_context(self, context ^ LC_TEMPLATE_PARAM_VALUE)) + return -1; + } + + if (context & LC_TEMPLATE_PARAM_KEY) { + PyObject* stack = Tokenizer_pop_keeping_context(self); + if (!stack) return -1; + if (Tokenizer_write_all(stack)) { + Py_DECREF(stack); + return -1; + } + Py_DECREF(stack); + } + else { + if (Tokenizer_set_context(self, context | LC_TEMPLATE_PARAM_KEY)) + return -1; + } + + class = PyObject_GetAttrString(tokens, "TemplateParamSeparator"); + if (!class) return -1; + token = PyInstance_New(class, NOARGS, NOKWARGS); + Py_DECREF(class); + if (!token) return -1; + + if (Tokenizer_write(self, token)) { + Py_DECREF(token); + return -1; + } + Py_DECREF(token); + + Tokenizer_push(self, Tokenizer_CONTEXT_VAL(self)); + return 0; } /* From 41535992a1a3488724435f4482642c6aa40bca45 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 25 Sep 2012 17:09:27 -0400 Subject: [PATCH 013/180] Implement Tokenizer_handle_template_param_value(). 
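
Note that the re-raise in Tokenizer_handle_template_param_value() below leans
on the single static jmp_buf from tokenizer.h, and that does not nest: by the
time a handler runs, exception_env still holds the context saved by that
handler's own setjmp(), so longjmp(exception_env, BAD_ROUTE) appears to jump
straight back into the same handler rather than out to the caller's check. One
way to make the idiom nest is to save and restore the caller's context
explicitly; a minimal sketch (Tokenizer_attempt() and its route callback are
hypothetical, and jmp_buf being an array type is why plain memcpy() works):

    #include <string.h>  /* for memcpy(); <setjmp.h> comes via tokenizer.h */

    /* Hypothetical wrapper: run *route*, rewinding the head and re-raising
       BAD_ROUTE to the caller's setjmp() if it fails. */
    static int
    Tokenizer_attempt(Tokenizer* self, int (*route)(Tokenizer*))
    {
        jmp_buf caller_env;
        Py_ssize_t reset = self->head;
        memcpy(caller_env, exception_env, sizeof(jmp_buf));

        if (setjmp(exception_env) == BAD_ROUTE) {
            /* "except BadRoute": rewind, then re-raise with the caller's
               context restored so the jump leaves this frame. */
            self->head = reset;
            memcpy(exception_env, caller_env, sizeof(jmp_buf));
            longjmp(exception_env, BAD_ROUTE);
        }
        return route(self);  /* may longjmp() back to the test above */
    }
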
--- mwparserfromhell/parser/tokenizer.c | 45 ++++++++++++++++++++++++++++++++---- mwparserfromhell/parser/tokenizer.py | 17 ++++++-------- 2 files changed, 48 insertions(+), 14 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 3ab2437..e7699fd 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -627,7 +627,8 @@ Tokenizer_handle_template_param(Tokenizer* self) Py_ssize_t context = Tokenizer_CONTEXT_VAL(self); if (context & LC_TEMPLATE_NAME) { - if (Tokenizer_verify_safe(self, {"\n", "{", "}", "[", "]"})) + const char* unsafes[] = {"\n", "{", "}", "[", "]"}; + if (Tokenizer_verify_safe(self, unsafes)) return -1; if (Tokenizer_set_context(self, context ^ LC_TEMPLATE_NAME)) return -1; @@ -640,7 +641,7 @@ Tokenizer_handle_template_param(Tokenizer* self) if (context & LC_TEMPLATE_PARAM_KEY) { PyObject* stack = Tokenizer_pop_keeping_context(self); if (!stack) return -1; - if (Tokenizer_write_all(stack)) { + if (Tokenizer_write_all(self, stack)) { Py_DECREF(stack); return -1; } @@ -651,9 +652,9 @@ Tokenizer_handle_template_param(Tokenizer* self) return -1; } - class = PyObject_GetAttrString(tokens, "TemplateParamSeparator"); + PyObject* class = PyObject_GetAttrString(tokens, "TemplateParamSeparator"); if (!class) return -1; - token = PyInstance_New(class, NOARGS, NOKWARGS); + PyObject* token = PyInstance_New(class, NOARGS, NOKWARGS); Py_DECREF(class); if (!token) return -1; @@ -673,7 +674,43 @@ Tokenizer_handle_template_param(Tokenizer* self) static int Tokenizer_handle_template_param_value(Tokenizer* self) { + if (setjmp(exception_env) == BAD_ROUTE) { + PyObject* stack = Tokenizer_pop(self); + Py_XDECREF(stack); + longjmp(exception_env, BAD_ROUTE); + } + + else { + const char* unsafes[] = {"\n", "{{", "}}"}; + if (Tokenizer_verify_safe(self, unsafes)) + return -1; + } + + PyObject* stack = Tokenizer_pop_keeping_context(self); + if (!stack) return -1; + if (Tokenizer_write_all(self, stack)) { + Py_DECREF(stack); + return -1; + } + Py_DECREF(stack); + Py_ssize_t context = Tokenizer_CONTEXT_VAL(self); + context ^= LC_TEMPLATE_PARAM_KEY; + context |= LC_TEMPLATE_PARAM_VALUE; + if (Tokenizer_set_context(self, context)) + return -1; + + PyObject* class = PyObject_GetAttrString(tokens, "TemplateParamEquals"); + if (!class) return -1; + PyObject* token = PyInstance_New(class, NOARGS, NOKWARGS); + Py_DECREF(class); + if (!token) return -1; + + if (Tokenizer_write(self, token)) { + Py_DECREF(token); + return -1; + } + Py_DECREF(token); } /* diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 364455d..508344e 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -197,10 +197,9 @@ class Tokenizer(object): except BadRoute: self._head = reset raise - else: - self._write_first(tokens.TemplateOpen()) - self._write_all(template) - self._write(tokens.TemplateClose()) + self._write_first(tokens.TemplateOpen()) + self._write_all(template) + self._write(tokens.TemplateClose()) def _parse_argument(self): """Parse an argument at the head of the wikicode string.""" @@ -210,10 +209,9 @@ class Tokenizer(object): except BadRoute: self._head = reset raise - else: - self._write_first(tokens.ArgumentOpen()) - self._write_all(argument) - self._write(tokens.ArgumentClose()) + self._write_first(tokens.ArgumentOpen()) + self._write_all(argument) + self._write(tokens.ArgumentClose()) def _verify_safe(self, unsafes): """Verify that there are no unsafe 
characters in the current stack. @@ -249,8 +247,7 @@ class Tokenizer(object): except BadRoute: self._pop() raise - else: - self._write_all(self._pop(keep_context=True)) + self._write_all(self._pop(keep_context=True)) self._context ^= contexts.TEMPLATE_PARAM_KEY self._context |= contexts.TEMPLATE_PARAM_VALUE self._write(tokens.TemplateParamEquals()) From f401ede179b469118ac936a8646e5f5a3be128d4 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 25 Sep 2012 17:32:43 -0400 Subject: [PATCH 014/180] Implementing more stuff. --- mwparserfromhell/parser/tokenizer.c | 84 +++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index e7699fd..b895f6c 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -259,6 +259,39 @@ Tokenizer_write_text(Tokenizer* self, PyObject* text) static int Tokenizer_write_all(Tokenizer* self, PyObject* tokenlist) { + if (PySequence_Fast_GET_SIZE(tokenlist) > 0) { + PyObject* token = PySequence_Fast_GET_ITEM(tokenlist, 0); + PyObject* class = PyObject_GetAttrString(tokens, "Text"); + if (!class) return -1; + + switch (PyObject_IsInstance(token, class)) { + case 0: + break; + case 1: + PyObject* text = PyObject_GetAttrString(token, "text"); + if (!text) { + Py_DECREF(class); + return -1; + } + if (PySequence_DelItem(tokenlist, 0)) { + Py_DECREF(text); + Py_DECREF(class); + return -1; + } + if (Tokenizer_write_text(self, text)) { + Py_DECREF(text); + Py_DECREF(class); + return -1; + } + Py_DECREF(text); + break + case -1: + Py_DECREF(class); + return -1; + } + Py_DECREF(class); + } + if (Tokenizer_push_textbuffer(self)) return -1; @@ -711,6 +744,7 @@ Tokenizer_handle_template_param_value(Tokenizer* self) return -1; } Py_DECREF(token); + return 0; } /* @@ -719,7 +753,27 @@ Tokenizer_handle_template_param_value(Tokenizer* self) static PyObject* Tokenizer_handle_template_end(Tokenizer* self) { + PyObject* stack; + Py_ssize_t context = Tokenizer_CONTEXT_VAL(self); + if (context & LC_TEMPLATE_NAME) { + const char* unsafes[] = {"\n", "{", "}", "[", "]"}; + if (Tokenizer_verify_safe(self, unsafes)) + return NULL; + } + else if (context & LC_TEMPLATE_PARAM_KEY) { + stack = Tokenizer_pop_keeping_context(self); + if (!stack) return NULL; + if (Tokenizer_write_all(self, stack)) { + Py_DECREF(stack); + return NULL; + } + Py_DECREF(stack); + } + + self->head++; + stack = Tokenizer_pop(self); + return stack; } /* @@ -728,7 +782,28 @@ Tokenizer_handle_template_end(Tokenizer* self) static int Tokenizer_handle_argument_separator(Tokenizer* self) { + const char* unsafes[] = {"\n", "{{", "}}"}; + if (Tokenizer_verify_safe(self, unsafes)) + return -1; + + Py_ssize_t context = Tokenizer_CONTEXT_VAL(self); + context ^= LC_ARGUMENT_NAME; + context |= LC_ARGUMENT_DEFAULT; + if (Tokenizer_set_context(self, context)) + return -1; + + PyObject* class = PyObject_GetAttrString(tokens, "ArgumentSeparator"); + if (!class) return -1; + PyObject* token = PyInstance_New(class, NOARGS, NOKWARGS); + Py_DECREF(class); + if (!token) return -1; + if (Tokenizer_write(self, token)) { + Py_DECREF(token); + return -1; + } + Py_DECREF(token); + return 0; } /* @@ -737,7 +812,16 @@ Tokenizer_handle_argument_separator(Tokenizer* self) static PyObject* Tokenizer_handle_argument_end(Tokenizer* self) { + Py_ssize_t context = Tokenizer_CONTEXT_VAL(self); + if (context & LC_ARGUMENT_NAME) { + const char* unsafes[] = {"\n", "{{", "}}"}; + if (Tokenizer_verify_safe(self, unsafes)) + 
return NULL; + } + self->head += 2; + PyObject* stack = Tokenizer_pop(self); + return stack; } /* From 707ecc383740165096d74c471e5f1b739f752f71 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 25 Sep 2012 17:51:23 -0400 Subject: [PATCH 015/180] Implement Tokenizer_parse_wikilink() and more. --- mwparserfromhell/parser/tokenizer.c | 83 ++++++++++++++++++++++++++++++++++--- 1 file changed, 77 insertions(+), 6 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index b895f6c..9068d94 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -264,11 +264,12 @@ Tokenizer_write_all(Tokenizer* self, PyObject* tokenlist) PyObject* class = PyObject_GetAttrString(tokens, "Text"); if (!class) return -1; + PyObject* text; switch (PyObject_IsInstance(token, class)) { case 0: break; case 1: - PyObject* text = PyObject_GetAttrString(token, "text"); + text = PyObject_GetAttrString(token, "text"); if (!text) { Py_DECREF(class); return -1; @@ -284,7 +285,7 @@ Tokenizer_write_all(Tokenizer* self, PyObject* tokenlist) return -1; } Py_DECREF(text); - break + break; case -1: Py_DECREF(class); return -1; @@ -463,13 +464,20 @@ Tokenizer_parse_template(Tokenizer* self) if (!template) return -1; class = PyObject_GetAttrString(tokens, "TemplateOpen"); - if (!class) return -1; + if (!class) { + Py_DECREF(template); + return -1; + } token = PyInstance_New(class, NOARGS, NOKWARGS); Py_DECREF(class); - if (!token) return -1; + if (!token) { + Py_DECREF(template); + return -1; + } if (Tokenizer_write_first(self, token)) { Py_DECREF(token); + Py_DECREF(template); return -1; } Py_DECREF(token); @@ -515,13 +523,20 @@ Tokenizer_parse_argument(Tokenizer* self) if (!argument) return -1; class = PyObject_GetAttrString(tokens, "ArgumentOpen"); - if (!class) return -1; + if (!class) { + Py_DECREF(argument); + return -1; + } token = PyInstance_New(class, NOARGS, NOKWARGS); Py_DECREF(class); - if (!token) return -1; + if (!token) { + Py_DECREF(argument); + return -1; + } if (Tokenizer_write_first(self, token)) { Py_DECREF(token); + Py_DECREF(argument); return -1; } Py_DECREF(token); @@ -830,7 +845,63 @@ Tokenizer_handle_argument_end(Tokenizer* self) static int Tokenizer_parse_wikilink(Tokenizer* self) { + self->head += 2; + Py_ssize_t reset = self->head - 1; + + if (setjmp(exception_env) == BAD_ROUTE) { + self->head = reset; + PyObject* text = PyUnicode_FromString("[["); + if (!text) return -1; + if (Tokenizer_write_text(self, text)) { + Py_XDECREF(text); + return -1; + } + } + + else { + PyObject *class, *token; + PyObject *wikilink = Tokenizer_parse(self, LC_WIKILINK_TITLE); + if (!wikilink) return -1; + + class = PyObject_GetAttrString(tokens, "WikilinkOpen"); + if (!class) { + Py_DECREF(wikilink); + return -1; + } + token = PyInstance_New(class, NOARGS, NOKWARGS); + Py_DECREF(class); + if (!token) { + Py_DECREF(wikilink); + return -1; + } + + if (Tokenizer_write(self, token)) { + Py_DECREF(token); + Py_DECREF(wikilink); + return -1; + } + Py_DECREF(token); + if (Tokenizer_write_all(self, wikilink)) { + Py_DECREF(wikilink); + return -1; + } + Py_DECREF(wikilink); + + class = PyObject_GetAttrString(tokens, "WikilinkClose"); + if (!class) return -1; + token = PyInstance_New(class, NOARGS, NOKWARGS); + Py_DECREF(class); + if (!token) return -1; + + if (Tokenizer_write(self, token)) { + Py_DECREF(token); + return -1; + } + Py_DECREF(token); + } + + return 0; } /* From 7c29a2a65e253ad5a9473fe7fc65786666889d1a Mon Sep 17 00:00:00 2001 From: Ben 
Kurtovic Date: Tue, 25 Sep 2012 17:54:38 -0400 Subject: [PATCH 016/180] Implement Tokenizer_handle_wikilink_separator()/_end(). --- mwparserfromhell/parser/tokenizer.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 9068d94..907c55e 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -827,8 +827,7 @@ Tokenizer_handle_argument_separator(Tokenizer* self) static PyObject* Tokenizer_handle_argument_end(Tokenizer* self) { - Py_ssize_t context = Tokenizer_CONTEXT_VAL(self); - if (context & LC_ARGUMENT_NAME) { + if (Tokenizer_CONTEXT_VAL(self) & LC_ARGUMENT_NAME) { const char* unsafes[] = {"\n", "{{", "}}"}; if (Tokenizer_verify_safe(self, unsafes)) return NULL; @@ -910,7 +909,28 @@ Tokenizer_parse_wikilink(Tokenizer* self) static int Tokenizer_handle_wikilink_separator(Tokenizer* self) { + const char* unsafes[] = {"\n", "{", "}", "[", "]"}; + if (Tokenizer_verify_safe(self, unsafes)) + return -1; + + Py_ssize_t context = Tokenizer_CONTEXT_VAL(self); + context ^= LC_WIKILINK_TITLE; + context |= LC_WIKILINK_TEXT; + if (Tokenizer_set_context(self, context)) + return -1; + + PyObject* class = PyObject_GetAttrString(tokens, "WikilinkSeparator"); + if (!class) return -1; + PyObject* token = PyInstance_New(class, NOARGS, NOKWARGS); + Py_DECREF(class); + if (!token) return -1; + if (Tokenizer_write(self, token)) { + Py_DECREF(token); + return -1; + } + Py_DECREF(token); + return 0; } /* @@ -919,7 +939,15 @@ Tokenizer_handle_wikilink_separator(Tokenizer* self) static PyObject* Tokenizer_handle_wikilink_end(Tokenizer* self) { + if (Tokenizer_CONTEXT_VAL(self) & LC_WIKILINK_TITLE) { + const char* unsafes[] = {"\n", "{", "}", "[", "]"}; + if (Tokenizer_verify_safe(self, unsafes)) + return NULL; + } + self->head += 1; + PyObject* stack = Tokenizer_pop(self); + return stack; } /* From 150f3311290a8569eb960084e070eb23f6e70c3c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 25 Sep 2012 18:11:29 -0400 Subject: [PATCH 017/180] Implement Tokenizer_parse_entity(), Tokenizer_parse_comment(). 
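
Both functions below follow the attempt-and-fallback shape already used by
Tokenizer_parse_wikilink(): remember where the head was, try the route, and on
BAD_ROUTE rewind and replay the markup as plain text. Stripped of the per-case
details (the entity version pushes its own stack first, and each caller writes
its own Open/Close tokens), the shared shape is roughly this sketch, where the
context value and fallback string are illustrative parameters rather than
anything in the actual patch:

    /* Sketch of the shared attempt-and-fallback shape.  *fallback* is the
       literal markup to replay as text when the route fails, e.g. "[[". */
    static int
    Tokenizer_attempt_route(Tokenizer* self, Py_ssize_t context,
                            const char* fallback)
    {
        Py_ssize_t reset = self->head;
        if (setjmp(exception_env) == BAD_ROUTE) {
            self->head = reset;
            PyObject* text = PyUnicode_FromString(fallback);
            if (!text) return -1;
            if (Tokenizer_write_text(self, text)) {
                Py_DECREF(text);
                return -1;
            }
            Py_DECREF(text);
            return 0;
        }
        PyObject* tokenlist = Tokenizer_parse(self, context);
        if (!tokenlist) return -1;
        if (Tokenizer_write_all(self, tokenlist)) {
            Py_DECREF(tokenlist);
            return -1;
        }
        Py_DECREF(tokenlist);
        return 0;
    }
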
--- mwparserfromhell/parser/tokenizer.c | 79 ++++++++++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 2 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 907c55e..d302ea2 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -856,7 +856,6 @@ Tokenizer_parse_wikilink(Tokenizer* self) return -1; } } - else { PyObject *class, *token; PyObject *wikilink = Tokenizer_parse(self, LC_WIKILINK_TITLE); @@ -899,7 +898,6 @@ Tokenizer_parse_wikilink(Tokenizer* self) } Py_DECREF(token); } - return 0; } @@ -983,7 +981,29 @@ Tokenizer_really_parse_entity(Tokenizer* self) static int Tokenizer_parse_entity(Tokenizer* self) { + Py_ssize_t reset = self->head; + if (Tokenizer_push(self, 0)) + return -1; + if (setjmp(exception_env) == BAD_ROUTE) { + self->head = reset; + if (Tokenizer_write_text(self, Tokenizer_read(self, 0))) + return -1; + } + else { + if (Tokenizer_really_parse_entity(self)) + return -1; + + PyObject* tokenlist = Tokenizer_pop(self); + if (!tokenlist) return -1; + if (Tokenizer_write_all(self, tokenlist)) { + Py_DECREF(tokenlist); + return -1; + } + + Py_DECREF(tokenlist); + } + return 0; } /* @@ -992,7 +1012,62 @@ Tokenizer_parse_entity(Tokenizer* self) static int Tokenizer_parse_comment(Tokenizer* self) { + self->head += 4; + Py_ssize_t reset = self->head - 1; + if (setjmp(exception_env) == BAD_ROUTE) { + self->head = reset; + PyObject* text = PyUnicode_FromString("``.""" + def __init__(self, contents): super(Comment, self).__init__() self._contents = contents diff --git a/mwparserfromhell/nodes/text.py b/mwparserfromhell/nodes/text.py index 60ba847..6fda3da 100644 --- a/mwparserfromhell/nodes/text.py +++ b/mwparserfromhell/nodes/text.py @@ -29,6 +29,7 @@ __all__ = ["Text"] class Text(Node): """Represents ordinary, unformatted text with no special properties.""" + def __init__(self, value): super(Text, self).__init__() self._value = value diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index f880016..6fea468 100644 --- a/mwparserfromhell/nodes/wikilink.py +++ b/mwparserfromhell/nodes/wikilink.py @@ -30,6 +30,7 @@ __all__ = ["Wikilink"] class Wikilink(Node): """Represents an internal wikilink, like ``[[Foo|Bar]]``.""" + def __init__(self, title, text=None): super(Wikilink, self).__init__() self._title = title diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index efd28d8..eee58b9 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -50,6 +50,7 @@ class StringMixIn(object): :py:meth:`__unicode__` instead of the immutable ``self`` like the regular ``str`` type. """ + if py3k: def __str__(self): return self.__unicode__() diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 4d12dc9..379b4fa 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -38,6 +38,7 @@ class TokenizerTestCase(object): TestCTokenizer. Tests are loaded dynamically from files in the 'tokenizer' directory. """ + @classmethod def _build_test_method(cls, funcname, data): """Create and return a method to be treated as a test case method. 
diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 07b5290..4dbeceb 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -27,6 +27,7 @@ from _test_tokenizer import TokenizerTestCase class TestCTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the C tokenizer.""" + @classmethod def setUpClass(cls): from mwparserfromhell.parser._tokenizer import CTokenizer diff --git a/tests/test_docs.py b/tests/test_docs.py index 8673cb9..075b0a7 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -30,6 +30,7 @@ from mwparserfromhell.compat import py3k, str, StringIO class TestDocs(unittest.TestCase): """Integration test cases for mwparserfromhell's documentation.""" + def assertPrint(self, input, output): """Assertion check that *input*, when printed, produces *output*.""" buff = StringIO() diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index a2f2482..73e6fe7 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -27,6 +27,7 @@ from _test_tokenizer import TokenizerTestCase class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the Python tokenizer.""" + @classmethod def setUpClass(cls): from mwparserfromhell.parser.tokenizer import Tokenizer diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index b0a10cb..f6d22ae 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -384,7 +384,5 @@ class TestSmartList(unittest.TestCase): self.assertEquals([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) self.assertEquals([4, 3, 2, 1.9, 1.8], child2) - # also test whether children that exit scope are removed from parent's map - if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 8d86c8e..7b99995 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -37,6 +37,7 @@ class _FakeString(StringMixIn): class TestStringMixIn(unittest.TestCase): """Test cases for the StringMixIn class.""" + def test_docs(self): """make sure the various methods of StringMixIn have docstrings""" methods = [ From 6a741db7ce98239108f21004b2a9d2f99a63f90f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 25 Mar 2013 18:25:03 -0400 Subject: [PATCH 106/180] Applying fb71f5507eca7bc73fae764549a7579889817cba --- mwparserfromhell/parser/__init__.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index 074b9ba..1fb95b5 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -26,16 +26,16 @@ modules: the :py:mod:`~.tokenizer` and the :py:mod:`~.builder`. This module joins them together under one interface. """ +from .builder import Builder +from .tokenizer import Tokenizer try: - from ._builder import CBuilder as Builder + from ._tokenizer import CTokenizer + use_c = True except ImportError: - from .builder import Builder -try: - from ._tokenizer import CTokenizer as Tokenizer -except ImportError: - from .tokenizer import Tokenizer + CTokenizer = None + use_c = False -__all__ = ["Parser"] +__all__ = ["use_c", "Parser"] class Parser(object): """Represents a parser for wikicode. 
@@ -48,7 +48,10 @@ class Parser(object): def __init__(self, text): self.text = text - self._tokenizer = Tokenizer() + if use_c and CTokenizer: + self._tokenizer = CTokenizer() + else: + self._tokenizer = Tokenizer() self._builder = Builder() def parse(self): From 9e26264d6b8d462cd93bc4c475c91abfe6d3b501 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 25 Mar 2013 19:13:32 -0400 Subject: [PATCH 107/180] Replace deprecated alias assertEquals() with assertEqual(). --- tests/test_smart_list.py | 244 ++++++++++++++++++++++----------------------- tests/test_string_mixin.py | 228 +++++++++++++++++++++--------------------- tests/test_tokens.py | 24 ++--- 3 files changed, 248 insertions(+), 248 deletions(-) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index f6d22ae..680de9d 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -39,80 +39,80 @@ class TestSmartList(unittest.TestCase): list1 = builder([0, 1, 2, 3, "one", "two"]) list2 = builder(list(range(10))) - self.assertEquals(1, list1[1]) - self.assertEquals("one", list1[-2]) - self.assertEquals([2, 3], list1[2:4]) + self.assertEqual(1, list1[1]) + self.assertEqual("one", list1[-2]) + self.assertEqual([2, 3], list1[2:4]) self.assertRaises(IndexError, lambda: list1[6]) self.assertRaises(IndexError, lambda: list1[-7]) - self.assertEquals([0, 1, 2], list1[:3]) - self.assertEquals([0, 1, 2, 3, "one", "two"], list1[:]) - self.assertEquals([3, "one", "two"], list1[3:]) - self.assertEquals(["one", "two"], list1[-2:]) - self.assertEquals([0, 1], list1[:-4]) - self.assertEquals([], list1[6:]) - self.assertEquals([], list1[4:2]) - - self.assertEquals([0, 2, "one"], list1[0:5:2]) - self.assertEquals([0, 2], list1[0:-3:2]) - self.assertEquals([0, 1, 2, 3, "one", "two"], list1[::]) - self.assertEquals([2, 3, "one", "two"], list1[2::]) - self.assertEquals([0, 1, 2, 3], list1[:4:]) - self.assertEquals([2, 3], list1[2:4:]) - self.assertEquals([0, 2, 4, 6, 8], list2[::2]) - self.assertEquals([2, 5, 8], list2[2::3]) - self.assertEquals([0, 3], list2[:6:3]) - self.assertEquals([2, 5, 8], list2[-8:9:3]) - self.assertEquals([], list2[100000:1000:-100]) + self.assertEqual([0, 1, 2], list1[:3]) + self.assertEqual([0, 1, 2, 3, "one", "two"], list1[:]) + self.assertEqual([3, "one", "two"], list1[3:]) + self.assertEqual(["one", "two"], list1[-2:]) + self.assertEqual([0, 1], list1[:-4]) + self.assertEqual([], list1[6:]) + self.assertEqual([], list1[4:2]) + + self.assertEqual([0, 2, "one"], list1[0:5:2]) + self.assertEqual([0, 2], list1[0:-3:2]) + self.assertEqual([0, 1, 2, 3, "one", "two"], list1[::]) + self.assertEqual([2, 3, "one", "two"], list1[2::]) + self.assertEqual([0, 1, 2, 3], list1[:4:]) + self.assertEqual([2, 3], list1[2:4:]) + self.assertEqual([0, 2, 4, 6, 8], list2[::2]) + self.assertEqual([2, 5, 8], list2[2::3]) + self.assertEqual([0, 3], list2[:6:3]) + self.assertEqual([2, 5, 8], list2[-8:9:3]) + self.assertEqual([], list2[100000:1000:-100]) list1[3] = 100 - self.assertEquals(100, list1[3]) + self.assertEqual(100, list1[3]) list1[-3] = 101 - self.assertEquals([0, 1, 2, 101, "one", "two"], list1) + self.assertEqual([0, 1, 2, 101, "one", "two"], list1) list1[5:] = [6, 7, 8] - self.assertEquals([6, 7, 8], list1[5:]) - self.assertEquals([0, 1, 2, 101, "one", 6, 7, 8], list1) + self.assertEqual([6, 7, 8], list1[5:]) + self.assertEqual([0, 1, 2, 101, "one", 6, 7, 8], list1) list1[2:4] = [-1, -2, -3, -4, -5] - self.assertEquals([0, 1, -1, -2, -3, -4, -5, "one", 6, 7, 8], list1) + self.assertEqual([0, 1, -1, -2, -3, -4, 
-5, "one", 6, 7, 8], list1) list1[0:-3] = [99] - self.assertEquals([99, 6, 7, 8], list1) + self.assertEqual([99, 6, 7, 8], list1) list2[0:6:2] = [100, 102, 104] - self.assertEquals([100, 1, 102, 3, 104, 5, 6, 7, 8, 9], list2) + self.assertEqual([100, 1, 102, 3, 104, 5, 6, 7, 8, 9], list2) list2[::3] = [200, 203, 206, 209] - self.assertEquals([200, 1, 102, 203, 104, 5, 206, 7, 8, 209], list2) + self.assertEqual([200, 1, 102, 203, 104, 5, 206, 7, 8, 209], list2) list2[::] = range(7) - self.assertEquals([0, 1, 2, 3, 4, 5, 6], list2) + self.assertEqual([0, 1, 2, 3, 4, 5, 6], list2) self.assertRaises(ValueError, assign, list2, 0, 5, 2, [100, 102, 104, 106]) del list2[2] - self.assertEquals([0, 1, 3, 4, 5, 6], list2) + self.assertEqual([0, 1, 3, 4, 5, 6], list2) del list2[-3] - self.assertEquals([0, 1, 3, 5, 6], list2) + self.assertEqual([0, 1, 3, 5, 6], list2) self.assertRaises(IndexError, delete, list2, 100) self.assertRaises(IndexError, delete, list2, -6) list2[:] = range(10) del list2[3:6] - self.assertEquals([0, 1, 2, 6, 7, 8, 9], list2) + self.assertEqual([0, 1, 2, 6, 7, 8, 9], list2) del list2[-2:] - self.assertEquals([0, 1, 2, 6, 7], list2) + self.assertEqual([0, 1, 2, 6, 7], list2) del list2[:2] - self.assertEquals([2, 6, 7], list2) + self.assertEqual([2, 6, 7], list2) list2[:] = range(10) del list2[2:8:2] - self.assertEquals([0, 1, 3, 5, 7, 8, 9], list2) + self.assertEqual([0, 1, 3, 5, 7, 8, 9], list2) def _test_add_radd_iadd(self, builder): """Run tests on __r/i/add__ of a list built with *builder*.""" list1 = builder(range(5)) list2 = builder(range(5, 10)) - self.assertEquals([0, 1, 2, 3, 4, 5, 6], list1 + [5, 6]) - self.assertEquals([0, 1, 2, 3, 4], list1) - self.assertEquals(list(range(10)), list1 + list2) - self.assertEquals([-2, -1, 0, 1, 2, 3, 4], [-2, -1] + list1) - self.assertEquals([0, 1, 2, 3, 4], list1) + self.assertEqual([0, 1, 2, 3, 4, 5, 6], list1 + [5, 6]) + self.assertEqual([0, 1, 2, 3, 4], list1) + self.assertEqual(list(range(10)), list1 + list2) + self.assertEqual([-2, -1, 0, 1, 2, 3, 4], [-2, -1] + list1) + self.assertEqual([0, 1, 2, 3, 4], list1) list1 += ["foo", "bar", "baz"] - self.assertEquals([0, 1, 2, 3, 4, "foo", "bar", "baz"], list1) + self.assertEqual([0, 1, 2, 3, 4, "foo", "bar", "baz"], list1) def _test_other_magic_methods(self, builder): """Run tests on other magic methods of a list built with *builder*.""" @@ -122,13 +122,13 @@ class TestSmartList(unittest.TestCase): list4 = builder([0, 1, 2]) if py3k: - self.assertEquals("[0, 1, 2, 3, 'one', 'two']", str(list1)) - self.assertEquals(b"[0, 1, 2, 3, 'one', 'two']", bytes(list1)) - self.assertEquals("[0, 1, 2, 3, 'one', 'two']", repr(list1)) + self.assertEqual("[0, 1, 2, 3, 'one', 'two']", str(list1)) + self.assertEqual(b"[0, 1, 2, 3, 'one', 'two']", bytes(list1)) + self.assertEqual("[0, 1, 2, 3, 'one', 'two']", repr(list1)) else: - self.assertEquals("[0, 1, 2, 3, u'one', u'two']", unicode(list1)) - self.assertEquals(b"[0, 1, 2, 3, u'one', u'two']", str(list1)) - self.assertEquals(b"[0, 1, 2, 3, u'one', u'two']", repr(list1)) + self.assertEqual("[0, 1, 2, 3, u'one', u'two']", unicode(list1)) + self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", str(list1)) + self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", repr(list1)) self.assertTrue(list1 < list3) self.assertTrue(list1 <= list3) @@ -164,42 +164,42 @@ class TestSmartList(unittest.TestCase): self.assertTrue(bool(list1)) self.assertFalse(bool(list2)) - self.assertEquals(6, len(list1)) - self.assertEquals(0, len(list2)) + self.assertEqual(6, len(list1)) + 
self.assertEqual(0, len(list2)) out = [] for obj in list1: out.append(obj) - self.assertEquals([0, 1, 2, 3, "one", "two"], out) + self.assertEqual([0, 1, 2, 3, "one", "two"], out) out = [] for ch in list2: out.append(ch) - self.assertEquals([], out) + self.assertEqual([], out) gen1 = iter(list1) out = [] for i in range(len(list1)): out.append(gen1.next()) self.assertRaises(StopIteration, gen1.next) - self.assertEquals([0, 1, 2, 3, "one", "two"], out) + self.assertEqual([0, 1, 2, 3, "one", "two"], out) gen2 = iter(list2) self.assertRaises(StopIteration, gen2.next) - self.assertEquals(["two", "one", 3, 2, 1, 0], list(reversed(list1))) - self.assertEquals([], list(reversed(list2))) + self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1))) + self.assertEqual([], list(reversed(list2))) self.assertTrue("one" in list1) self.assertTrue(3 in list1) self.assertFalse(10 in list1) self.assertFalse(0 in list2) - self.assertEquals([], list2 * 5) - self.assertEquals([], 5 * list2) - self.assertEquals([0, 1, 2, 0, 1, 2, 0, 1, 2], list4 * 3) - self.assertEquals([0, 1, 2, 0, 1, 2, 0, 1, 2], 3 * list4) + self.assertEqual([], list2 * 5) + self.assertEqual([], 5 * list2) + self.assertEqual([0, 1, 2, 0, 1, 2, 0, 1, 2], list4 * 3) + self.assertEqual([0, 1, 2, 0, 1, 2, 0, 1, 2], 3 * list4) list4 *= 2 - self.assertEquals([0, 1, 2, 0, 1, 2], list4) + self.assertEqual([0, 1, 2, 0, 1, 2], list4) def _test_list_methods(self, builder): """Run tests on the public methods of a list built with *builder*.""" @@ -210,60 +210,60 @@ class TestSmartList(unittest.TestCase): list1.append(5) list1.append(1) list1.append(2) - self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2], list1) + self.assertEqual([0, 1, 2, 3, 4, 5, 1, 2], list1) - self.assertEquals(0, list1.count(6)) - self.assertEquals(2, list1.count(1)) + self.assertEqual(0, list1.count(6)) + self.assertEqual(2, list1.count(1)) list1.extend(range(5, 8)) - self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + self.assertEqual([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) - self.assertEquals(1, list1.index(1)) - self.assertEquals(6, list1.index(1, 3)) - self.assertEquals(6, list1.index(1, 3, 7)) + self.assertEqual(1, list1.index(1)) + self.assertEqual(6, list1.index(1, 3)) + self.assertEqual(6, list1.index(1, 3, 7)) self.assertRaises(ValueError, list1.index, 1, 3, 5) list1.insert(0, -1) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) list1.insert(-1, 6.5) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) list1.insert(13, 8) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1) - - self.assertEquals(8, list1.pop()) - self.assertEquals(7, list1.pop()) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) - self.assertEquals(-1, list1.pop(0)) - self.assertEquals(5, list1.pop(5)) - self.assertEquals(6.5, list1.pop(-1)) - self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) - self.assertEquals("foo", list2.pop()) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1) + + self.assertEqual(8, list1.pop()) + self.assertEqual(7, list1.pop()) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) + self.assertEqual(-1, list1.pop(0)) + self.assertEqual(5, list1.pop(5)) + self.assertEqual(6.5, list1.pop(-1)) + self.assertEqual([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) + self.assertEqual("foo", list2.pop()) self.assertRaises(IndexError, list2.pop) - 
self.assertEquals([], list2) + self.assertEqual([], list2) list1.remove(6) - self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5], list1) + self.assertEqual([0, 1, 2, 3, 4, 1, 2, 5], list1) list1.remove(1) - self.assertEquals([0, 2, 3, 4, 1, 2, 5], list1) + self.assertEqual([0, 2, 3, 4, 1, 2, 5], list1) list1.remove(1) - self.assertEquals([0, 2, 3, 4, 2, 5], list1) + self.assertEqual([0, 2, 3, 4, 2, 5], list1) self.assertRaises(ValueError, list1.remove, 1) list1.reverse() - self.assertEquals([5, 2, 4, 3, 2, 0], list1) + self.assertEqual([5, 2, 4, 3, 2, 0], list1) list1.sort() - self.assertEquals([0, 2, 2, 3, 4, 5], list1) + self.assertEqual([0, 2, 2, 3, 4, 5], list1) list1.sort(reverse=True) - self.assertEquals([5, 4, 3, 2, 2, 0], list1) + self.assertEqual([5, 4, 3, 2, 2, 0], list1) list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y)) # Distance from 3 - self.assertEquals([3, 4, 2, 2, 5, 0], list1) + self.assertEqual([3, 4, 2, 2, 5, 0], list1) list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y), reverse=True) - self.assertEquals([0, 5, 4, 2, 2, 3], list1) + self.assertEqual([0, 5, 4, 2, 2, 3], list1) list3.sort(key=lambda i: i[1]) - self.assertEquals([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) + self.assertEqual([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) list3.sort(key=lambda i: i[1], reverse=True) - self.assertEquals([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) + self.assertEqual([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) def test_docs(self): """make sure the methods of SmartList/_ListProxy have docstrings""" @@ -273,18 +273,18 @@ class TestSmartList(unittest.TestCase): expected = getattr(list, meth).__doc__ smartlist_doc = getattr(SmartList, meth).__doc__ listproxy_doc = getattr(_ListProxy, meth).__doc__ - self.assertEquals(expected, smartlist_doc) - self.assertEquals(expected, listproxy_doc) + self.assertEqual(expected, smartlist_doc) + self.assertEqual(expected, listproxy_doc) def test_doctest(self): """make sure the test embedded in SmartList's docstring passes""" parent = SmartList([0, 1, 2, 3]) - self.assertEquals([0, 1, 2, 3], parent) + self.assertEqual([0, 1, 2, 3], parent) child = parent[2:] - self.assertEquals([2, 3], child) + self.assertEqual([2, 3], child) child.append(4) - self.assertEquals([2, 3, 4], child) - self.assertEquals([0, 1, 2, 3, 4], parent) + self.assertEqual([2, 3, 4], child) + self.assertEqual([0, 1, 2, 3, 4], parent) def test_parent_get_set_del(self): """make sure SmartList's getitem/setitem/delitem work""" @@ -343,46 +343,46 @@ class TestSmartList(unittest.TestCase): parent.append(6) child1.append(7) child2.append(4.5) - self.assertEquals([0, 1, 2, 3, 4, 4.5, 5, 6, 7], parent) - self.assertEquals([2, 3, 4, 4.5, 5, 6, 7], child1) - self.assertEquals([2, 3, 4, 4.5], child2) + self.assertEqual([0, 1, 2, 3, 4, 4.5, 5, 6, 7], parent) + self.assertEqual([2, 3, 4, 4.5, 5, 6, 7], child1) + self.assertEqual([2, 3, 4, 4.5], child2) parent.insert(0, -1) parent.insert(4, 2.5) parent.insert(10, 6.5) - self.assertEquals([-1, 0, 1, 2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], parent) - self.assertEquals([2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], child1) - self.assertEquals([2, 2.5, 3, 4, 4.5], child2) + self.assertEqual([-1, 0, 1, 2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], parent) + self.assertEqual([2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], child1) + self.assertEqual([2, 2.5, 3, 4, 4.5], child2) - self.assertEquals(7, parent.pop()) - self.assertEquals(6.5, child1.pop()) - self.assertEquals(4.5, child2.pop()) - self.assertEquals([-1, 0, 1, 2, 2.5, 3, 4, 5, 6], parent) - self.assertEquals([2, 2.5, 3, 
4, 5, 6], child1) - self.assertEquals([2, 2.5, 3, 4], child2) + self.assertEqual(7, parent.pop()) + self.assertEqual(6.5, child1.pop()) + self.assertEqual(4.5, child2.pop()) + self.assertEqual([-1, 0, 1, 2, 2.5, 3, 4, 5, 6], parent) + self.assertEqual([2, 2.5, 3, 4, 5, 6], child1) + self.assertEqual([2, 2.5, 3, 4], child2) parent.remove(-1) child1.remove(2.5) - self.assertEquals([0, 1, 2, 3, 4, 5, 6], parent) - self.assertEquals([2, 3, 4, 5, 6], child1) - self.assertEquals([2, 3, 4], child2) + self.assertEqual([0, 1, 2, 3, 4, 5, 6], parent) + self.assertEqual([2, 3, 4, 5, 6], child1) + self.assertEqual([2, 3, 4], child2) - self.assertEquals(0, parent.pop(0)) - self.assertEquals([1, 2, 3, 4, 5, 6], parent) - self.assertEquals([2, 3, 4, 5, 6], child1) - self.assertEquals([2, 3, 4], child2) + self.assertEqual(0, parent.pop(0)) + self.assertEqual([1, 2, 3, 4, 5, 6], parent) + self.assertEqual([2, 3, 4, 5, 6], child1) + self.assertEqual([2, 3, 4], child2) child2.reverse() - self.assertEquals([1, 4, 3, 2, 5, 6], parent) - self.assertEquals([4, 3, 2, 5, 6], child1) - self.assertEquals([4, 3, 2], child2) + self.assertEqual([1, 4, 3, 2, 5, 6], parent) + self.assertEqual([4, 3, 2, 5, 6], child1) + self.assertEqual([4, 3, 2], child2) parent.extend([7, 8]) child1.extend([8.1, 8.2]) child2.extend([1.9, 1.8]) - self.assertEquals([1, 4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], parent) - self.assertEquals([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) - self.assertEquals([4, 3, 2, 1.9, 1.8], child2) + self.assertEqual([1, 4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], parent) + self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) + self.assertEqual([4, 3, 2, 1.9, 1.8], child2) if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 7b99995..6ef6344 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -56,17 +56,17 @@ class TestStringMixIn(unittest.TestCase): for meth in methods: expected = getattr(str, meth).__doc__ actual = getattr(StringMixIn, meth).__doc__ - self.assertEquals(expected, actual) + self.assertEqual(expected, actual) def test_types(self): """make sure StringMixIns convert to different types correctly""" fstr = _FakeString("fake string") - self.assertEquals(str(fstr), "fake string") - self.assertEquals(bytes(fstr), b"fake string") + self.assertEqual(str(fstr), "fake string") + self.assertEqual(bytes(fstr), b"fake string") if py3k: - self.assertEquals(repr(fstr), "'fake string'") + self.assertEqual(repr(fstr), "'fake string'") else: - self.assertEquals(repr(fstr), b"u'fake string'") + self.assertEqual(repr(fstr), b"u'fake string'") self.assertIsInstance(str(fstr), str) self.assertIsInstance(bytes(fstr), bytes) @@ -119,18 +119,18 @@ class TestStringMixIn(unittest.TestCase): self.assertTrue(str1) self.assertFalse(str2) - self.assertEquals(11, len(str1)) - self.assertEquals(0, len(str2)) + self.assertEqual(11, len(str1)) + self.assertEqual(0, len(str2)) out = [] for ch in str1: out.append(ch) - self.assertEquals(expected, out) + self.assertEqual(expected, out) out = [] for ch in str2: out.append(ch) - self.assertEquals([], out) + self.assertEqual([], out) gen1 = iter(str1) gen2 = iter(str2) @@ -141,16 +141,16 @@ class TestStringMixIn(unittest.TestCase): for i in range(len(str1)): out.append(gen1.next()) self.assertRaises(StopIteration, gen1.next) - self.assertEquals(expected, out) + self.assertEqual(expected, out) self.assertRaises(StopIteration, gen2.next) - self.assertEquals("gnirts 
ekaf", "".join(list(reversed(str1)))) - self.assertEquals([], list(reversed(str2))) + self.assertEqual("gnirts ekaf", "".join(list(reversed(str1)))) + self.assertEqual([], list(reversed(str2))) - self.assertEquals("f", str1[0]) - self.assertEquals(" ", str1[4]) - self.assertEquals("g", str1[10]) - self.assertEquals("n", str1[-2]) + self.assertEqual("f", str1[0]) + self.assertEqual(" ", str1[4]) + self.assertEqual("g", str1[10]) + self.assertEqual("n", str1[-2]) self.assertRaises(IndexError, lambda: str1[11]) self.assertRaises(IndexError, lambda: str2[0]) @@ -165,75 +165,75 @@ class TestStringMixIn(unittest.TestCase): def test_other_methods(self): """test the remaining non-magic methods of StringMixIn""" str1 = _FakeString("fake string") - self.assertEquals("Fake string", str1.capitalize()) + self.assertEqual("Fake string", str1.capitalize()) - self.assertEquals(" fake string ", str1.center(15)) - self.assertEquals(" fake string ", str1.center(16)) - self.assertEquals("qqfake stringqq", str1.center(15, "q")) + self.assertEqual(" fake string ", str1.center(15)) + self.assertEqual(" fake string ", str1.center(16)) + self.assertEqual("qqfake stringqq", str1.center(15, "q")) - self.assertEquals(1, str1.count("e")) - self.assertEquals(0, str1.count("z")) - self.assertEquals(1, str1.count("r", 7)) - self.assertEquals(0, str1.count("r", 8)) - self.assertEquals(1, str1.count("r", 5, 9)) - self.assertEquals(0, str1.count("r", 5, 7)) + self.assertEqual(1, str1.count("e")) + self.assertEqual(0, str1.count("z")) + self.assertEqual(1, str1.count("r", 7)) + self.assertEqual(0, str1.count("r", 8)) + self.assertEqual(1, str1.count("r", 5, 9)) + self.assertEqual(0, str1.count("r", 5, 7)) if not py3k: str2 = _FakeString("fo") - self.assertEquals(str1, str1.decode()) + self.assertEqual(str1, str1.decode()) actual = _FakeString("\\U00010332\\U0001033f\\U00010344") - self.assertEquals("𐌲𐌿𐍄", actual.decode("unicode_escape")) + self.assertEqual("𐌲𐌿𐍄", actual.decode("unicode_escape")) self.assertRaises(UnicodeError, str2.decode, "punycode") - self.assertEquals("", str2.decode("punycode", "ignore")) + self.assertEqual("", str2.decode("punycode", "ignore")) str3 = _FakeString("𐌲𐌿𐍄") - self.assertEquals(b"fake string", str1.encode()) - self.assertEquals(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", + self.assertEqual(b"fake string", str1.encode()) + self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", str3.encode("utf8")) - self.assertEquals(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", + self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", str3.encode(encoding="utf8")) self.assertRaises(UnicodeEncodeError, str3.encode) self.assertRaises(UnicodeEncodeError, str3.encode, "ascii") self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict") self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") - self.assertEquals("", str3.encode("ascii", "ignore")) - self.assertEquals("", str3.encode(errors="ignore")) + self.assertEqual("", str3.encode("ascii", "ignore")) + self.assertEqual("", str3.encode(errors="ignore")) self.assertTrue(str1.endswith("ing")) self.assertFalse(str1.endswith("ingh")) str4 = _FakeString("\tfoobar") - self.assertEquals("fake string", str1) - self.assertEquals(" foobar", str4.expandtabs()) - self.assertEquals(" foobar", str4.expandtabs(4)) + self.assertEqual("fake string", str1) + self.assertEqual(" foobar", str4.expandtabs()) + self.assertEqual(" foobar", str4.expandtabs(4)) - self.assertEquals(3, str1.find("e")) - 
self.assertEquals(-1, str1.find("z")) - self.assertEquals(7, str1.find("r", 7)) - self.assertEquals(-1, str1.find("r", 8)) - self.assertEquals(7, str1.find("r", 5, 9)) - self.assertEquals(-1, str1.find("r", 5, 7)) + self.assertEqual(3, str1.find("e")) + self.assertEqual(-1, str1.find("z")) + self.assertEqual(7, str1.find("r", 7)) + self.assertEqual(-1, str1.find("r", 8)) + self.assertEqual(7, str1.find("r", 5, 9)) + self.assertEqual(-1, str1.find("r", 5, 7)) str5 = _FakeString("foo{0}baz") str6 = _FakeString("foo{abc}baz") str7 = _FakeString("foo{0}{abc}buzz") str8 = _FakeString("{0}{1}") - self.assertEquals("fake string", str1.format()) - self.assertEquals("foobarbaz", str5.format("bar")) - self.assertEquals("foobarbaz", str6.format(abc="bar")) - self.assertEquals("foobarbazbuzz", str7.format("bar", abc="baz")) + self.assertEqual("fake string", str1.format()) + self.assertEqual("foobarbaz", str5.format("bar")) + self.assertEqual("foobarbaz", str6.format(abc="bar")) + self.assertEqual("foobarbazbuzz", str7.format("bar", abc="baz")) self.assertRaises(IndexError, str8.format, "abc") if py3k: - self.assertEquals("fake string", str1.format_map({})) - self.assertEquals("foobarbaz", str6.format_map({"abc": "bar"})) + self.assertEqual("fake string", str1.format_map({})) + self.assertEqual("foobarbaz", str6.format_map({"abc": "bar"})) self.assertRaises(ValueError, str5.format_map, {0: "abc"}) - self.assertEquals(3, str1.index("e")) + self.assertEqual(3, str1.index("e")) self.assertRaises(ValueError, str1.index, "z") - self.assertEquals(7, str1.index("r", 7)) + self.assertEqual(7, str1.index("r", 7)) self.assertRaises(ValueError, str1.index, "r", 8) - self.assertEquals(7, str1.index("r", 5, 9)) + self.assertEqual(7, str1.index("r", 5, 9)) self.assertRaises(ValueError, str1.index, "r", 5, 7) str9 = _FakeString("foobar") @@ -303,120 +303,120 @@ class TestStringMixIn(unittest.TestCase): self.assertFalse(str15.isupper()) self.assertTrue(str21.isupper()) - self.assertEquals("foobar", str15.join(["foo", "bar"])) - self.assertEquals("foo123bar123baz", str12.join(("foo", "bar", "baz"))) + self.assertEqual("foobar", str15.join(["foo", "bar"])) + self.assertEqual("foo123bar123baz", str12.join(("foo", "bar", "baz"))) - self.assertEquals("fake string ", str1.ljust(15)) - self.assertEquals("fake string ", str1.ljust(16)) - self.assertEquals("fake stringqqqq", str1.ljust(15, "q")) + self.assertEqual("fake string ", str1.ljust(15)) + self.assertEqual("fake string ", str1.ljust(16)) + self.assertEqual("fake stringqqqq", str1.ljust(15, "q")) str22 = _FakeString("ß") - self.assertEquals("", str15.lower()) - self.assertEquals("foobar", str16.lower()) - self.assertEquals("ß", str22.lower()) + self.assertEqual("", str15.lower()) + self.assertEqual("foobar", str16.lower()) + self.assertEqual("ß", str22.lower()) if py3k: - self.assertEquals("", str15.casefold()) - self.assertEquals("foobar", str16.casefold()) - self.assertEquals("ss", str22.casefold()) + self.assertEqual("", str15.casefold()) + self.assertEqual("foobar", str16.casefold()) + self.assertEqual("ss", str22.casefold()) str23 = _FakeString(" fake string ") - self.assertEquals("fake string", str1.lstrip()) - self.assertEquals("fake string ", str23.lstrip()) - self.assertEquals("ke string", str1.lstrip("abcdef")) + self.assertEqual("fake string", str1.lstrip()) + self.assertEqual("fake string ", str23.lstrip()) + self.assertEqual("ke string", str1.lstrip("abcdef")) - self.assertEquals(("fa", "ke", " string"), str1.partition("ke")) - self.assertEquals(("fake 
string", "", ""), str1.partition("asdf")) + self.assertEqual(("fa", "ke", " string"), str1.partition("ke")) + self.assertEqual(("fake string", "", ""), str1.partition("asdf")) str24 = _FakeString("boo foo moo") - self.assertEquals("real string", str1.replace("fake", "real")) - self.assertEquals("bu fu moo", str24.replace("oo", "u", 2)) + self.assertEqual("real string", str1.replace("fake", "real")) + self.assertEqual("bu fu moo", str24.replace("oo", "u", 2)) - self.assertEquals(3, str1.rfind("e")) - self.assertEquals(-1, str1.rfind("z")) - self.assertEquals(7, str1.rfind("r", 7)) - self.assertEquals(-1, str1.rfind("r", 8)) - self.assertEquals(7, str1.rfind("r", 5, 9)) - self.assertEquals(-1, str1.rfind("r", 5, 7)) + self.assertEqual(3, str1.rfind("e")) + self.assertEqual(-1, str1.rfind("z")) + self.assertEqual(7, str1.rfind("r", 7)) + self.assertEqual(-1, str1.rfind("r", 8)) + self.assertEqual(7, str1.rfind("r", 5, 9)) + self.assertEqual(-1, str1.rfind("r", 5, 7)) - self.assertEquals(3, str1.rindex("e")) + self.assertEqual(3, str1.rindex("e")) self.assertRaises(ValueError, str1.rindex, "z") - self.assertEquals(7, str1.rindex("r", 7)) + self.assertEqual(7, str1.rindex("r", 7)) self.assertRaises(ValueError, str1.rindex, "r", 8) - self.assertEquals(7, str1.rindex("r", 5, 9)) + self.assertEqual(7, str1.rindex("r", 5, 9)) self.assertRaises(ValueError, str1.rindex, "r", 5, 7) - self.assertEquals(" fake string", str1.rjust(15)) - self.assertEquals(" fake string", str1.rjust(16)) - self.assertEquals("qqqqfake string", str1.rjust(15, "q")) + self.assertEqual(" fake string", str1.rjust(15)) + self.assertEqual(" fake string", str1.rjust(16)) + self.assertEqual("qqqqfake string", str1.rjust(15, "q")) - self.assertEquals(("fa", "ke", " string"), str1.rpartition("ke")) - self.assertEquals(("", "", "fake string"), str1.rpartition("asdf")) + self.assertEqual(("fa", "ke", " string"), str1.rpartition("ke")) + self.assertEqual(("", "", "fake string"), str1.rpartition("asdf")) str25 = _FakeString(" this is a sentence with whitespace ") actual = ["this", "is", "a", "sentence", "with", "whitespace"] - self.assertEquals(actual, str25.rsplit()) - self.assertEquals(actual, str25.rsplit(None)) + self.assertEqual(actual, str25.rsplit()) + self.assertEqual(actual, str25.rsplit(None)) actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", "", "whitespace", ""] - self.assertEquals(actual, str25.rsplit(" ")) + self.assertEqual(actual, str25.rsplit(" ")) actual = [" this is a", "sentence", "with", "whitespace"] - self.assertEquals(actual, str25.rsplit(None, 3)) + self.assertEqual(actual, str25.rsplit(None, 3)) actual = [" this is a sentence with", "", "whitespace", ""] - self.assertEquals(actual, str25.rsplit(" ", 3)) + self.assertEqual(actual, str25.rsplit(" ", 3)) if py3k: - self.assertEquals(actual, str25.rsplit(maxsplit=3)) + self.assertEqual(actual, str25.rsplit(maxsplit=3)) - self.assertEquals("fake string", str1.rstrip()) - self.assertEquals(" fake string", str23.rstrip()) - self.assertEquals("fake stri", str1.rstrip("ngr")) + self.assertEqual("fake string", str1.rstrip()) + self.assertEqual(" fake string", str23.rstrip()) + self.assertEqual("fake stri", str1.rstrip("ngr")) actual = ["this", "is", "a", "sentence", "with", "whitespace"] - self.assertEquals(actual, str25.split()) - self.assertEquals(actual, str25.split(None)) + self.assertEqual(actual, str25.split()) + self.assertEqual(actual, str25.split(None)) actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", "", "whitespace", ""] 
- self.assertEquals(actual, str25.split(" ")) + self.assertEqual(actual, str25.split(" ")) actual = ["this", "is", "a", "sentence with whitespace "] - self.assertEquals(actual, str25.split(None, 3)) + self.assertEqual(actual, str25.split(None, 3)) actual = ["", "", "", "this is a sentence with whitespace "] - self.assertEquals(actual, str25.split(" ", 3)) + self.assertEqual(actual, str25.split(" ", 3)) if py3k: - self.assertEquals(actual, str25.split(maxsplit=3)) + self.assertEqual(actual, str25.split(maxsplit=3)) str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") - self.assertEquals(["lines", "of", "text", "are", "presented", "here"], + self.assertEqual(["lines", "of", "text", "are", "presented", "here"], str26.splitlines()) - self.assertEquals(["lines\n", "of\n", "text\r\n", "are\r\n", + self.assertEqual(["lines\n", "of\n", "text\r\n", "are\r\n", "presented\n", "here"], str26.splitlines(True)) self.assertTrue(str1.startswith("fake")) self.assertFalse(str1.startswith("faker")) - self.assertEquals("fake string", str1.strip()) - self.assertEquals("fake string", str23.strip()) - self.assertEquals("ke stri", str1.strip("abcdefngr")) + self.assertEqual("fake string", str1.strip()) + self.assertEqual("fake string", str23.strip()) + self.assertEqual("ke stri", str1.strip("abcdefngr")) - self.assertEquals("fOObAR", str16.swapcase()) + self.assertEqual("fOObAR", str16.swapcase()) - self.assertEquals("Fake String", str1.title()) + self.assertEqual("Fake String", str1.title()) if py3k: table1 = str.maketrans({97: "1", 101: "2", 105: "3", 111: "4", 117: "5"}) table2 = str.maketrans("aeiou", "12345") table3 = str.maketrans("aeiou", "12345", "rts") - self.assertEquals("f1k2 str3ng", str1.translate(table1)) - self.assertEquals("f1k2 str3ng", str1.translate(table2)) - self.assertEquals("f1k2 3ng", str1.translate(table3)) + self.assertEqual("f1k2 str3ng", str1.translate(table1)) + self.assertEqual("f1k2 str3ng", str1.translate(table2)) + self.assertEqual("f1k2 3ng", str1.translate(table3)) else: table = {97: "1", 101: "2", 105: "3", 111: "4", 117: "5"} - self.assertEquals("f1k2 str3ng", str1.translate(table)) + self.assertEqual("f1k2 str3ng", str1.translate(table)) - self.assertEquals("", str15.upper()) - self.assertEquals("FOOBAR", str16.upper()) + self.assertEqual("", str15.upper()) + self.assertEqual("FOOBAR", str16.upper()) - self.assertEquals("123", str12.zfill(3)) - self.assertEquals("000123", str12.zfill(6)) + self.assertEqual("123", str12.zfill(3)) + self.assertEqual("000123", str12.zfill(6)) if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 5a18b8e..1449ad2 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -42,8 +42,8 @@ class TestTokens(unittest.TestCase): token1 = tokens.Token() token2 = tokens.Token(foo="bar", baz=123) - self.assertEquals("bar", token2.foo) - self.assertEquals(123, token2.baz) + self.assertEqual("bar", token2.foo) + self.assertEqual(123, token2.baz) self.assertRaises(KeyError, lambda: token1.foo) self.assertRaises(KeyError, lambda: token2.bar) @@ -51,8 +51,8 @@ class TestTokens(unittest.TestCase): token2.foo = "ham" del token2.baz - self.assertEquals("eggs", token1.spam) - self.assertEquals("ham", token2.foo) + self.assertEqual("eggs", token1.spam) + self.assertEqual("ham", token2.foo) self.assertRaises(KeyError, lambda: token2.baz) self.assertRaises(KeyError, delattr, token2, "baz") @@ -63,15 +63,15 @@ class TestTokens(unittest.TestCase): token3 = tokens.Text(text="earwig" * 100) 
hundredchars = ("earwig" * 100)[:97] + "..." - self.assertEquals("Token()", repr(token1)) + self.assertEqual("Token()", repr(token1)) if py3k: token2repr = "Token(foo='bar', baz=123)" token3repr = "Text(text='" + hundredchars + "')" else: token2repr = "Token(foo=u'bar', baz=123)" token3repr = "Text(text=u'" + hundredchars + "')" - self.assertEquals(token2repr, repr(token2)) - self.assertEquals(token3repr, repr(token3)) + self.assertEqual(token2repr, repr(token2)) + self.assertEqual(token3repr, repr(token3)) def test_equality(self): """check that equivalent tokens are considered equal""" @@ -82,10 +82,10 @@ class TestTokens(unittest.TestCase): token5 = tokens.Text(text="asdf") token6 = tokens.TemplateOpen(text="asdf") - self.assertEquals(token1, token2) - self.assertEquals(token2, token1) - self.assertEquals(token4, token5) - self.assertEquals(token5, token4) + self.assertEqual(token1, token2) + self.assertEqual(token2, token1) + self.assertEqual(token4, token5) + self.assertEqual(token5, token4) self.assertNotEquals(token1, token3) self.assertNotEquals(token2, token3) self.assertNotEquals(token4, token6) @@ -99,7 +99,7 @@ class TestTokens(unittest.TestCase): tokens.Text(text="earwig") ] for token in tests: - self.assertEquals(token, eval(repr(token), vars(tokens))) + self.assertEqual(token, eval(repr(token), vars(tokens))) if __name__ == "__main__": unittest.main(verbosity=2) From 97a837c1e8d8fbaae71360f442f53ca7bd81a58f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 01:36:02 -0400 Subject: [PATCH 108/180] Implement test_parser(). Clean up a few lambdas in TestSmartList. --- tests/test_parser.py | 62 +++++++++++++++++++++++++++++++++++++++++++++++- tests/test_smart_list.py | 8 +++---- 2 files changed, 65 insertions(+), 5 deletions(-) diff --git a/tests/test_parser.py b/tests/test_parser.py index 5ea2b49..6e775ce 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -23,8 +23,68 @@ from __future__ import unicode_literals import unittest +from mwparserfromhell.compat import range +from mwparserfromhell.nodes import Template, Text, Wikilink +from mwparserfromhell.nodes.extras import Parameter +from mwparserfromhell.parser import Parser +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + class TestParser(unittest.TestCase): - pass + """Tests for the Parser class itself, which tokenizes and builds nodes.""" + + def assertNodesEqual(self, expected, actual): + """Assert that two Nodes are the same type and have the same data.""" + self.assertIs(type(expected), type(actual)) + if isinstance(expected, Text): + self.assertEqual(expected.value, actual.value) + elif isinstance(expected, Template): + self.assertWikicodeEqual(expected.name, actual.name) + length = len(expected.params) + self.assertEqual(length, len(actual.params)) + for i in range(length): + exp_param = expected.params[i] + act_param = actual.params[i] + self.assertWikicodeEqual(exp_param.name, act_param.name) + self.assertWikicodeEqual(exp_param.value, act_param.value) + self.assertIs(exp_param.showkey, act_param.showkey) + elif isinstance(expected, Wikilink): + self.assertWikicodeEqual(expected.title, actual.title) + if expected.text is not None: + self.assertWikicodeEqual(expected.text, actual.text) + else: + self.assertIs(None, actual.text) + + def assertWikicodeEqual(self, expected, actual): + """Assert that two Wikicode objects have the same data.""" + self.assertIsInstance(actual, Wikicode) + length = len(expected.nodes) + self.assertEqual(length, 
len(actual.nodes)) + for i in range(length): + self.assertNodesEqual(expected.get(i), actual.get(i)) + + def test_parser(self): + """integration test for parsing overall""" + text = "this is text; {{this|is=a|template={{with|[[links]]|in}}it}}" + wrap = lambda L: Wikicode(SmartList(L)) + expected = wrap([ + Text("this is text; "), + Template(wrap([Text("this")]), [ + Parameter(wrap([Text("is")]), wrap([Text("a")])), + Parameter(wrap([Text("template")]), wrap([ + Template(wrap([Text("with")]), [ + Parameter(wrap([Text("1")]), + wrap([Wikilink(wrap([Text("links")]))]), + showkey=False), + Parameter(wrap([Text("2")]), + wrap([Text("in")]), showkey=False) + ]), + Text("it") + ])) + ]) + ]) + actual = Parser(text).parse() + self.assertWikicodeEqual(expected, actual) if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 680de9d..d821ccd 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -288,19 +288,19 @@ class TestSmartList(unittest.TestCase): def test_parent_get_set_del(self): """make sure SmartList's getitem/setitem/delitem work""" - self._test_get_set_del_item(lambda L: SmartList(L)) + self._test_get_set_del_item(SmartList) def test_parent_add(self): """make sure SmartList's add/radd/iadd work""" - self._test_add_radd_iadd(lambda L: SmartList(L)) + self._test_add_radd_iadd(SmartList) def test_parent_unaffected_magics(self): """sanity checks against SmartList features that were not modified""" - self._test_other_magic_methods(lambda L: SmartList(L)) + self._test_other_magic_methods(SmartList) def test_parent_methods(self): """make sure SmartList's non-magic methods work, like append()""" - self._test_list_methods(lambda L: SmartList(L)) + self._test_list_methods(SmartList) def test_child_get_set_del(self): """make sure _ListProxy's getitem/setitem/delitem work""" From f8032695146f032108c1b736631f546712689372 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 17:19:08 -0400 Subject: [PATCH 109/180] Add a USES_C field to the tokenizers; add TestParser.test_use_c() --- mwparserfromhell/parser/tokenizer.c | 2 ++ mwparserfromhell/parser/tokenizer.py | 1 + tests/test_parser.py | 13 ++++++++++--- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 8c96500..d3abb22 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1387,6 +1387,8 @@ init_tokenizer(void) module = Py_InitModule("_tokenizer", module_methods); Py_INCREF(&TokenizerType); PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); + Py_INCREF(Py_True); + PyDict_SetItemString(TokenizerType.tp_dict, "USES_C", Py_True); tempmod = PyImport_ImportModule("htmlentitydefs"); if (!tempmod) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 67638ca..0bf0322 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -38,6 +38,7 @@ class BadRoute(Exception): class Tokenizer(object): """Creates a list of tokens from a string of wikicode.""" + USES_C = False START = object() END = object() MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", diff --git a/tests/test_parser.py b/tests/test_parser.py index 6e775ce..4f718c8 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -23,10 +23,10 @@ from __future__ import unicode_literals import unittest +from mwparserfromhell import parser from 
mwparserfromhell.compat import range from mwparserfromhell.nodes import Template, Text, Wikilink from mwparserfromhell.nodes.extras import Parameter -from mwparserfromhell.parser import Parser from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode @@ -63,7 +63,14 @@ class TestParser(unittest.TestCase): for i in range(length): self.assertNodesEqual(expected.get(i), actual.get(i)) - def test_parser(self): + def test_use_c(self): + """make sure the correct tokenizer is used""" + if parser.use_c: + self.assertTrue(parser.Parser(None)._tokenizer.USES_C) + parser.use_c = False + self.assertFalse(parser.Parser(None)._tokenizer.USES_C) + + def test_parsing(self): """integration test for parsing overall""" text = "this is text; {{this|is=a|template={{with|[[links]]|in}}it}}" wrap = lambda L: Wikicode(SmartList(L)) @@ -83,7 +90,7 @@ class TestParser(unittest.TestCase): ])) ]) ]) - actual = Parser(text).parse() + actual = parser.Parser(text).parse() self.assertWikicodeEqual(expected, actual) if __name__ == "__main__": From 27a3503aa113c12971fab6a1d8fd676180b70449 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 17:22:37 -0400 Subject: [PATCH 110/180] Add test_uses_c() to TestPyTokenizer and TestCTokenizer --- tests/test_ctokenizer.py | 8 +++++++- tests/test_pytokenizer.py | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 4dbeceb..7ef8975 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -23,6 +23,8 @@ from __future__ import unicode_literals import unittest +from mwparserfromhell.parser._tokenizer import CTokenizer + from _test_tokenizer import TokenizerTestCase class TestCTokenizer(TokenizerTestCase, unittest.TestCase): @@ -30,8 +32,12 @@ class TestCTokenizer(TokenizerTestCase, unittest.TestCase): @classmethod def setUpClass(cls): - from mwparserfromhell.parser._tokenizer import CTokenizer cls.tokenizer = CTokenizer + def test_uses_c(self): + """make sure the C tokenizer identifies as using a C extension""" + self.assertTrue(CTokenizer.USES_C) + self.assertTrue(CTokenizer().USES_C) + if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 73e6fe7..3e598bf 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -23,6 +23,8 @@ from __future__ import unicode_literals import unittest +from mwparserfromhell.parser.tokenizer import Tokenizer + from _test_tokenizer import TokenizerTestCase class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): @@ -30,8 +32,12 @@ class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): @classmethod def setUpClass(cls): - from mwparserfromhell.parser.tokenizer import Tokenizer cls.tokenizer = Tokenizer + def test_uses_c(self): + """make sure the Python tokenizer identifies as not using C""" + self.assertFalse(Tokenizer.USES_C) + self.assertFalse(Tokenizer().USES_C) + if __name__ == "__main__": unittest.main(verbosity=2) From 5ca6f6c755bb8b3d3a3190bab4cf6f0a1eb6b2a7 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 17:40:39 -0400 Subject: [PATCH 111/180] Skip test_readme_5() if web query fails. 
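Network access from the test suite is inherently flaky, so a failed HTTP
fetch should make the test report as skipped rather than failed. The fix
wraps the web call in try/except and calls unittest's skipTest(). A minimal
sketch of the pattern, separate from the actual diff below (the class name
and URL here are made up, and it assumes Python 2's urllib as used by the
code being patched):

    import unittest
    import urllib

    class ExampleWebTest(unittest.TestCase):
        def test_fetch(self):
            try:
                raw = urllib.urlopen("http://example.com/").read()
            except IOError:
                self.skipTest("cannot continue because of unsuccessful web call")
            self.assertTrue(raw)  # real assertions would go here

skipTest() raises unittest.SkipTest internally, so execution stops at that
point and the runner counts the test as skipped instead of errored.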
--- tests/test_docs.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_docs.py b/tests/test_docs.py index 075b0a7..971c5d1 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -113,7 +113,10 @@ class TestDocs(unittest.TestCase): title = "Test" data = {"action": "query", "prop": "revisions", "rvlimit": 1, "rvprop": "content", "format": "json", "titles": title} - raw = urllib.urlopen(url1, urllib.urlencode(data)).read() + try: + raw = urllib.urlopen(url1, urllib.urlencode(data)).read() + except IOError: + self.skipTest("cannot continue because of unsuccessful web call") res = json.loads(raw) text = res["query"]["pages"].values()[0]["revisions"][0]["*"] expected = urllib.urlopen(url2.format(title)).read().decode("utf8") From 7f87a1c4b371f813d5006b25cf39f2b40b4dc58e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 19:39:12 -0400 Subject: [PATCH 112/180] Apply bugfixes so that some tests pass on Python 3. - Skip CTokenizer tests if CTokenizer is not available. - TestStringMixin: Don't make assumptions about default encoding. - Add urllib stuff to mwparserfromhell.compat. - Fix compat.py's line endings. - gen.next() -> next(gen) - assert*Equals() -> assert*Equal() --- mwparserfromhell/compat.py | 69 +++++++++++++++++++++------------------- mwparserfromhell/string_mixin.py | 2 +- tests/test_ctokenizer.py | 6 +++- tests/test_docs.py | 14 ++++---- tests/test_smart_list.py | 6 ++-- tests/test_string_mixin.py | 33 ++++++++++++------- tests/test_tokens.py | 17 ++++++---- 7 files changed, 85 insertions(+), 62 deletions(-) diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index 48b9807..34870e6 100755 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -1,33 +1,36 @@ -# -*- coding: utf-8 -*- - -""" -Implements support for both Python 2 and Python 3 by defining common types in -terms of their Python 2/3 variants. For example, :py:class:`str` is set to -:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, -:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These -types are meant to be imported directly from within the parser's modules. -""" - -import sys - -py3k = sys.version_info[0] == 3 - -if py3k: - bytes = bytes - str = str - basestring = str - range = range - maxsize = sys.maxsize - import html.entities as htmlentities - from io import StringIO - -else: - bytes = str - str = unicode - basestring = basestring - range = xrange - maxsize = sys.maxint - import htmlentitydefs as htmlentities - from StringIO import StringIO - -del sys +# -*- coding: utf-8 -*- + +""" +Implements support for both Python 2 and Python 3 by defining common types in +terms of their Python 2/3 variants. For example, :py:class:`str` is set to +:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, +:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These +types are meant to be imported directly from within the parser's modules. 
+""" + +import sys + +py3k = sys.version_info[0] == 3 + +if py3k: + bytes = bytes + str = str + basestring = str + range = range + maxsize = sys.maxsize + import html.entities as htmlentities + from io import StringIO + from urllib.parse import urlencode + from urllib.request import urlopen + +else: + bytes = str + str = unicode + basestring = basestring + range = xrange + maxsize = sys.maxint + import htmlentitydefs as htmlentities + from StringIO import StringIO + from urllib import urlencode, urlopen + +del sys diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index eee58b9..6bee9c4 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -252,8 +252,8 @@ class StringMixIn(object): return self.__unicode__().lstrip(chars) if py3k: - @inheritdoc @staticmethod + @inheritdoc def maketrans(self, x, y=None, z=None): if z is None: if y is None: diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 7ef8975..f21378c 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -23,10 +23,14 @@ from __future__ import unicode_literals import unittest -from mwparserfromhell.parser._tokenizer import CTokenizer +try: + from mwparserfromhell.parser._tokenizer import CTokenizer +except ImportError: + CTokenizer = None from _test_tokenizer import TokenizerTestCase +@unittest.skipUnless(CTokenizer, "C tokenizer not available") class TestCTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the C tokenizer.""" diff --git a/tests/test_docs.py b/tests/test_docs.py index 971c5d1..3b23bb7 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -23,10 +23,9 @@ from __future__ import print_function, unicode_literals import json import unittest -import urllib import mwparserfromhell -from mwparserfromhell.compat import py3k, str, StringIO +from mwparserfromhell.compat import py3k, str, StringIO, urlencode, urlopen class TestDocs(unittest.TestCase): """Integration test cases for mwparserfromhell's documentation.""" @@ -114,12 +113,15 @@ class TestDocs(unittest.TestCase): data = {"action": "query", "prop": "revisions", "rvlimit": 1, "rvprop": "content", "format": "json", "titles": title} try: - raw = urllib.urlopen(url1, urllib.urlencode(data)).read() + raw = urlopen(url1, urlencode(data).encode("utf8")).read() + except IOError: + self.skipTest("cannot continue because of unsuccessful web call") + res = json.loads(raw.decode("utf8")) + text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"] + try: + expected = urlopen(url2.format(title)).read().decode("utf8") except IOError: self.skipTest("cannot continue because of unsuccessful web call") - res = json.loads(raw) - text = res["query"]["pages"].values()[0]["revisions"][0]["*"] - expected = urllib.urlopen(url2.format(title)).read().decode("utf8") actual = mwparserfromhell.parse(text) self.assertEqual(expected, actual) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index d821ccd..01caca7 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -180,11 +180,11 @@ class TestSmartList(unittest.TestCase): gen1 = iter(list1) out = [] for i in range(len(list1)): - out.append(gen1.next()) - self.assertRaises(StopIteration, gen1.next) + out.append(next(gen1)) + self.assertRaises(StopIteration, next, gen1) self.assertEqual([0, 1, 2, 3, "one", "two"], out) gen2 = iter(list2) - self.assertRaises(StopIteration, gen2.next) + self.assertRaises(StopIteration, next, gen2) self.assertEqual(["two", "one", 3, 2, 1, 0], 
list(reversed(list1))) self.assertEqual([], list(reversed(list2))) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 6ef6344..6d10609 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -21,6 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals +from sys import getdefaultencoding from types import GeneratorType import unittest @@ -139,10 +140,10 @@ class TestStringMixIn(unittest.TestCase): out = [] for i in range(len(str1)): - out.append(gen1.next()) - self.assertRaises(StopIteration, gen1.next) + out.append(next(gen1)) + self.assertRaises(StopIteration, next, gen1) self.assertEqual(expected, out) - self.assertRaises(StopIteration, gen2.next) + self.assertRaises(StopIteration, next, gen2) self.assertEqual("gnirts ekaf", "".join(list(reversed(str1)))) self.assertEqual([], list(reversed(str2))) @@ -187,17 +188,25 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual("", str2.decode("punycode", "ignore")) str3 = _FakeString("𐌲𐌿𐍄") + actual = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84" self.assertEqual(b"fake string", str1.encode()) - self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", - str3.encode("utf8")) - self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", - str3.encode(encoding="utf8")) - self.assertRaises(UnicodeEncodeError, str3.encode) + self.assertEqual(actual, str3.encode("utf-8")) + self.assertEqual(actual, str3.encode(encoding="utf-8")) + if getdefaultencoding() == "ascii": + self.assertRaises(UnicodeEncodeError, str3.encode) + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, str3.encode()) self.assertRaises(UnicodeEncodeError, str3.encode, "ascii") self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict") - self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") - self.assertEqual("", str3.encode("ascii", "ignore")) - self.assertEqual("", str3.encode(errors="ignore")) + if getdefaultencoding() == "ascii": + self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, str3.encode(errors="strict")) + self.assertEqual(b"", str3.encode("ascii", "ignore")) + if getdefaultencoding() == "ascii": + self.assertEqual(b"", str3.encode(errors="ignore")) + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, str3.encode(errors="ignore")) self.assertTrue(str1.endswith("ing")) self.assertFalse(str1.endswith("ingh")) @@ -364,6 +373,7 @@ class TestStringMixIn(unittest.TestCase): actual = [" this is a sentence with", "", "whitespace", ""] self.assertEqual(actual, str25.rsplit(" ", 3)) if py3k: + actual = [" this is a", "sentence", "with", "whitespace"] self.assertEqual(actual, str25.rsplit(maxsplit=3)) self.assertEqual("fake string", str1.rstrip()) @@ -381,6 +391,7 @@ class TestStringMixIn(unittest.TestCase): actual = ["", "", "", "this is a sentence with whitespace "] self.assertEqual(actual, str25.split(" ", 3)) if py3k: + actual = ["this", "is", "a", "sentence with whitespace "] self.assertEqual(actual, str25.split(maxsplit=3)) str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 1449ad2..4620982 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -65,12 +65,15 @@ class TestTokens(unittest.TestCase): self.assertEqual("Token()", repr(token1)) if py3k: - token2repr = "Token(foo='bar', baz=123)" + token2repr1 = "Token(foo='bar', baz=123)" + token2repr2 = "Token(baz=123, foo='bar')" 
token3repr = "Text(text='" + hundredchars + "')" else: - token2repr = "Token(foo=u'bar', baz=123)" + token2repr1 = "Token(foo=u'bar', baz=123)" + token2repr2 = "Token(baz=123, foo=u'bar')" token3repr = "Text(text=u'" + hundredchars + "')" - self.assertEqual(token2repr, repr(token2)) + token2repr = repr(token2) + self.assertTrue(token2repr == token2repr1 or token2repr == token2repr2) self.assertEqual(token3repr, repr(token3)) def test_equality(self): @@ -86,10 +89,10 @@ class TestTokens(unittest.TestCase): self.assertEqual(token2, token1) self.assertEqual(token4, token5) self.assertEqual(token5, token4) - self.assertNotEquals(token1, token3) - self.assertNotEquals(token2, token3) - self.assertNotEquals(token4, token6) - self.assertNotEquals(token5, token6) + self.assertNotEqual(token1, token3) + self.assertNotEqual(token2, token3) + self.assertNotEqual(token4, token6) + self.assertNotEqual(token5, token6) def test_repr_equality(self): "check that eval(repr(token)) == token" From 32ac6958e1618e9025486212dac412346126bccd Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 20:59:23 -0400 Subject: [PATCH 113/180] Apply some bugfixes to SmartList to fix tests on Python 3. - Add a _SliceNormalizerMixIn to properly handle slices. - Use floor division when applying key.step. - Implement sort() without 'cmp' parameter. - Fix bytes(list) behavior. - Children of _ListProxies are now _ListProxies, not regular lists. --- mwparserfromhell/smart_list.py | 137 +++++++++++++++++++++++++++-------------- tests/test_smart_list.py | 12 ++-- 2 files changed, 99 insertions(+), 50 deletions(-) diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index 46c475a..09b7bbb 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -41,8 +41,23 @@ def inheritdoc(method): method.__doc__ = getattr(list, method.__name__).__doc__ return method +class _SliceNormalizerMixIn(object): + """MixIn that provides a private method to normalize slices.""" -class SmartList(list): + def _normalize_slice(self, key): + """Return a slice equivalent to the input *key*, standardized.""" + if key.start is not None: + start = (len(self) + key.start) if key.start < 0 else key.start + else: + start = 0 + if key.stop is not None: + stop = (len(self) + key.stop) if key.stop < 0 else key.stop + else: + stop = maxsize + return slice(start, stop, key.step or 1) + + +class SmartList(_SliceNormalizerMixIn, list): """Implements the ``list`` interface with special handling of sublists. 
When a sublist is created (by ``list[i:j]``), any changes made to this @@ -76,8 +91,8 @@ class SmartList(list): def __getitem__(self, key): if not isinstance(key, slice): return super(SmartList, self).__getitem__(key) - keystop = maxsize if key.stop is None else key.stop - sliceinfo = [key.start or 0, keystop, key.step or 1] + key = self._normalize_slice(key) + sliceinfo = [key.start, key.stop, key.step] child = _ListProxy(self, sliceinfo) self._children[id(child)] = (child, sliceinfo) return child @@ -87,9 +102,8 @@ class SmartList(list): return super(SmartList, self).__setitem__(key, item) item = list(item) super(SmartList, self).__setitem__(key, item) - keystop = maxsize if key.stop is None else key.stop - key = slice(key.start or 0, keystop, key.step or 1) - diff = len(item) + (key.start - key.stop) / key.step + key = self._normalize_slice(key) + diff = len(item) + (key.start - key.stop) // key.step values = self._children.values if py3k else self._children.itervalues if diff: for child, (start, stop, step) in values(): @@ -101,11 +115,10 @@ class SmartList(list): def __delitem__(self, key): super(SmartList, self).__delitem__(key) if isinstance(key, slice): - keystop = maxsize if key.stop is None else key.stop - key = slice(key.start or 0, keystop, key.step or 1) + key = self._normalize_slice(key) else: key = slice(key, key + 1, 1) - diff = (key.stop - key.start) / key.step + diff = (key.stop - key.start) // key.step values = self._children.values if py3k else self._children.itervalues for child, (start, stop, step) in values(): if start > key.start: @@ -166,22 +179,35 @@ class SmartList(list): child._parent = copy super(SmartList, self).reverse() - @inheritdoc - def sort(self, cmp=None, key=None, reverse=None): - copy = list(self) - for child in self._children: - child._parent = copy - kwargs = {} - if cmp is not None: - kwargs["cmp"] = cmp - if key is not None: - kwargs["key"] = key - if reverse is not None: - kwargs["reverse"] = reverse - super(SmartList, self).sort(**kwargs) - - -class _ListProxy(list): + if py3k: + @inheritdoc + def sort(self, key=None, reverse=None): + copy = list(self) + for child in self._children: + child._parent = copy + kwargs = {} + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + super(SmartList, self).sort(**kwargs) + else: + @inheritdoc + def sort(self, cmp=None, key=None, reverse=None): + copy = list(self) + for child in self._children: + child._parent = copy + kwargs = {} + if cmp is not None: + kwargs["cmp"] = cmp + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + super(SmartList, self).sort(**kwargs) + + +class _ListProxy(_SliceNormalizerMixIn, list): """Implement the ``list`` interface by getting elements from a parent. This is created by a :py:class:`~.SmartList` object when slicing. 
It does @@ -235,19 +261,28 @@ class _ListProxy(list): return bool(self._render()) def __len__(self): - return (self._stop - self._start) / self._step + return (self._stop - self._start) // self._step def __getitem__(self, key): - return self._render()[key] + if isinstance(key, slice): + key = self._normalize_slice(key) + if key.stop == maxsize: + keystop = self._stop + else: + keystop = key.stop + self._start + adjusted = slice(key.start + self._start, keystop, key.step) + return self._parent[adjusted] + else: + return self._render()[key] def __setitem__(self, key, item): if isinstance(key, slice): - keystart = (key.start or 0) + self._start - if key.stop is None or key.stop == maxsize: + key = self._normalize_slice(key) + if key.stop == maxsize: keystop = self._stop else: keystop = key.stop + self._start - adjusted = slice(keystart, keystop, key.step) + adjusted = slice(key.start + self._start, keystop, key.step) self._parent[adjusted] = item else: length = len(self) @@ -259,12 +294,12 @@ class _ListProxy(list): def __delitem__(self, key): if isinstance(key, slice): - keystart = (key.start or 0) + self._start - if key.stop is None or key.stop == maxsize: + key = self._normalize_slice(key) + if key.stop == maxsize: keystop = self._stop else: keystop = key.stop + self._start - adjusted = slice(keystart, keystop, key.step) + adjusted = slice(key.start + self._start, keystop, key.step) del self._parent[adjusted] else: length = len(self) @@ -388,18 +423,30 @@ class _ListProxy(list): item.reverse() self._parent[self._start:self._stop:self._step] = item - @inheritdoc - def sort(self, cmp=None, key=None, reverse=None): - item = self._render() - kwargs = {} - if cmp is not None: - kwargs["cmp"] = cmp - if key is not None: - kwargs["key"] = key - if reverse is not None: - kwargs["reverse"] = reverse - item.sort(**kwargs) - self._parent[self._start:self._stop:self._step] = item + if py3k: + @inheritdoc + def sort(self, key=None, reverse=None): + item = self._render() + kwargs = {} + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + item.sort(**kwargs) + self._parent[self._start:self._stop:self._step] = item + else: + @inheritdoc + def sort(self, cmp=None, key=None, reverse=None): + item = self._render() + kwargs = {} + if cmp is not None: + kwargs["cmp"] = cmp + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + item.sort(**kwargs) + self._parent[self._start:self._stop:self._step] = item del inheritdoc diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 01caca7..3423bb7 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -123,7 +123,7 @@ class TestSmartList(unittest.TestCase): if py3k: self.assertEqual("[0, 1, 2, 3, 'one', 'two']", str(list1)) - self.assertEqual(b"[0, 1, 2, 3, 'one', 'two']", bytes(list1)) + self.assertEqual(b"\x00\x01\x02", bytes(list4)) self.assertEqual("[0, 1, 2, 3, 'one', 'two']", repr(list1)) else: self.assertEqual("[0, 1, 2, 3, u'one', u'two']", unicode(list1)) @@ -256,10 +256,12 @@ class TestSmartList(unittest.TestCase): self.assertEqual([0, 2, 2, 3, 4, 5], list1) list1.sort(reverse=True) self.assertEqual([5, 4, 3, 2, 2, 0], list1) - list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y)) # Distance from 3 - self.assertEqual([3, 4, 2, 2, 5, 0], list1) - list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y), reverse=True) - self.assertEqual([0, 5, 4, 2, 2, 3], list1) + if not py3k: + func = lambda x, y: abs(3 - x) - abs(3 - y) # Distance from 3 + 
list1.sort(cmp=func) + self.assertEqual([3, 4, 2, 2, 5, 0], list1) + list1.sort(cmp=func, reverse=True) + self.assertEqual([0, 5, 4, 2, 2, 3], list1) list3.sort(key=lambda i: i[1]) self.assertEqual([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) list3.sort(key=lambda i: i[1], reverse=True) From eae6f11add071401c95e89c5f8ea42be2d0c96aa Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 21:24:45 -0400 Subject: [PATCH 114/180] Make _test_tokenizer import relative so tests work on py3k. --- tests/test_ctokenizer.py | 2 +- tests/test_pytokenizer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index f21378c..7a082e8 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -28,7 +28,7 @@ try: except ImportError: CTokenizer = None -from _test_tokenizer import TokenizerTestCase +from ._test_tokenizer import TokenizerTestCase @unittest.skipUnless(CTokenizer, "C tokenizer not available") class TestCTokenizer(TokenizerTestCase, unittest.TestCase): diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 3e598bf..697c7e5 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -25,7 +25,7 @@ import unittest from mwparserfromhell.parser.tokenizer import Tokenizer -from _test_tokenizer import TokenizerTestCase +from ._test_tokenizer import TokenizerTestCase class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the Python tokenizer.""" From 1b69b5e882944abf0909816d2daed76c37cbe9c8 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 30 Mar 2013 16:46:39 -0400 Subject: [PATCH 115/180] Moving compat stuff exclusively for unit tests to its own file. --- mwparserfromhell/compat.py | 7 ------- tests/compat.py | 20 ++++++++++++++++++++ tests/test_docs.py | 4 +++- tests/test_parser.py | 3 ++- tests/test_smart_list.py | 4 +++- tests/test_string_mixin.py | 4 +++- 6 files changed, 31 insertions(+), 11 deletions(-) create mode 100644 tests/compat.py diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index 34870e6..bb81513 100755 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -16,21 +16,14 @@ if py3k: bytes = bytes str = str basestring = str - range = range maxsize = sys.maxsize import html.entities as htmlentities - from io import StringIO - from urllib.parse import urlencode - from urllib.request import urlopen else: bytes = str str = unicode basestring = basestring - range = xrange maxsize = sys.maxint import htmlentitydefs as htmlentities - from StringIO import StringIO - from urllib import urlencode, urlopen del sys diff --git a/tests/compat.py b/tests/compat.py new file mode 100644 index 0000000..8bed40e --- /dev/null +++ b/tests/compat.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- + +""" +Serves the same purpose as mwparserfromhell.compat, but only for objects +required by unit tests. This avoids unnecessary imports (like urllib) within +the main library. 
+""" + +from mwparserfromhell.compat import py3k + +if py3k: + range = range + from io import StringIO + from urllib.parse import urlencode + from urllib.request import urlopen + +else: + range = xrange + from StringIO import StringIO + from urllib import urlencode, urlopen diff --git a/tests/test_docs.py b/tests/test_docs.py index 3b23bb7..8d95c47 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -25,7 +25,9 @@ import json import unittest import mwparserfromhell -from mwparserfromhell.compat import py3k, str, StringIO, urlencode, urlopen +from mwparserfromhell.compat import py3k, str + +from .compat import StringIO, urlencode, urlopen class TestDocs(unittest.TestCase): """Integration test cases for mwparserfromhell's documentation.""" diff --git a/tests/test_parser.py b/tests/test_parser.py index 4f718c8..1c37a85 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -24,12 +24,13 @@ from __future__ import unicode_literals import unittest from mwparserfromhell import parser -from mwparserfromhell.compat import range from mwparserfromhell.nodes import Template, Text, Wikilink from mwparserfromhell.nodes.extras import Parameter from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode +from .compat import range + class TestParser(unittest.TestCase): """Tests for the Parser class itself, which tokenizes and builds nodes.""" diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 3423bb7..25df555 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -23,9 +23,11 @@ from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import py3k, range +from mwparserfromhell.compat import py3k from mwparserfromhell.smart_list import SmartList, _ListProxy +from .compat import range + class TestSmartList(unittest.TestCase): """Test cases for the SmartList class and its child, _ListProxy.""" diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 6d10609..306f2fd 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -25,9 +25,11 @@ from sys import getdefaultencoding from types import GeneratorType import unittest -from mwparserfromhell.compat import bytes, py3k, range, str +from mwparserfromhell.compat import bytes, py3k, str from mwparserfromhell.string_mixin import StringMixIn +from .compat import range + class _FakeString(StringMixIn): def __init__(self, data): self._data = data From e3f89af62dcc323b6119174a07868057e814ede9 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 30 Mar 2013 18:38:29 -0400 Subject: [PATCH 116/180] Adding a TreeEqualityTestCase base class. --- tests/_test_tokenizer.py | 3 +- tests/_test_tree_equality.py | 78 ++++++++++++++++++++++++++++++++++++++++++++ tests/test_ctokenizer.py | 2 +- tests/test_parser.py | 33 ++----------------- tests/test_pytokenizer.py | 2 +- 5 files changed, 84 insertions(+), 34 deletions(-) create mode 100644 tests/_test_tree_equality.py diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 379b4fa..13882aa 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -21,6 +21,7 @@ # SOFTWARE. from __future__ import print_function, unicode_literals +from unittest import TestCase from os import listdir, path from mwparserfromhell.compat import py3k @@ -31,7 +32,7 @@ class _TestParseError(Exception): pass -class TokenizerTestCase(object): +class TokenizerTestCase(TestCase): """A base test case for tokenizers, whose tests are loaded dynamically. 
Subclassed along with unittest.TestCase to form TestPyTokenizer and diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py new file mode 100644 index 0000000..26c373d --- /dev/null +++ b/tests/_test_tree_equality.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +from unittest import TestCase + +from mwparserfromhell.nodes import Template, Text, Wikilink +from mwparserfromhell.nodes.extras import Parameter +from mwparserfromhell.wikicode import Wikicode + +class TreeEqualityTestCase(TestCase): + """A base test case with support for comparing the equality of node trees. + + This adds a number of type equality functions, for Wikicode, Text, + Templates, and Wikilinks. 
+ """ + + def assertNodeEqual(self, expected, actual): + registry = { + Text: self.assertTextNodeEqual, + Template: self.assertTemplateNodeEqual, + Wikilink: self.assertWikilinkNodeEqual + } + for nodetype in registry: + if isinstance(expected, nodetype): + self.assertIsInstance(actual, nodetype) + registry[nodetype](expected, actual) + + def assertTextNodeEqual(self, expected, actual): + """Assert that two Text nodes have the same data.""" + self.assertEqual(expected.value, actual.value) + + def assertTemplateNodeEqual(self, expected, actual): + """Assert that two Template nodes have the same data.""" + self.assertWikicodeEqual(expected.name, actual.name) + length = len(expected.params) + self.assertEqual(length, len(actual.params)) + for i in range(length): + exp_param = expected.params[i] + act_param = actual.params[i] + self.assertWikicodeEqual(exp_param.name, act_param.name) + self.assertWikicodeEqual(exp_param.value, act_param.value) + self.assertIs(exp_param.showkey, act_param.showkey) + + def assertWikilinkNodeEqual(self, expected, actual): + """Assert that two Wikilink nodes have the same data.""" + self.assertWikicodeEqual(expected.title, actual.title) + if expected.text is not None: + self.assertWikicodeEqual(expected.text, actual.text) + else: + self.assertIs(None, actual.text) + + def assertWikicodeEqual(self, expected, actual): + """Assert that two Wikicode objects have the same data.""" + self.assertIsInstance(actual, Wikicode) + length = len(expected.nodes) + self.assertEqual(length, len(actual.nodes)) + for i in range(length): + self.assertNodeEqual(expected.get(i), actual.get(i)) diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 7a082e8..955b9a0 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -31,7 +31,7 @@ except ImportError: from ._test_tokenizer import TokenizerTestCase @unittest.skipUnless(CTokenizer, "C tokenizer not available") -class TestCTokenizer(TokenizerTestCase, unittest.TestCase): +class TestCTokenizer(TokenizerTestCase): """Test cases for the C tokenizer.""" @classmethod diff --git a/tests/test_parser.py b/tests/test_parser.py index 1c37a85..9d2c969 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -29,41 +29,12 @@ from mwparserfromhell.nodes.extras import Parameter from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode +from ._test_tree_equality import TreeEqualityTestCase from .compat import range -class TestParser(unittest.TestCase): +class TestParser(TreeEqualityTestCase): """Tests for the Parser class itself, which tokenizes and builds nodes.""" - def assertNodesEqual(self, expected, actual): - """Assert that two Nodes are the same type and have the same data.""" - self.assertIs(type(expected), type(actual)) - if isinstance(expected, Text): - self.assertEqual(expected.value, actual.value) - elif isinstance(expected, Template): - self.assertWikicodeEqual(expected.name, actual.name) - length = len(expected.params) - self.assertEqual(length, len(actual.params)) - for i in range(length): - exp_param = expected.params[i] - act_param = actual.params[i] - self.assertWikicodeEqual(exp_param.name, act_param.name) - self.assertWikicodeEqual(exp_param.value, act_param.value) - self.assertIs(exp_param.showkey, act_param.showkey) - elif isinstance(expected, Wikilink): - self.assertWikicodeEqual(expected.title, actual.title) - if expected.text is not None: - self.assertWikicodeEqual(expected.text, actual.text) - else: - self.assertIs(None, actual.text) - - def 
assertWikicodeEqual(self, expected, actual): - """Assert that two Wikicode objects have the same data.""" - self.assertIsInstance(actual, Wikicode) - length = len(expected.nodes) - self.assertEqual(length, len(actual.nodes)) - for i in range(length): - self.assertNodesEqual(expected.get(i), actual.get(i)) - def test_use_c(self): """make sure the correct tokenizer is used""" if parser.use_c: diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 697c7e5..7b37eb3 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -27,7 +27,7 @@ from mwparserfromhell.parser.tokenizer import Tokenizer from ._test_tokenizer import TokenizerTestCase -class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): +class TestPyTokenizer(TokenizerTestCase): """Test cases for the Python tokenizer.""" @classmethod From a8cb275b941b70524e8b97341784097434ae627c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 1 Apr 2013 19:04:55 -0400 Subject: [PATCH 117/180] Add TestUtils; implement two tests for it. Also, add a missing docstring in TreeEqualityTestCase. --- tests/_test_tree_equality.py | 1 + tests/test_utils.py | 67 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 tests/test_utils.py diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 26c373d..0fdb531 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -35,6 +35,7 @@ class TreeEqualityTestCase(TestCase): """ def assertNodeEqual(self, expected, actual): + """Assert that two Nodes have the same type and have the same data.""" registry = { Text: self.assertTextNodeEqual, Template: self.assertTemplateNodeEqual, diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..8afad7a --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
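+#
+# The tests below exercise utils.parse_anything(), which normalizes any
+# reasonable input -- strings, bytes, numbers, None, existing nodes, and
+# arbitrarily nested iterables of these -- into a single Wikicode tree,
+# and raises ValueError for everything else.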
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.nodes import Template, Text +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.utils import parse_anything +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +class TestUtils(TreeEqualityTestCase): + """Tests for the utils module, which provides parse_anything().""" + + def test_parse_anything_valid(self): + """tests for valid input to utils.parse_anything()""" + wrap = lambda L: Wikicode(SmartList(L)) + textify = lambda L: wrap([Text(item) for item in L]) + tests = [ + (wrap([Text("foobar")]), textify(["foobar"])), + (Template(wrap([Text("spam")])), + wrap([Template(textify(["spam"]))])), + ("fóóbar", textify(["fóóbar"])), + (b"foobár", textify(["foobár"])), + (123, textify(["123"])), + (True, textify(["True"])), + (None, wrap([])), + ([Text("foo"), Text("bar"), Text("baz")], + textify(["foo", "bar", "baz"])), + ([wrap([Text("foo")]), Text("bar"), "baz", 123, 456], + textify(["foo", "bar", "baz", "123", "456"])), + ([[[([[((("foo",),),)], "bar"],)]]], textify(["foo", "bar"])) + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, parse_anything(test)) + + def test_parse_anything_invalid(self): + """tests for invalid input to utils.parse_anything()""" + self.assertRaises(ValueError, parse_anything, Ellipsis) + self.assertRaises(ValueError, parse_anything, object) + self.assertRaises(ValueError, parse_anything, object()) + self.assertRaises(ValueError, parse_anything, type) + self.assertRaises(ValueError, parse_anything, ["foo", [object]]) + +if __name__ == "__main__": + unittest.main(verbosity=2) From 30d4f137a829a7bfd613363f3579f97337462024 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 1 Apr 2013 19:06:59 -0400 Subject: [PATCH 118/180] Curse you, Python 3! --- tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 8afad7a..c088530 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -42,7 +42,7 @@ class TestUtils(TreeEqualityTestCase): (Template(wrap([Text("spam")])), wrap([Template(textify(["spam"]))])), ("fóóbar", textify(["fóóbar"])), - (b"foobár", textify(["foobár"])), + (b"foob\xc3\xa1r", textify(["foobár"])), (123, textify(["123"])), (True, textify(["True"])), (None, wrap([])), From cda1ce95f3b46c3392e57de182bc925c815b7d1f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 1 Apr 2013 19:11:30 -0400 Subject: [PATCH 119/180] Roll back part of e3f89af62d because CURSE YOU UNIT TESTING FRAMEWORK --- tests/_test_tokenizer.py | 3 +-- tests/test_ctokenizer.py | 2 +- tests/test_pytokenizer.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 13882aa..379b4fa 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -21,7 +21,6 @@ # SOFTWARE. from __future__ import print_function, unicode_literals -from unittest import TestCase from os import listdir, path from mwparserfromhell.compat import py3k @@ -32,7 +31,7 @@ class _TestParseError(Exception): pass -class TokenizerTestCase(TestCase): +class TokenizerTestCase(object): """A base test case for tokenizers, whose tests are loaded dynamically. 
Subclassed along with unittest.TestCase to form TestPyTokenizer and diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 955b9a0..7a082e8 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -31,7 +31,7 @@ except ImportError: from ._test_tokenizer import TokenizerTestCase @unittest.skipUnless(CTokenizer, "C tokenizer not available") -class TestCTokenizer(TokenizerTestCase): +class TestCTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the C tokenizer.""" @classmethod diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 7b37eb3..697c7e5 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -27,7 +27,7 @@ from mwparserfromhell.parser.tokenizer import Tokenizer from ._test_tokenizer import TokenizerTestCase -class TestPyTokenizer(TokenizerTestCase): +class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the Python tokenizer.""" @classmethod From 892092434fa748ef06ff2558c5b9dbfce9155071 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 1 Apr 2013 21:04:53 -0400 Subject: [PATCH 120/180] Skeleton for TestBuilder; adding some nodes to TreeEqualityTestCase. --- tests/_test_tree_equality.py | 38 ++++++++++++++++++++++++++++++------ tests/test_builder.py | 46 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 76 insertions(+), 8 deletions(-) diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 0fdb531..16f4b49 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -23,8 +23,9 @@ from __future__ import unicode_literals from unittest import TestCase -from mwparserfromhell.nodes import Template, Text, Wikilink -from mwparserfromhell.nodes.extras import Parameter +from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, + Tag, Template, Text, Wikilink) +from mwparserfromhell.nodes.extras import Attribute, Parameter from mwparserfromhell.wikicode import Wikicode class TreeEqualityTestCase(TestCase): @@ -37,8 +38,13 @@ class TreeEqualityTestCase(TestCase): def assertNodeEqual(self, expected, actual): """Assert that two Nodes have the same type and have the same data.""" registry = { - Text: self.assertTextNodeEqual, + Argument: self.assertArgumentNodeEqual, + Comment: self.assertCommentNodeEqual, + Heading: self.assertHeadingNodeEqual, + HTMLEntity: self.assertHTMLEntityNodeEqual, + Tag: self.assertTagNodeEqual, Template: self.assertTemplateNodeEqual, + Text: self.assertTextNodeEqual, Wikilink: self.assertWikilinkNodeEqual } for nodetype in registry: @@ -46,9 +52,25 @@ class TreeEqualityTestCase(TestCase): self.assertIsInstance(actual, nodetype) registry[nodetype](expected, actual) - def assertTextNodeEqual(self, expected, actual): - """Assert that two Text nodes have the same data.""" - self.assertEqual(expected.value, actual.value) + def assertArgumentNodeEqual(self, expected, actual): + """Assert that two Argument nodes have the same data.""" + pass + + def assertCommentNodeEqual(self, expected, actual): + """Assert that two Comment nodes have the same data.""" + pass + + def assertHeadingNodeEqual(self, expected, actual): + """Assert that two Heading nodes have the same data.""" + pass + + def assertHTMLEntityNodeEqual(self, expected, actual): + """Assert that two HTMLEntity nodes have the same data.""" + pass + + def assertTagNodeEqual(self, expected, actual): + """Assert that two Tag nodes have the same data.""" + pass def assertTemplateNodeEqual(self, expected, actual): """Assert that two Template 
nodes have the same data.""" @@ -62,6 +84,10 @@ class TreeEqualityTestCase(TestCase): self.assertWikicodeEqual(exp_param.value, act_param.value) self.assertIs(exp_param.showkey, act_param.showkey) + def assertTextNodeEqual(self, expected, actual): + """Assert that two Text nodes have the same data.""" + self.assertEqual(expected.value, actual.value) + def assertWikilinkNodeEqual(self, expected, actual): """Assert that two Wikilink nodes have the same data.""" self.assertWikicodeEqual(expected.title, actual.title) diff --git a/tests/test_builder.py b/tests/test_builder.py index a3518fd..a80d8bf 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -23,8 +23,50 @@ from __future__ import unicode_literals import unittest -class TestBuilder(unittest.TestCase): - pass +from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, + Tag, Template, Text, Wikilink) +from mwparserfromhell.nodes.extras import Attribute, Parameter +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) + +class TestBuilder(TreeEqualityTestCase): + """Tests for the builder, which turns tokens into Wikicode objects.""" + + def test_text(self): + """tests for building Text nodes""" + pass + + def test_template(self): + """tests for building Template nodes""" + pass + + def test_argument(self): + """tests for building Argument nodes""" + pass + + def test_wikilink(self): + """tests for building Wikilink nodes""" + pass + + def test_html_entity(self): + """tests for building HTMLEntity nodes""" + pass + + def test_heading(self): + """tests for building Heading nodes""" + pass + + def test_comment(self): + """tests for building Comment nodes""" + pass + + def test_tag(self): + """tests for building Tag nodes""" + pass if __name__ == "__main__": unittest.main(verbosity=2) From 404b4479a26ab89f41b2e9bae5c6ffc8d5777f67 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 1 Apr 2013 21:30:19 -0400 Subject: [PATCH 121/180] Implement the remaining asserts in TreeEqualityTestCase. 
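
This commit also tightens HTMLEntity.hex_char: instead of coercing its input to a bool, the setter now accepts only "x" or "X" and raises ValueError for anything else. Roughly, the new behaviour looks like this (an illustrative sketch, not part of the diff below)::

    from mwparserfromhell.nodes import HTMLEntity

    entity = HTMLEntity("6b", named=False, hexadecimal=True)
    entity.hex_char = "X"      # accepted; the entity now renders as &#X6b;
    try:
        entity.hex_char = "q"  # anything besides "x"/"X" is rejected
    except ValueError:
        print("rejected, as expected")
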
--- mwparserfromhell/nodes/html_entity.py | 5 ++++- tests/_test_tree_equality.py | 18 +++++++++++++----- tests/test_builder.py | 1 + 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index 221040b..5b7607c 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -135,7 +135,10 @@ class HTMLEntity(Node): @hex_char.setter def hex_char(self, newval): - self._hex_char = bool(newval) + newval = str(newval) + if newval not in ("x", "X"): + raise ValueError(newval) + self._hex_char = newval def normalize(self): """Return the unicode character represented by the HTML entity.""" diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 16f4b49..2014ac1 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -54,23 +54,31 @@ class TreeEqualityTestCase(TestCase): def assertArgumentNodeEqual(self, expected, actual): """Assert that two Argument nodes have the same data.""" - pass + self.assertWikicodeEqual(expected.name, actual.name) + if expected.default is not None: + self.assertWikicodeEqual(expected.default, actual.default) + else: + self.assertIs(None, actual.default) def assertCommentNodeEqual(self, expected, actual): """Assert that two Comment nodes have the same data.""" - pass + self.assertWikicodeEqual(expected.contents, actual.contents) def assertHeadingNodeEqual(self, expected, actual): """Assert that two Heading nodes have the same data.""" - pass + self.assertWikicodeEqual(expected.title, actual.title) + self.assertEqual(expected.level, actual.level) def assertHTMLEntityNodeEqual(self, expected, actual): """Assert that two HTMLEntity nodes have the same data.""" - pass + self.assertEqual(expected.value, actual.value) + self.assertIs(expected.named, actual.named) + self.assertIs(expected.hexadecimal, actual.hexadecimal) + self.assertEquals(expected.hex_char, actual.hex_char) def assertTagNodeEqual(self, expected, actual): """Assert that two Tag nodes have the same data.""" - pass + self.fail("Holding this until feature/html_tags is ready.") def assertTemplateNodeEqual(self, expected, actual): """Assert that two Template nodes have the same data.""" diff --git a/tests/test_builder.py b/tests/test_builder.py index a80d8bf..e6919c1 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -64,6 +64,7 @@ class TestBuilder(TreeEqualityTestCase): """tests for building Comment nodes""" pass + @unittest.skip("holding this until feature/html_tags is ready") def test_tag(self): """tests for building Tag nodes""" pass From cb23587ab6e4cb3dfc21d817f2cb7b18c5542a60 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 3 Apr 2013 11:00:07 -0400 Subject: [PATCH 122/180] Adding some Builder tests --- tests/test_builder.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index e6919c1..d577bfc 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -26,6 +26,8 @@ import unittest from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, Tag, Template, Text, Wikilink) from mwparserfromhell.nodes.extras import Attribute, Parameter +from mwparserfromhell.parser import tokens +from mwparserfromhell.parser.builder import Builder from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode @@ -36,13 +38,34 @@ wrap = lambda L: Wikicode(SmartList(L)) class 
TestBuilder(TreeEqualityTestCase): """Tests for the builder, which turns tokens into Wikicode objects.""" + def setUp(self): + self.builder = Builder() + def test_text(self): """tests for building Text nodes""" - pass + tests = [ + ([tokens.Text(text="foobar")], wrap([Text("foobar")])), + ([tokens.Text(text="fóóbar")], wrap([Text("fóóbar")])), + ([tokens.Text(text="spam"), tokens.Text(text="eggs")], + wrap([Text("spam"), Text("eggs")])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) def test_template(self): """tests for building Template nodes""" - pass + tests = [ + ([tokens.TemplateOpen(), tokens.Text(text="foobar"), tokens.TemplateClose()], + wrap([Template(wrap([Text("foobar")]))])), + ([tokens.TemplateOpen(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.TemplateClose()], + wrap([Template(wrap([Text("spam"), Text("eggs")]))])), + ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateClose()], + wrap([Template(wrap([Text("foo")]), params=[Parameter(wrap([Text("1")]), wrap([Text("bar")]), showkey=False)])])), + ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateParamEquals(), tokens.Text(text="baz"), tokens.TemplateClose()], + wrap([Template(wrap([Text("foo")]), params=[Parameter(wrap([Text("bar")]), wrap([Text("baz")]))])])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) def test_argument(self): """tests for building Argument nodes""" From b8e8d057abc4fefec78f967adf30326669c0726c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 4 Apr 2013 10:49:04 -0400 Subject: [PATCH 123/180] Finish test_template() --- tests/test_builder.py | 50 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index d577bfc..952b501 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -47,7 +47,7 @@ class TestBuilder(TreeEqualityTestCase): ([tokens.Text(text="foobar")], wrap([Text("foobar")])), ([tokens.Text(text="fóóbar")], wrap([Text("fóóbar")])), ([tokens.Text(text="spam"), tokens.Text(text="eggs")], - wrap([Text("spam"), Text("eggs")])), + wrap([Text("spam"), Text("eggs")])), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -55,14 +55,46 @@ class TestBuilder(TreeEqualityTestCase): def test_template(self): """tests for building Template nodes""" tests = [ - ([tokens.TemplateOpen(), tokens.Text(text="foobar"), tokens.TemplateClose()], - wrap([Template(wrap([Text("foobar")]))])), - ([tokens.TemplateOpen(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.TemplateClose()], - wrap([Template(wrap([Text("spam"), Text("eggs")]))])), - ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateClose()], - wrap([Template(wrap([Text("foo")]), params=[Parameter(wrap([Text("1")]), wrap([Text("bar")]), showkey=False)])])), - ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateParamEquals(), tokens.Text(text="baz"), tokens.TemplateClose()], - wrap([Template(wrap([Text("foo")]), params=[Parameter(wrap([Text("bar")]), wrap([Text("baz")]))])])), + ([tokens.TemplateOpen(), tokens.Text(text="foobar"), + tokens.TemplateClose()], + wrap([Template(wrap([Text("foobar")]))])), + + 
([tokens.TemplateOpen(), tokens.Text(text="spam"), + tokens.Text(text="eggs"), tokens.TemplateClose()], + wrap([Template(wrap([Text("spam"), Text("eggs")]))])), + + ([tokens.TemplateOpen(), tokens.Text(text="foo"), + tokens.TemplateParamSeparator(), tokens.Text(text="bar"), + tokens.TemplateClose()], + wrap([Template(wrap([Text("foo")]), params=[ + Parameter(wrap([Text("1")]), wrap([Text("bar")]), + showkey=False)])])), + + ([tokens.TemplateOpen(), tokens.Text(text="foo"), + tokens.TemplateParamSeparator(), tokens.Text(text="bar"), + tokens.TemplateParamEquals(), tokens.Text(text="baz"), + tokens.TemplateClose()], + wrap([Template(wrap([Text("foo")]), params=[ + Parameter(wrap([Text("bar")]), wrap([Text("baz")]))])])), + + ([tokens.TemplateOpen(), tokens.Text(text="foo"), + tokens.TemplateParamSeparator(), tokens.Text(text="bar"), + tokens.TemplateParamEquals(), tokens.Text(text="baz"), + tokens.TemplateParamSeparator(), tokens.Text(text="biz"), + tokens.TemplateParamSeparator(), tokens.Text(text="buzz"), + tokens.TemplateParamSeparator(), tokens.Text(text="3"), + tokens.TemplateParamEquals(), tokens.Text(text="buff"), + tokens.TemplateParamSeparator(), tokens.Text(text="baff"), + tokens.TemplateClose()], + wrap([Template(wrap([Text("foo")]), params=[ + Parameter(wrap([Text("bar")]), wrap([Text("baz")])), + Parameter(wrap([Text("1")]), wrap([Text("biz")]), + showkey=False), + Parameter(wrap([Text("2")]), wrap([Text("buzz")]), + showkey=False), + Parameter(wrap([Text("3")]), wrap([Text("buff")])), + Parameter(wrap([Text("3")]), wrap([Text("baff")]), + showkey=False)])])), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) From e32a6692f8ad9f8d6c57a56ca40e8aedf128c074 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 4 Apr 2013 10:59:16 -0400 Subject: [PATCH 124/180] test_argument() --- tests/test_builder.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index 952b501..e632644 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -101,7 +101,29 @@ class TestBuilder(TreeEqualityTestCase): def test_argument(self): """tests for building Argument nodes""" - pass + tests = [ + ([tokens.ArgumentOpen(), tokens.Text(text="foobar"), + tokens.ArgumentClose()], + wrap([Argument(wrap([Text("foobar")]))])), + + ([tokens.ArgumentOpen(), tokens.Text(text="spam"), + tokens.Text(text="eggs"), tokens.ArgumentClose()], + wrap([Argument(wrap([Text("spam"), Text("eggs")]))])), + + ([tokens.ArgumentOpen(), tokens.Text(text="foo"), + tokens.ArgumentSeparator(), tokens.Text(text="bar"), + tokens.ArgumentClose()], + wrap([Argument(wrap([Text("foo")]), wrap([Text("bar")]))])), + + ([tokens.ArgumentOpen(), tokens.Text(text="foo"), + tokens.Text(text="bar"), tokens.ArgumentSeparator(), + tokens.Text(text="baz"), tokens.Text(text="biz"), + tokens.ArgumentClose()], + wrap([Argument(wrap([Text("foo"), Text("bar")]), + wrap([Text("baz"), Text("biz")]))])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) def test_wikilink(self): """tests for building Wikilink nodes""" From 7289d8c070a6fcd2bceaa8e00e7661c9c21461a5 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 5 Apr 2013 10:25:48 -0400 Subject: [PATCH 125/180] test_wikilink(); fix indentation --- tests/test_builder.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index e632644..ea38dae 
100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -68,7 +68,7 @@ class TestBuilder(TreeEqualityTestCase): tokens.TemplateClose()], wrap([Template(wrap([Text("foo")]), params=[ Parameter(wrap([Text("1")]), wrap([Text("bar")]), - showkey=False)])])), + showkey=False)])])), ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), @@ -89,12 +89,12 @@ class TestBuilder(TreeEqualityTestCase): wrap([Template(wrap([Text("foo")]), params=[ Parameter(wrap([Text("bar")]), wrap([Text("baz")])), Parameter(wrap([Text("1")]), wrap([Text("biz")]), - showkey=False), + showkey=False), Parameter(wrap([Text("2")]), wrap([Text("buzz")]), - showkey=False), + showkey=False), Parameter(wrap([Text("3")]), wrap([Text("buff")])), Parameter(wrap([Text("3")]), wrap([Text("baff")]), - showkey=False)])])), + showkey=False)])])), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -127,7 +127,29 @@ class TestBuilder(TreeEqualityTestCase): def test_wikilink(self): """tests for building Wikilink nodes""" - pass + tests = [ + ([tokens.WikilinkOpen(), tokens.Text(text="foobar"), + tokens.WikilinkClose()], + wrap([Wikilink(wrap([Text("foobar")]))])), + + ([tokens.WikilinkOpen(), tokens.Text(text="spam"), + tokens.Text(text="eggs"), tokens.WikilinkClose()], + wrap([Wikilink(wrap([Text("spam"), Text("eggs")]))])), + + ([tokens.WikilinkOpen(), tokens.Text(text="foo"), + tokens.WikilinkSeparator(), tokens.Text(text="bar"), + tokens.WikilinkClose()], + wrap([Wikilink(wrap([Text("foo")]), wrap([Text("bar")]))])), + + ([tokens.WikilinkOpen(), tokens.Text(text="foo"), + tokens.Text(text="bar"), tokens.WikilinkSeparator(), + tokens.Text(text="baz"), tokens.Text(text="biz"), + tokens.WikilinkClose()], + wrap([Wikilink(wrap([Text("foo"), Text("bar")]), + wrap([Text("baz"), Text("biz")]))])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) def test_html_entity(self): """tests for building HTMLEntity nodes""" From e9463543f46c49748740f69c5e5bcdb569338a2a Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 5 Apr 2013 10:46:43 -0400 Subject: [PATCH 126/180] test_html_entity() --- tests/test_builder.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index ea38dae..7dcbc0e 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -153,7 +153,23 @@ class TestBuilder(TreeEqualityTestCase): def test_html_entity(self): """tests for building HTMLEntity nodes""" - pass + tests = [ + ([tokens.HTMLEntityStart(), tokens.Text(text="nbsp"), + tokens.HTMLEntityEnd()], + wrap([HTMLEntity("nbsp", named=True, hexadecimal=False)])), + + ([tokens.HTMLEntityStart(), tokens.HTMLEntityNumeric(), + tokens.Text(text="107"), tokens.HTMLEntityEnd()], + wrap([HTMLEntity("107", named=False, hexadecimal=False)])), + + ([tokens.HTMLEntityStart(), tokens.HTMLEntityNumeric(), + tokens.HTMLEntityHex(char="X"), tokens.Text(text="6B"), + tokens.HTMLEntityEnd()], + wrap([HTMLEntity("6B", named=False, hexadecimal=True, + hex_char="X")])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) def test_heading(self): """tests for building Heading nodes""" From 132c6584d059497374c7f0c53285e6251beb6675 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 5 Apr 2013 10:52:43 -0400 Subject: [PATCH 127/180] test_heading() and test_comment() --- tests/test_builder.py | 24 ++++++++++++++++++++++-- 1 file 
changed, 22 insertions(+), 2 deletions(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index 7dcbc0e..410eb4a 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -173,11 +173,31 @@ class TestBuilder(TreeEqualityTestCase): def test_heading(self): """tests for building Heading nodes""" - pass + tests = [ + ([tokens.HeadingStart(level=2), tokens.Text(text="foobar"), + tokens.HeadingEnd()], + wrap([Heading(wrap([Text("foobar")]), 2)])), + + ([tokens.HeadingStart(level=4), tokens.Text(text="spam"), + tokens.Text(text="eggs"), tokens.HeadingEnd()], + wrap([Heading(wrap([Text("spam"), Text("eggs")]), 4)])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) def test_comment(self): """tests for building Comment nodes""" - pass + tests = [ + ([tokens.CommentStart(), tokens.Text(text="foobar"), + tokens.CommentEnd()], + wrap([Comment(wrap([Text("foobar")]))])), + + ([tokens.CommentStart(), tokens.Text(text="spam"), + tokens.Text(text="eggs"), tokens.CommentEnd()], + wrap([Comment(wrap([Text("spam"), Text("eggs")]))])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) @unittest.skip("holding this until feature/html_tags is ready") def test_tag(self): From 094e867ee6d7a2f34c6555e318ccdb1622526484 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 6 Apr 2013 15:45:51 -0400 Subject: [PATCH 128/180] Add test_integration(); add a horrible abuse of PEP8 --- tests/_test_tree_equality.py | 2 +- tests/test_builder.py | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 2014ac1..758a72e 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -74,7 +74,7 @@ class TreeEqualityTestCase(TestCase): self.assertEqual(expected.value, actual.value) self.assertIs(expected.named, actual.named) self.assertIs(expected.hexadecimal, actual.hexadecimal) - self.assertEquals(expected.hex_char, actual.hex_char) + self.assertEqual(expected.hex_char, actual.hex_char) def assertTagNodeEqual(self, expected, actual): """Assert that two Tag nodes have the same data.""" diff --git a/tests/test_builder.py b/tests/test_builder.py index 410eb4a..9425713 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -204,5 +204,25 @@ class TestBuilder(TreeEqualityTestCase): """tests for building Tag nodes""" pass + def test_integration(self): + """a test for building a combination of templates together""" + test = [tokens.TemplateOpen(), tokens.TemplateOpen(), + tokens.TemplateOpen(), tokens.TemplateOpen(), + tokens.Text(text="foo"), tokens.TemplateClose(), + tokens.Text(text="bar"), tokens.TemplateParamSeparator(), + tokens.Text(text="baz"), tokens.TemplateParamEquals(), + tokens.Text(text="biz"), tokens.TemplateClose(), + tokens.Text(text="buzz"), tokens.TemplateClose(), + tokens.Text(text="usr"), tokens.TemplateParamSeparator(), + tokens.TemplateOpen(), tokens.Text(text="bin"), + tokens.TemplateClose(), tokens.TemplateClose()] + valid = wrap( + [Template(wrap([Template(wrap([Template(wrap([Template(wrap([Text( + "foo")])), Text("bar")]), params=[Parameter(wrap([Text("baz")]), + wrap([Text("biz")]))]), Text("buzz")])), Text("usr")]), params=[ + Parameter(wrap([Text("1")]), wrap([Template(wrap([Text("bin")]))]), + showkey=False)])]) + self.assertWikicodeEqual(valid, self.builder.build(test)) + if __name__ == "__main__": unittest.main(verbosity=2) From 2d9b8a39b6509d8a39dcf12b90dbcb2e8f07433f Mon Sep 
17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 6 Apr 2013 16:17:47 -0400 Subject: [PATCH 129/180] test_integration2(); finish TestBuilder --- tests/test_builder.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/test_builder.py b/tests/test_builder.py index 9425713..1e578ed 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -206,6 +206,7 @@ class TestBuilder(TreeEqualityTestCase): def test_integration(self): """a test for building a combination of templates together""" + # {{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}} test = [tokens.TemplateOpen(), tokens.TemplateOpen(), tokens.TemplateOpen(), tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateClose(), @@ -224,5 +225,37 @@ class TestBuilder(TreeEqualityTestCase): showkey=False)])]) self.assertWikicodeEqual(valid, self.builder.build(test)) + def test_integration2(self): + """an even more audacious test for building a horrible wikicode mess""" + # {{a|b|{{c|[[d]]{{{e}}}}}}}[[f|{{{g}}}<!--h-->]]{{i|j=&nbsp;}} + test = [tokens.TemplateOpen(), tokens.Text(text="a"), + tokens.TemplateParamSeparator(), tokens.Text(text="b"), + tokens.TemplateParamSeparator(), tokens.TemplateOpen(), + tokens.Text(text="c"), tokens.TemplateParamSeparator(), + tokens.WikilinkOpen(), tokens.Text(text="d"), + tokens.WikilinkClose(), tokens.ArgumentOpen(), + tokens.Text(text="e"), tokens.ArgumentClose(), + tokens.TemplateClose(), tokens.TemplateClose(), + tokens.WikilinkOpen(), tokens.Text(text="f"), + tokens.WikilinkSeparator(), tokens.ArgumentOpen(), + tokens.Text(text="g"), tokens.ArgumentClose(), + tokens.CommentStart(), tokens.Text(text="h"), + tokens.CommentEnd(), tokens.WikilinkClose(), + tokens.TemplateOpen(), tokens.Text(text="i"), + tokens.TemplateParamSeparator(), tokens.Text(text="j"), + tokens.TemplateParamEquals(), tokens.HTMLEntityStart(), + tokens.Text(text="nbsp"), tokens.HTMLEntityEnd(), + tokens.TemplateClose()] + valid = wrap( + [Template(wrap([Text("a")]), params=[Parameter(wrap([Text("1")]), + wrap([Text("b")]), showkey=False), Parameter(wrap([Text("2")]), + wrap([Template(wrap([Text("c")]), params=[Parameter(wrap([Text("1") + ]), wrap([Wikilink(wrap([Text("d")])), Argument(wrap([Text("e")]))] + ), showkey=False)])]), showkey=False)]), Wikilink(wrap([Text("f")] + ), wrap([Argument(wrap([Text("g")])), Comment(wrap([Text("h")]))]) + ), Template(wrap([Text("i")]), params=[Parameter(wrap([Text("j")]), + wrap([HTMLEntity("nbsp", named=True)]))])]) + self.assertWikicodeEqual(valid, self.builder.build(test)) + if __name__ == "__main__": unittest.main(verbosity=2) From b0e3cd9cae58a0ac8490d2ee0c9b87e05de456b5 Mon Sep 17 00:00:00 2001 From: Kunal Mehta Date: Thu, 18 Apr 2013 18:23:08 -0500 Subject: [PATCH 130/180] Fix Pywikipedia references in documentation --- docs/integration.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/integration.rst b/docs/integration.rst index d0e54db..bd4e0ac 100644 --- a/docs/integration.rst +++ b/docs/integration.rst @@ -7,12 +7,12 @@ Integration :py:func:`mwparserfromhell.parse() <mwparserfromhell.parse>` on :py:meth:`~earwigbot.wiki.page.Page.get`.
-If you're using PyWikipedia_, your code might look like this:: +If you're using Pywikipedia_, your code might look like this:: import mwparserfromhell import wikipedia as pywikibot def parse(title): - site = pywikibot.get_site() + site = pywikibot.getSite() page = pywikibot.Page(site, title) text = page.get() return mwparserfromhell.parse(text) @@ -31,5 +31,5 @@ following code (via the API_):: return mwparserfromhell.parse(text) .. _EarwigBot: https://github.com/earwig/earwigbot -.. _PyWikipedia: http://pywikipediabot.sourceforge.net/ +.. _Pywikipedia: http://pywikipediabot.sourceforge.net/ .. _API: http://mediawiki.org/wiki/API From 9c7517b22a6ff0f0ab8834b2e39bf56d886d6989 Mon Sep 17 00:00:00 2001 From: Kunal Mehta Date: Thu, 18 Apr 2013 18:32:51 -0500 Subject: [PATCH 131/180] Link to mediawiki.org instead of sf.net --- docs/integration.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/integration.rst b/docs/integration.rst index bd4e0ac..78810b8 100644 --- a/docs/integration.rst +++ b/docs/integration.rst @@ -31,5 +31,5 @@ following code (via the API_):: return mwparserfromhell.parse(text) .. _EarwigBot: https://github.com/earwig/earwigbot -.. _Pywikipedia: http://pywikipediabot.sourceforge.net/ +.. _Pywikipedia: https://www.mediawiki.org/wiki/Manual:Pywikipediabot .. _API: http://mediawiki.org/wiki/API From 6e399275263af3feae4bcd43ae17ccd5c7d2d1b6 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 18 Apr 2013 20:06:27 -0400 Subject: [PATCH 132/180] Update README with same changes (#27) --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 90e896f..77262ca 100644 --- a/README.rst +++ b/README.rst @@ -107,12 +107,12 @@ Integration ``Page`` objects have a ``parse`` method that essentially calls ``mwparserfromhell.parse()`` on ``page.get()``. -If you're using PyWikipedia_, your code might look like this:: +If you're using Pywikipedia_, your code might look like this:: import mwparserfromhell import wikipedia as pywikibot def parse(title): - site = pywikibot.get_site() + site = pywikibot.getSite() page = pywikibot.Page(site, title) text = page.get() return mwparserfromhell.parse(text) @@ -138,5 +138,5 @@ following code (via the API_):: .. _Python Package Index: http://pypi.python.org .. _get pip: http://pypi.python.org/pypi/pip .. _EarwigBot: https://github.com/earwig/earwigbot -.. _PyWikipedia: http://pywikipediabot.sourceforge.net/ +.. _Pywikipedia: https://www.mediawiki.org/wiki/Manual:Pywikipediabot .. _API: http://mediawiki.org/wiki/API From 8db40689edf51d6febfaae3340fc6af6d34329ad Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 20 Apr 2013 17:59:20 -0400 Subject: [PATCH 133/180] Improve a few things about nodes; simplify a method in Wikicode. --- mwparserfromhell/nodes/html_entity.py | 47 ++++++++++++++++++++++++++++------- mwparserfromhell/nodes/wikilink.py | 5 +++- mwparserfromhell/wikicode.py | 8 +----- 3 files changed, 43 insertions(+), 17 deletions(-) diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index 5b7607c..1bf1c78 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -63,7 +63,8 @@ class HTMLEntity(Node): return self.normalize() return self - def _unichr(self, value): + @staticmethod + def _unichr(value): """Implement the builtin unichr() with support for non-BMP code points. On wide Python builds, this functions like the normal unichr().
On @@ -119,19 +120,47 @@ class HTMLEntity(Node): @value.setter def value(self, newval): newval = str(newval) - if newval not in htmlentities.entitydefs: - test = int(self.value, 16) - if test < 0 or (test > 0x10FFFF and int(self.value) > 0x10FFFF): - raise ValueError(newval) + try: + int(newval) + except ValueError: + try: + int(newval, 16) + except ValueError: + if newval not in htmlentities.entitydefs: + raise ValueError("entity value is not a valid name") + self._named = True + self._hexadecimal = False + else: + if int(newval, 16) < 0 or int(newval, 16) > 0x10FFFF: + raise ValueError("entity value is not in range(0x110000)") + self._named = False + self._hexadecimal = True + else: + test = int(newval, 16 if self.hexadecimal else 10) + if test < 0 or test > 0x10FFFF: + raise ValueError("entity value is not in range(0x110000)") + self._named = False self._value = newval @named.setter def named(self, newval): - self._named = bool(newval) + newval = bool(newval) + if newval and self.value not in htmlentities.entitydefs: + raise ValueError("entity value is not a valid name") + if not newval: + try: + int(self.value, 16) + except ValueError: + err = "current entity value is not a valid Unicode codepoint" + raise ValueError(err) + self._named = newval @hexadecimal.setter def hexadecimal(self, newval): - self._hexadecimal = bool(newval) + newval = bool(newval) + if newval and self.named: + raise ValueError("a named entity cannot be hexadecimal") + self._hexadecimal = newval @hex_char.setter def hex_char(self, newval): @@ -145,5 +174,5 @@ class HTMLEntity(Node): if self.named: return unichr(htmlentities.name2codepoint[self.value]) if self.hexadecimal: - return self._unichr(int(self.value, 16)) - return self._unichr(int(self.value)) + return HTMLEntity._unichr(int(self.value, 16)) + return HTMLEntity._unichr(int(self.value)) diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index 6fea468..527e9bb 100644 --- a/mwparserfromhell/nodes/wikilink.py +++ b/mwparserfromhell/nodes/wikilink.py @@ -79,4 +79,7 @@ class Wikilink(Node): @text.setter def text(self, value): - self._text = parse_anything(value) + if value is None: + self._text = None + else: + self._text = parse_anything(value) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 8d8ebe2..f2d9c89 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -88,13 +88,7 @@ class Wikicode(StringMixIn): If *obj* is a ``Node``, the function will test whether they are the same object, otherwise it will compare them with ``==``. """ - if isinstance(obj, Node): - if node is obj: - return True - else: - if node == obj: - return True - return False + return (node is obj) if isinstance(obj, Node) else (node == obj) def _contains(self, nodes, obj): """Return ``True`` if *obj* is inside of *nodes*, else ``False``. From 5cf451eb22aa47b119183eb25de141627d0e1ef7 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 20 Apr 2013 18:01:39 -0400 Subject: [PATCH 134/180] Adding a bunch of tests for different nodes. 
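
All of the new test modules follow one convention: expected trees are built by hand with a tiny wrap() helper and compared using the TreeEqualityTestCase asserts. In outline (a sketch of the shared pattern, not an excerpt from any single file)::

    from mwparserfromhell.nodes import Text, Wikilink
    from mwparserfromhell.smart_list import SmartList
    from mwparserfromhell.wikicode import Wikicode

    wrap = lambda L: Wikicode(SmartList(L))

    # A hand-built expected tree for "[[foo|bar]]"; the test methods
    # compare trees like this against real nodes.
    expected = wrap([Wikilink(wrap([Text("foo")]), wrap([Text("bar")]))])
    assert str(expected) == "[[foo|bar]]"
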
--- tests/test_argument.py | 99 ++++++++++++++++++++++++++ tests/test_comment.py | 62 +++++++++++++++++ tests/test_heading.py | 88 ++++++++++++++++++++++++ tests/test_html_entity.py | 172 ++++++++++++++++++++++++++++++++++++++++++++++ tests/test_text.py | 69 +++++++++++++++++++ tests/test_wikilink.py | 99 ++++++++++++++++++++++++++ 6 files changed, 589 insertions(+) create mode 100644 tests/test_argument.py create mode 100644 tests/test_comment.py create mode 100644 tests/test_heading.py create mode 100644 tests/test_html_entity.py create mode 100644 tests/test_text.py create mode 100644 tests/test_wikilink.py diff --git a/tests/test_argument.py b/tests/test_argument.py new file mode 100644 index 0000000..e0524c4 --- /dev/null +++ b/tests/test_argument.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Argument, Text +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) + +class TestArgument(TreeEqualityTestCase): + """Test cases for the Argument node.""" + + def test_unicode(self): + """test Argument.__unicode__()""" + node = Argument(wrap([Text("foobar")])) + self.assertEqual("{{{foobar}}}", str(node)) + node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) + self.assertEqual("{{{foo|bar}}}", str(node2)) + + def test_strip(self): + """test Argument.__strip__()""" + node = Argument(wrap([Text("foobar")])) + self.assertIs(None, node.__strip__(True, True)) + self.assertIs(None, node.__strip__(True, False)) + self.assertIs(None, node.__strip__(False, True)) + self.assertIs(None, node.__strip__(False, False)) + + node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) + self.assertEqual("bar", node2.__strip__(True, True)) + self.assertEqual("bar", node2.__strip__(True, False)) + self.assertEqual("bar", node2.__strip__(False, True)) + self.assertEqual("bar", node2.__strip__(False, False)) + + def test_showtree(self): + """test Argument.__showtree__()""" + output = [] + getter, marker = object(), object() + get = lambda code: output.append((getter, code)) + mark = lambda: output.append(marker) + node1 = Argument(wrap([Text("foobar")])) + node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) + node1.__showtree__(output.append, get, mark) + node2.__showtree__(output.append, get, mark) + valid = [ + "{{{", (getter, node1.name), "}}}", "{{{", (getter, node2.name), + " | ", marker, (getter, node2.default), "}}}"] + self.assertEqual(valid, output) + + def test_name(self): + """test getter/setter for the name attribute""" + name = wrap([Text("foobar")]) + node1 = Argument(name) + node2 = Argument(name, wrap([Text("baz")])) + self.assertIs(name, node1.name) + self.assertIs(name, node2.name) + node1.name = "héhehé" + node2.name = "héhehé" + self.assertWikicodeEqual(wrap([Text("héhehé")]), node1.name) + self.assertWikicodeEqual(wrap([Text("héhehé")]), node2.name) + + def test_default(self): + """test getter/setter for the default attribute""" + default = wrap([Text("baz")]) + node1 = Argument(wrap([Text("foobar")])) + node2 = Argument(wrap([Text("foobar")]), default) + self.assertIs(None, node1.default) + self.assertIs(default, node2.default) + node1.default = "buzz" + node2.default = None + self.assertWikicodeEqual(wrap([Text("buzz")]), node1.default) + self.assertIs(None, node2.default) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_comment.py b/tests/test_comment.py new file mode 100644 index 0000000..980f594 --- /dev/null +++ b/tests/test_comment.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies 
or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Comment + +from ._test_tree_equality import TreeEqualityTestCase + +class TestComment(TreeEqualityTestCase): + """Test cases for the Comment node.""" + + def test_unicode(self): + """test Comment.__unicode__()""" + node = Comment("foobar") + self.assertEqual("<!--foobar-->", str(node)) + + def test_strip(self): + """test Comment.__strip__()""" + node = Comment("foobar") + self.assertIs(None, node.__strip__(True, True)) + self.assertIs(None, node.__strip__(True, False)) + self.assertIs(None, node.__strip__(False, True)) + self.assertIs(None, node.__strip__(False, False)) + + def test_showtree(self): + """test Comment.__showtree__()""" + output = [] + node = Comment("foobar") + node.__showtree__(output.append, None, None) + self.assertEqual(["<!--foobar-->"], output) + + def test_contents(self): + """test getter/setter for the contents attribute""" + node = Comment("foobar") + self.assertEqual("foobar", node.contents) + node.contents = "barfoo" + self.assertEqual("barfoo", node.contents) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_heading.py b/tests/test_heading.py new file mode 100644 index 0000000..a0e78e5 --- /dev/null +++ b/tests/test_heading.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE.
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Heading, Text +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) + +class TestHeading(TreeEqualityTestCase): + """Test cases for the Heading node.""" + + def test_unicode(self): + """test Heading.__unicode__()""" + node = Heading(wrap([Text("foobar")]), 2) + self.assertEqual("==foobar==", str(node)) + node2 = Heading(wrap([Text(" zzz ")]), 5) + self.assertEqual("===== zzz =====", str(node2)) + + def test_strip(self): + """test Heading.__strip__()""" + node = Heading(wrap([Text("foobar")]), 3) + self.assertEqual("foobar", node.__strip__(True, True)) + self.assertEqual("foobar", node.__strip__(True, False)) + self.assertEqual("foobar", node.__strip__(False, True)) + self.assertEqual("foobar", node.__strip__(False, False)) + + def test_showtree(self): + """test Heading.__showtree__()""" + output = [] + getter = object() + get = lambda code: output.append((getter, code)) + node1 = Heading(wrap([Text("foobar")]), 3) + node2 = Heading(wrap([Text(" baz ")]), 4) + node1.__showtree__(output.append, get, None) + node2.__showtree__(output.append, get, None) + valid = ["===", (getter, node1.title), "===", + "====", (getter, node2.title), "===="] + self.assertEqual(valid, output) + + def test_title(self): + """test getter/setter for the title attribute""" + title = wrap([Text("foobar")]) + node = Heading(title, 3) + self.assertIs(title, node.title) + node.title = "héhehé" + self.assertWikicodeEqual(wrap([Text("héhehé")]), node.title) + + def test_level(self): + """test getter/setter for the level attribute""" + node = Heading(wrap([Text("foobar")]), 3) + self.assertEqual(3, node.level) + node.level = 5 + self.assertEqual(5, node.level) + node.level = True + self.assertEqual(1, node.level) + self.assertRaises(ValueError, setattr, node, "level", 0) + self.assertRaises(ValueError, setattr, node, "level", 7) + self.assertRaises(ValueError, setattr, node, "level", "abc") + self.assertRaises(ValueError, setattr, node, "level", False) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py new file mode 100644 index 0000000..20c8fc0 --- /dev/null +++ b/tests/test_html_entity.py @@ -0,0 +1,172 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import HTMLEntity +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) + +class TestHTMLEntity(TreeEqualityTestCase): + """Test cases for the HTMLEntity node.""" + + def test_unicode(self): + """test HTMLEntity.__unicode__()""" + node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) + node2 = HTMLEntity("107", named=False, hexadecimal=False) + node3 = HTMLEntity("6b", named=False, hexadecimal=True) + node4 = HTMLEntity("6C", named=False, hexadecimal=True, hex_char="X") + self.assertEqual("&nbsp;", str(node1)) + self.assertEqual("&#107;", str(node2)) + self.assertEqual("&#x6b;", str(node3)) + self.assertEqual("&#X6C;", str(node4)) + + def test_strip(self): + """test HTMLEntity.__strip__()""" + node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) + node2 = HTMLEntity("107", named=False, hexadecimal=False) + node3 = HTMLEntity("e9", named=False, hexadecimal=True) + + self.assertEqual("\xa0", node1.__strip__(True, True)) + self.assertEqual("\xa0", node1.__strip__(True, False)) + self.assertEqual("&nbsp;", node1.__strip__(False, True)) + self.assertEqual("&nbsp;", node1.__strip__(False, False)) + self.assertEqual("k", node2.__strip__(True, True)) + self.assertEqual("k", node2.__strip__(True, False)) + self.assertEqual("&#107;", node2.__strip__(False, True)) + self.assertEqual("&#107;", node2.__strip__(False, False)) + self.assertEqual("é", node3.__strip__(True, True)) + self.assertEqual("é", node3.__strip__(True, False)) + self.assertEqual("&#xe9;", node3.__strip__(False, True)) + self.assertEqual("&#xe9;", node3.__strip__(False, False)) + + def test_showtree(self): + """test HTMLEntity.__showtree__()""" + output = [] + node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) + node2 = HTMLEntity("107", named=False, hexadecimal=False) + node3 = HTMLEntity("e9", named=False, hexadecimal=True) + node1.__showtree__(output.append, None, None) + node2.__showtree__(output.append, None, None) + node3.__showtree__(output.append, None, None) + res = ["&nbsp;", "&#107;", "&#xe9;"] + self.assertEqual(res, output) + + def test_value(self): + """test HTMLEntity.value()""" + node1 = HTMLEntity("nbsp") + node2 = HTMLEntity("107") + node3 = HTMLEntity("e9") + self.assertEquals("nbsp", node1.value) + self.assertEquals("107", node2.value) + self.assertEquals("e9", node3.value) + + node1.value = "ffa4" + node2.value = 72 + node3.value = "Sigma" + self.assertEquals("ffa4", node1.value) + self.assertFalse(node1.named) + self.assertTrue(node1.hexadecimal) + self.assertEquals("72", node2.value) + self.assertFalse(node2.named) + self.assertFalse(node2.hexadecimal) + self.assertEquals("Sigma", node3.value) + self.assertTrue(node3.named) + self.assertFalse(node3.hexadecimal) + + node1.value = "10FFFF" + node2.value = 110000 + node2.value = 1114111 + self.assertRaises(ValueError, setattr, node3, "value", "") + self.assertRaises(ValueError, setattr, node3, "value", "foobar") + self.assertRaises(ValueError, setattr, node3, "value", True) + self.assertRaises(ValueError, setattr, node3, "value", -1) + self.assertRaises(ValueError, setattr,
node1, "value", 110000) + self.assertRaises(ValueError, setattr, node1, "value", "1114112") + + def test_named(self): + """test HTMLEntity.named()""" + node1 = HTMLEntity("nbsp") + node2 = HTMLEntity("107") + node3 = HTMLEntity("e9") + self.assertTrue(node1.named) + self.assertFalse(node2.named) + self.assertFalse(node3.named) + node1.named = 1 + node2.named = 0 + node3.named = 0 + self.assertTrue(node1.named) + self.assertFalse(node2.named) + self.assertFalse(node3.named) + self.assertRaises(ValueError, setattr, node1, "named", False) + self.assertRaises(ValueError, setattr, node2, "named", True) + self.assertRaises(ValueError, setattr, node3, "named", True) + + def test_hexadecimal(self): + """test HTMLEntity.hexadecimal()""" + node1 = HTMLEntity("nbsp") + node2 = HTMLEntity("107") + node3 = HTMLEntity("e9") + self.assertFalse(node1.hexadecimal) + self.assertFalse(node2.hexadecimal) + self.assertTrue(node3.hexadecimal) + node1.hexadecimal = False + node2.hexadecimal = True + node3.hexadecimal = False + self.assertFalse(node1.hexadecimal) + self.assertTrue(node2.hexadecimal) + self.assertFalse(node3.hexadecimal) + self.assertRaises(ValueError, setattr, node1, "hexadecimal", True) + + def test_hex_char(self): + """test HTMLEntity.hex_char()""" + node1 = HTMLEntity("e9") + node2 = HTMLEntity("e9", hex_char="X") + self.assertEquals("x", node1.hex_char) + self.assertEquals("X", node2.hex_char) + node1.hex_char = "X" + node2.hex_char = "x" + self.assertEquals("X", node1.hex_char) + self.assertEquals("x", node2.hex_char) + self.assertRaises(ValueError, setattr, node1, "hex_char", 123) + self.assertRaises(ValueError, setattr, node1, "hex_char", "foobar") + self.assertRaises(ValueError, setattr, node1, "hex_char", True) + + def test_normalize(self): + """test HTMLEntity.normalize()""" + node1 = HTMLEntity("nbsp") + node2 = HTMLEntity("107") + node3 = HTMLEntity("e9") + node4 = HTMLEntity("1f648") + self.assertEquals("\xa0", node1.normalize()) + self.assertEquals("k", node2.normalize()) + self.assertEquals("é", node3.normalize()) + self.assertEquals("\U0001F648", node4.normalize()) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_text.py b/tests/test_text.py new file mode 100644 index 0000000..13636bf --- /dev/null +++ b/tests/test_text.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Text + +class TestText(unittest.TestCase): + """Test cases for the Text node.""" + + def test_unicode(self): + """test Text.__unicode__()""" + node = Text("foobar") + self.assertEqual("foobar", str(node)) + node2 = Text("fóóbar") + self.assertEqual("fóóbar", str(node2)) + + def test_strip(self): + """test Text.__strip__()""" + node = Text("foobar") + self.assertIs(node, node.__strip__(True, True)) + self.assertIs(node, node.__strip__(True, False)) + self.assertIs(node, node.__strip__(False, True)) + self.assertIs(node, node.__strip__(False, False)) + + def test_showtree(self): + """test Text.__showtree__()""" + output = [] + node1 = Text("foobar") + node2 = Text("fóóbar") + node3 = Text("𐌲𐌿𐍄") + node1.__showtree__(output.append, None, None) + node2.__showtree__(output.append, None, None) + node3.__showtree__(output.append, None, None) + res = ["foobar", r"f\xf3\xf3bar", "\\U00010332\\U0001033f\\U00010344"] + self.assertEqual(res, output) + + def test_value(self): + """test getter/setter for the value attribute""" + node = Text("foobar") + self.assertEqual("foobar", node.value) + self.assertIsInstance(node.value, str) + node.value = "héhéhé" + self.assertEqual("héhéhé", node.value) + self.assertIsInstance(node.value, str) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py new file mode 100644 index 0000000..422489f --- /dev/null +++ b/tests/test_wikilink.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Text, Wikilink +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) + +class TestWikilink(TreeEqualityTestCase): + """Test cases for the Wikilink node.""" + + def test_unicode(self): + """test Wikilink.__unicode__()""" + node = Wikilink(wrap([Text("foobar")])) + self.assertEqual("[[foobar]]", str(node)) + node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) + self.assertEqual("[[foo|bar]]", str(node2)) + + def test_strip(self): + """test Wikilink.__strip__()""" + node = Wikilink(wrap([Text("foobar")])) + self.assertEqual("foobar", node.__strip__(True, True)) + self.assertEqual("foobar", node.__strip__(True, False)) + self.assertEqual("foobar", node.__strip__(False, True)) + self.assertEqual("foobar", node.__strip__(False, False)) + + node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) + self.assertEqual("bar", node2.__strip__(True, True)) + self.assertEqual("bar", node2.__strip__(True, False)) + self.assertEqual("bar", node2.__strip__(False, True)) + self.assertEqual("bar", node2.__strip__(False, False)) + + def test_showtree(self): + """test Wikilink.__showtree__()""" + output = [] + getter, marker = object(), object() + get = lambda code: output.append((getter, code)) + mark = lambda: output.append(marker) + node1 = Wikilink(wrap([Text("foobar")])) + node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) + node1.__showtree__(output.append, get, mark) + node2.__showtree__(output.append, get, mark) + valid = [ + "[[", (getter, node1.title), "]]", "[[", (getter, node2.title), + " | ", marker, (getter, node2.text), "]]"] + self.assertEqual(valid, output) + + def test_title(self): + """test getter/setter for the title attribute""" + title = wrap([Text("foobar")]) + node1 = Wikilink(title) + node2 = Wikilink(title, wrap([Text("baz")])) + self.assertIs(title, node1.title) + self.assertIs(title, node2.title) + node1.title = "héhehé" + node2.title = "héhehé" + self.assertWikicodeEqual(wrap([Text("héhehé")]), node1.title) + self.assertWikicodeEqual(wrap([Text("héhehé")]), node2.title) + + def test_text(self): + """test getter/setter for the text attribute""" + text = wrap([Text("baz")]) + node1 = Wikilink(wrap([Text("foobar")])) + node2 = Wikilink(wrap([Text("foobar")]), text) + self.assertIs(None, node1.text) + self.assertIs(text, node2.text) + node1.text = "buzz" + node2.text = None + self.assertWikicodeEqual(wrap([Text("buzz")]), node1.text) + self.assertIs(None, node2.text) + +if __name__ == "__main__": + unittest.main(verbosity=2) From 6bf8cfd2adcf536113f3a9ace3901b08540d7ff9 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 20 Apr 2013 18:09:34 -0400 Subject: [PATCH 135/180] Fix tests for Python 3. --- mwparserfromhell/nodes/html_entity.py | 55 ++++++++++++++++++----------------- tests/test_html_entity.py | 28 +++++++++--------- 2 files changed, 43 insertions(+), 40 deletions(-) diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index 1bf1c78..b51bd92 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -23,7 +23,7 @@ from __future__ import unicode_literals from . 
import Node -from ..compat import htmlentities, str +from ..compat import htmlentities, py3k, str __all__ = ["HTMLEntity"] @@ -63,29 +63,31 @@ class HTMLEntity(Node): return self.normalize() return self - @staticmethod - def _unichr(value): - """Implement the builtin unichr() with support for non-BMP code points. + if not py3k: + @staticmethod + def _unichr(value): + """Implement builtin unichr() with support for non-BMP code points. - On wide Python builds, this functions like the normal unichr(). On - narrow builds, this returns the value's corresponding surrogate pair. - """ - try: - return unichr(value) - except ValueError: - # Test whether we're on the wide or narrow Python build. Check the - # length of a non-BMP code point (U+1F64A, SPEAK-NO-EVIL MONKEY): - if len("\U0001F64A") == 2: - # Ensure this is within the range we can encode: - if value > 0x10FFFF: - raise ValueError("unichr() arg not in range(0x110000)") - code = value - 0x10000 - if value < 0: # Invalid code point - raise - lead = 0xD800 + (code >> 10) - trail = 0xDC00 + (code % (1 << 10)) - return unichr(lead) + unichr(trail) - raise + On wide Python builds, this functions like the normal unichr(). On + narrow builds, this returns the value's encoded surrogate pair. + """ + try: + return unichr(value) + except ValueError: + # Test whether we're on the wide or narrow Python build. Check + # the length of a non-BMP code point + # (U+1F64A, SPEAK-NO-EVIL MONKEY): + if len("\U0001F64A") == 2: + # Ensure this is within the range we can encode: + if value > 0x10FFFF: + raise ValueError("unichr() arg not in range(0x110000)") + code = value - 0x10000 + if value < 0: # Invalid code point + raise + lead = 0xD800 + (code >> 10) + trail = 0xDC00 + (code % (1 << 10)) + return unichr(lead) + unichr(trail) + raise @property def value(self): @@ -171,8 +173,9 @@ class HTMLEntity(Node): def normalize(self): """Return the unicode character represented by the HTML entity.""" + chrfunc = chr if py3k else HTMLEntity._unichr if self.named: - return unichr(htmlentities.name2codepoint[self.value]) + return chrfunc(htmlentities.name2codepoint[self.value]) if self.hexadecimal: - return HTMLEntity._unichr(int(self.value, 16)) - return HTMLEntity._unichr(int(self.value)) + return chrfunc(int(self.value, 16)) + return chrfunc(int(self.value)) diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index 20c8fc0..4bf32e8 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -82,20 +82,20 @@ class TestHTMLEntity(TreeEqualityTestCase): node1 = HTMLEntity("nbsp") node2 = HTMLEntity("107") node3 = HTMLEntity("e9") - self.assertEquals("nbsp", node1.value) - self.assertEquals("107", node2.value) - self.assertEquals("e9", node3.value) + self.assertEqual("nbsp", node1.value) + self.assertEqual("107", node2.value) + self.assertEqual("e9", node3.value) node1.value = "ffa4" node2.value = 72 node3.value = "Sigma" - self.assertEquals("ffa4", node1.value) + self.assertEqual("ffa4", node1.value) self.assertFalse(node1.named) self.assertTrue(node1.hexadecimal) - self.assertEquals("72", node2.value) + self.assertEqual("72", node2.value) self.assertFalse(node2.named) self.assertFalse(node2.hexadecimal) - self.assertEquals("Sigma", node3.value) + self.assertEqual("Sigma", node3.value) self.assertTrue(node3.named) self.assertFalse(node3.hexadecimal) @@ -147,12 +147,12 @@ class TestHTMLEntity(TreeEqualityTestCase): """test HTMLEntity.hex_char()""" node1 = HTMLEntity("e9") node2 = HTMLEntity("e9", hex_char="X") - self.assertEquals("x", 
node1.hex_char) - self.assertEquals("X", node2.hex_char) + self.assertEqual("x", node1.hex_char) + self.assertEqual("X", node2.hex_char) node1.hex_char = "X" node2.hex_char = "x" - self.assertEquals("X", node1.hex_char) - self.assertEquals("x", node2.hex_char) + self.assertEqual("X", node1.hex_char) + self.assertEqual("x", node2.hex_char) self.assertRaises(ValueError, setattr, node1, "hex_char", 123) self.assertRaises(ValueError, setattr, node1, "hex_char", "foobar") self.assertRaises(ValueError, setattr, node1, "hex_char", True) @@ -163,10 +163,10 @@ class TestHTMLEntity(TreeEqualityTestCase): node2 = HTMLEntity("107") node3 = HTMLEntity("e9") node4 = HTMLEntity("1f648") - self.assertEquals("\xa0", node1.normalize()) - self.assertEquals("k", node2.normalize()) - self.assertEquals("é", node3.normalize()) - self.assertEquals("\U0001F648", node4.normalize()) + self.assertEqual("\xa0", node1.normalize()) + self.assertEqual("k", node2.normalize()) + self.assertEqual("é", node3.normalize()) + self.assertEqual("\U0001F648", node4.normalize()) if __name__ == "__main__": unittest.main(verbosity=2) From 6a385b392190d9c4ce89c8cc8777efcb587972f5 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 20 Apr 2013 21:44:44 -0400 Subject: [PATCH 136/180] TestParameter and a fair chunk of TestTemplate. --- mwparserfromhell/nodes/template.py | 9 ++- tests/test_html_entity.py | 10 +-- tests/test_parameter.py | 79 +++++++++++++++++++++ tests/test_template.py | 140 +++++++++++++++++++++++++++++++++++++ 4 files changed, 228 insertions(+), 10 deletions(-) create mode 100644 tests/test_parameter.py create mode 100644 tests/test_template.py diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index e34ba7a..eb7f3a8 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -183,11 +183,10 @@ class Template(Node): def get(self, name): """Get the parameter whose name is *name*. - The returned object is a - :py:class:`~.Parameter` instance. Raises :py:exc:`ValueError` if no - parameter has this name. Since multiple parameters can have the same - name, we'll return the last match, since the last parameter is the only - one read by the MediaWiki parser. + The returned object is a :py:class:`~.Parameter` instance. Raises + :py:exc:`ValueError` if no parameter has this name. Since multiple + parameters can have the same name, we'll return the last match, since + the last parameter is the only one read by the MediaWiki parser. 
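# A standalone sketch of the "last match wins" rule documented above; the
# Param tuple and find_param() are illustrative stand-ins for this aside,
# not the library's actual classes:
from collections import namedtuple

Param = namedtuple("Param", ["name", "value"])

def find_param(params, name):
    """Return the last parameter whose stripped name equals *name*."""
    for param in reversed(params):
        if param.name.strip() == name:
            return param
    raise ValueError(name)

# In {{foo|a|b=c|1=d}}, the first and third parameters are both named "1";
# MediaWiki renders the later one, so the lookup walks the list backwards:
params = [Param("1", "a"), Param("b", "c"), Param("1", "d")]
assert find_param(params, "1").value == "d"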
""" name = name.strip() if isinstance(name, basestring) else str(name) for param in reversed(self.params): diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index 4bf32e8..a7a9669 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -78,7 +78,7 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertEqual(res, output) def test_value(self): - """test HTMLEntity.value()""" + """test getter/setter for the value attribute""" node1 = HTMLEntity("nbsp") node2 = HTMLEntity("107") node3 = HTMLEntity("e9") @@ -110,7 +110,7 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertRaises(ValueError, setattr, node1, "value", "1114112") def test_named(self): - """test HTMLEntity.named()""" + """test getter/setter for the named attribute""" node1 = HTMLEntity("nbsp") node2 = HTMLEntity("107") node3 = HTMLEntity("e9") @@ -128,7 +128,7 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertRaises(ValueError, setattr, node3, "named", True) def test_hexadecimal(self): - """test HTMLEntity.hexadecimal()""" + """test getter/setter for the hexadecimal attribute""" node1 = HTMLEntity("nbsp") node2 = HTMLEntity("107") node3 = HTMLEntity("e9") @@ -144,7 +144,7 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertRaises(ValueError, setattr, node1, "hexadecimal", True) def test_hex_char(self): - """test HTMLEntity.hex_char()""" + """test getter/setter for the hex_char attribute""" node1 = HTMLEntity("e9") node2 = HTMLEntity("e9", hex_char="X") self.assertEqual("x", node1.hex_char) @@ -158,7 +158,7 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertRaises(ValueError, setattr, node1, "hex_char", True) def test_normalize(self): - """test HTMLEntity.normalize()""" + """test getter/setter for the normalize attribute""" node1 = HTMLEntity("nbsp") node2 = HTMLEntity("107") node3 = HTMLEntity("e9") diff --git a/tests/test_parameter.py b/tests/test_parameter.py new file mode 100644 index 0000000..b46ad71 --- /dev/null +++ b/tests/test_parameter.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Text +from mwparserfromhell.nodes.extras import Parameter +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) + +class TestParameter(TreeEqualityTestCase): + """Test cases for the Parameter node extra.""" + + def test_unicode(self): + """test Parameter.__unicode__()""" + node = Parameter(wrap([Text("1")]), wrap([Text("foo")]), showkey=False) + self.assertEqual("foo", str(node)) + node2 = Parameter(wrap([Text("foo")]), wrap([Text("bar")])) + self.assertEqual("foo=bar", str(node2)) + + def test_name(self): + """test getter/setter for the name attribute""" + name1 = wrap([Text("1")]) + name2 = wrap([Text("foobar")]) + node1 = Parameter(name1, wrap([Text("foobar")]), showkey=False) + node2 = Parameter(name2, wrap([Text("baz")])) + self.assertIs(name1, node1.name) + self.assertIs(name2, node2.name) + node1.name = "héhehé" + node2.name = "héhehé" + self.assertWikicodeEqual(wrap([Text("héhehé")]), node1.name) + self.assertWikicodeEqual(wrap([Text("héhehé")]), node2.name) + + def test_value(self): + """test getter/setter for the value attribute""" + value = wrap([Text("bar")]) + node = Parameter(wrap([Text("foo")]), value) + self.assertIs(value, node.value) + node.value = "héhehé" + self.assertWikicodeEqual(wrap([Text("héhehé")]), node.value) + + def test_showkey(self): + """test getter/setter for the showkey attribute""" + node1 = Parameter(wrap([Text("1")]), wrap([Text("foo")]), showkey=False) + node2 = Parameter(wrap([Text("foo")]), wrap([Text("bar")])) + self.assertFalse(node1.showkey) + self.assertTrue(node2.showkey) + node1.showkey = True + node2.showkey = "" + self.assertTrue(node1.showkey) + self.assertFalse(node2.showkey) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_template.py b/tests/test_template.py new file mode 100644 index 0000000..fde7522 --- /dev/null +++ b/tests/test_template.py @@ -0,0 +1,140 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
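# Context for the tests below: a Template serializes as its name plus each
# parameter prefixed with "|".  A rough sketch of that shape, where
# render_template() is illustrative and takes pre-rendered parameters:
def render_template(name, params):
    """Join a template name and rendered parameters into wikicode."""
    return "{{" + name + "".join("|" + p for p in params) + "}}"

assert render_template("foo", []) == "{{foo}}"
assert render_template("foo", ["bar", "abc=def"]) == "{{foo|bar|abc=def}}"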
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Template, Text +from mwparserfromhell.nodes.extras import Parameter +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) +pgens = lambda k, v: Parameter(wrap([Text(k)]), wrap([Text(v)]), True) +pgenh = lambda k, v: Parameter(wrap([Text(k)]), wrap([Text(v)]), False) + +class TestTemplate(TreeEqualityTestCase): + """Test cases for the Template node.""" + + def test_unicode(self): + """test Template.__unicode__()""" + node = Template(wrap([Text("foobar")])) + self.assertEqual("{{foobar}}", str(node)) + node2 = Template(wrap([Text("foo")]), + [pgenh("1", "bar"), pgens("abc", "def")]) + self.assertEqual("{{foo|bar|abc=def}}", str(node2)) + + def test_strip(self): + """test Template.__strip__()""" + node1 = Template(wrap([Text("foobar")])) + node2 = Template(wrap([Text("foo")]), + [pgenh("1", "bar"), pgens("abc", "def")]) + for a in (True, False): + for b in (True, False): + self.assertEqual(None, node1.__strip__(a, b)) + self.assertEqual(None, node2.__strip__(a, b)) + + def test_showtree(self): + """test Template.__showtree__()""" + output = [] + getter, marker = object(), object() + get = lambda code: output.append((getter, code)) + mark = lambda: output.append(marker) + node1 = Template(wrap([Text("foobar")])) + node2 = Template(wrap([Text("foo")]), + [pgenh("1", "bar"), pgens("abc", "def")]) + node1.__showtree__(output.append, get, mark) + node2.__showtree__(output.append, get, mark) + valid = [ + "{{", (getter, node1.name), "}}", "{{", (getter, node2.name), + " | ", marker, (getter, node2.params[0].name), " = ", marker, + (getter, node2.params[0].value), " | ", marker, + (getter, node2.params[1].name), " = ", marker, + (getter, node2.params[1].value), "}}"] + self.assertEqual(valid, output) + + def test_name(self): + """test getter/setter for the name attribute""" + name = wrap([Text("foobar")]) + node1 = Template(name) + node2 = Template(name, [pgenh("1", "bar")]) + self.assertIs(name, node1.name) + self.assertIs(name, node2.name) + node1.name = "asdf" + node2.name = "téstïng" + self.assertWikicodeEqual(wrap([Text("asdf")]), node1.name) + self.assertWikicodeEqual(wrap([Text("téstïng")]), node2.name) + + def test_params(self): + """test getter for the params attribute""" + node1 = Template(wrap([Text("foobar")])) + plist = [pgenh("1", "bar"), pgens("abc", "def")] + node2 = Template(wrap([Text("foo")]), plist) + self.assertEqual([], node1.params) + self.assertIs(plist, node2.params) + + def test_has_param(self): + """test Template.has_param()""" + node1 = Template(wrap([Text("foobar")])) + node2 = Template(wrap([Text("foo")]), + [pgenh("1", "bar"), pgens("abc", "def")]) + node3 = Template(wrap([Text("foo")]), + [pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")]) + node4 = Template(wrap([Text("foo")]), + [pgenh("1", "a"), pgens("b", " ")]) + self.assertFalse(node1.has_param("foobar")) + self.assertTrue(node2.has_param(1)) + self.assertTrue(node2.has_param("abc")) + self.assertFalse(node2.has_param("def")) + self.assertTrue(node3.has_param("1")) + self.assertTrue(node3.has_param("b")) + self.assertFalse(node4.has_param("b")) + self.assertTrue(node3.has_param("b", False)) + self.assertTrue(node4.has_param("b", False)) + + def test_get(self): + """test Template.get()""" + node1 = Template(wrap([Text("foobar")])) + 
node2p1 = pgenh("1", "bar") + node2p2 = pgens("abc", "def") + node2 = Template(wrap([Text("foo")]), [node2p1, node2p2]) + node3p1 = pgens("b", "c") + node3p2 = pgens("1", "d") + node3 = Template(wrap([Text("foo")]), + [pgenh("1", "a"), node3p1, node3p2]) + node4p1 = pgens("b", " ") + node4 = Template(wrap([Text("foo")]), [pgenh("1", "a"), node4p1]) + self.assertRaises(ValueError, node1.get, "foobar") + self.assertIs(node2p1, node2.get(1)) + self.assertIs(node2p2, node2.get("abc")) + self.assertRaises(ValueError, node2.get, "def") + self.assertIs(node3p1, node3.get("b")) + self.assertIs(node3p2, node3.get("1")) + self.assertIs(node4p1, node4.get("b")) + + # add + # remove + +if __name__ == "__main__": + unittest.main(verbosity=2) From debcb6577e80cb5c371513e73bb82f1d2c107ec1 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 21 Apr 2013 20:50:05 -0400 Subject: [PATCH 137/180] Fix recursion issues by giving up at a certain point (closes #16). - Stop parsing new templates if the template depth gets above MAX_DEPTH (40) or if we've already tried to parse over MAX_CYCLES (100,000) templates. - Add two tests to ensure recursion works somewhat correctly. - Fix parsing the string "{{" with the Python tokenizer; add a test. --- mwparserfromhell/parser/tokenizer.c | 18 +++++++++++++----- mwparserfromhell/parser/tokenizer.h | 5 +++++ mwparserfromhell/parser/tokenizer.py | 24 +++++++++++++++++++----- tests/tokenizer/templates.mwtest | 21 +++++++++++++++++++++ 4 files changed, 58 insertions(+), 10 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index d3abb22..875263c 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -109,6 +109,8 @@ Tokenizer_push(Tokenizer* self, int context) return -1; top->next = self->topstack; self->topstack = top; + self->depth++; + self->cycles++; return 0; } @@ -174,6 +176,7 @@ Tokenizer_delete_top_of_stack(Tokenizer* self) Textbuffer_dealloc(top->textbuffer); self->topstack = top->next; free(top); + self->depth--; } /* @@ -1269,10 +1272,14 @@ Tokenizer_parse(Tokenizer* self, int context) Tokenizer_write_text(self, this); } else if (this == next && next == *"{") { - if (Tokenizer_parse_template_or_argument(self)) - return NULL; - if (self->topstack->context & LC_FAIL_NEXT) - self->topstack->context ^= LC_FAIL_NEXT; + if (Tokenizer_CAN_RECURSE(self)) { + if (Tokenizer_parse_template_or_argument(self)) + return NULL; + if (self->topstack->context & LC_FAIL_NEXT) + self->topstack->context ^= LC_FAIL_NEXT; + } + else + Tokenizer_write_text(self, this); } else if (this == *"|" && this_context & LC_TEMPLATE) { if (Tokenizer_handle_template_param(self)) @@ -1295,7 +1302,8 @@ Tokenizer_parse(Tokenizer* self, int context) Tokenizer_write_text(self, this); } else if (this == next && next == *"[") { - if (!(this_context & LC_WIKILINK_TITLE)) { + if (!(this_context & LC_WIKILINK_TITLE) && + Tokenizer_CAN_RECURSE(self)) { if (Tokenizer_parse_wikilink(self)) return NULL; if (self->topstack->context & LC_FAIL_NEXT) diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index 693538c..0730ea8 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -46,6 +46,8 @@ static const char* MARKERS[] = { #define NUM_MARKERS 18 #define TEXTBUFFER_BLOCKSIZE 1024 +#define MAX_DEPTH 40 +#define MAX_CYCLES 100000 #define MAX_ENTITY_SIZE 8 static int route_state = 0; @@ -165,12 +167,15 @@ typedef struct { Py_ssize_t head; /* current position 
in text */
     Py_ssize_t length;              /* length of text */
     int global;                     /* global context */
+    int depth;                      /* stack recursion depth */
+    int cycles;                     /* total number of stack recursions */
 } Tokenizer;
 
 
 /* Macros for accessing Tokenizer data: */
 
 #define Tokenizer_READ(self, delta) (*PyUnicode_AS_UNICODE(Tokenizer_read(self, delta)))
+#define Tokenizer_CAN_RECURSE(self) (self->depth < MAX_DEPTH && self->cycles < MAX_CYCLES)
 
 /* Function prototypes: */
 
diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py
index f995937..24eb9db 100644
--- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py
@@ -42,6 +42,8 @@ class Tokenizer(object):
     END = object()
     MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";",
                ":", "/", "-", "!", "\n", END]
+    MAX_DEPTH = 40
+    MAX_CYCLES = 100000
     regex = re.compile(r"([{}\[\]<>|=&#*;:/\-!\n])", flags=re.IGNORECASE)
 
     def __init__(self):
@@ -49,6 +51,8 @@
         self._head = 0
         self._stacks = []
         self._global = 0
+        self._depth = 0
+        self._cycles = 0
 
     @property
     def _stack(self):
@@ -76,6 +80,8 @@
     def _push(self, context=0):
         """Add a new token stack, context, and textbuffer to the list."""
         self._stacks.append([[], context, []])
+        self._depth += 1
+        self._cycles += 1
 
     def _push_textbuffer(self):
         """Push the textbuffer onto the stack as a Text node and clear it."""
@@ -90,6 +96,7 @@
         stack's context with the current stack's.
         """
         self._push_textbuffer()
+        self._depth -= 1
         if keep_context:
             context = self._context
             stack = self._stacks.pop()[0]
@@ -97,6 +104,10 @@
             return stack
         return self._stacks.pop()[0]
 
+    def _can_recurse(self):
+        """Return whether we may recurse (max depth/cycles not yet reached)."""
+        return self._depth < self.MAX_DEPTH and self._cycles < self.MAX_CYCLES
+
     def _fail_route(self):
         """Fail the current tokenization route. 
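# The guard added above, boiled down to a standalone sketch: pushes bump
# both counters, pops release only the depth, so the cycle count caps total
# work even when the nesting itself stays legal.  StackGuard is illustrative
# here, not the real Tokenizer:
MAX_DEPTH = 40
MAX_CYCLES = 100000

class StackGuard(object):
    def __init__(self):
        self.depth = 0   # current nesting level; mirrors _push()/_pop()
        self.cycles = 0  # total pushes ever; never decremented

    def push(self):
        self.depth += 1
        self.cycles += 1

    def pop(self):
        self.depth -= 1

    def can_recurse(self):
        return self.depth < MAX_DEPTH and self.cycles < MAX_CYCLES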
@@ -418,7 +429,7 @@ class Tokenizer(object): else: if this == "\n": self._context |= contexts.FAIL_ON_TEXT - elif this is not self.END or not this.isspace(): + elif this is self.END or not this.isspace(): self._context |= contexts.HAS_TEXT return True else: @@ -479,9 +490,12 @@ class Tokenizer(object): else: self._write_text(this) elif this == next == "{": - self._parse_template_or_argument() - if self._context & contexts.FAIL_NEXT: - self._context ^= contexts.FAIL_NEXT + if self._can_recurse(): + self._parse_template_or_argument() + if self._context & contexts.FAIL_NEXT: + self._context ^= contexts.FAIL_NEXT + else: + self._write_text("{") elif this == "|" and self._context & contexts.TEMPLATE: self._handle_template_param() elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY: @@ -496,7 +510,7 @@ class Tokenizer(object): else: self._write_text("}") elif this == next == "[": - if not self._context & contexts.WIKILINK_TITLE: + if not self._context & contexts.WIKILINK_TITLE and self._can_recurse(): self._parse_wikilink() if self._context & contexts.FAIL_NEXT: self._context ^= contexts.FAIL_NEXT diff --git a/tests/tokenizer/templates.mwtest b/tests/tokenizer/templates.mwtest index fa3c0a4..cf41bb3 100644 --- a/tests/tokenizer/templates.mwtest +++ b/tests/tokenizer/templates.mwtest @@ -481,6 +481,13 @@ output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text=" --- +name: incomplete_stub +label: incomplete templates that should fail gracefully: just an opening +input: "{{" +output: [Text(text="{{")] + +--- + name: incomplete_plain label: incomplete templates that should fail gracefully: no close whatsoever input: "{{stuff}} {{foobar" @@ -597,3 +604,17 @@ name: incomplete_nested_template_as_param_value label: incomplete templates that should fail gracefully: a valid nested template as a parameter value input: "{{stuff}} {{foo|bar={{baz}}" output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar="), TemplateOpen(), Text(text="baz"), TemplateClose()] + +--- + +name: recursion_one_hundred_opens +label: test potentially dangerous recursion: one hundred template openings +input: "{{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{" +output: [Text(text="{{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{")] + +--- + +name: recursion_opens_and_closes +label: test potentially dangerous recursion: template openings and closings +input: "{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}" +output: [Text(text="{{|"), TemplateOpen(), TemplateClose(), Text(text="{{|"), TemplateOpen(), TemplateClose(), TemplateOpen(), TemplateParamSeparator(), TemplateOpen(), TemplateClose(), Text(text="{{"), TemplateParamSeparator(), Text(text="{{"), TemplateClose(), Text(text="{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}")] From 786d6192746284ef19c166c4d9eb95050c661b1c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 24 Apr 2013 10:28:17 -0400 Subject: [PATCH 138/180] Drop force_no_field in 
template.remove(); implement test_remove(). - Also add tests for spacing in param names. --- mwparserfromhell/nodes/template.py | 27 ++++++++++-------- tests/test_template.py | 56 ++++++++++++++++++++++++++++++++++---- 2 files changed, 66 insertions(+), 17 deletions(-) diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index eb7f3a8..751c2b1 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -142,9 +142,9 @@ class Template(Node): return False return True - def _remove_without_field(self, param, i, force_no_field): + def _remove_without_field(self, param, i): """Return False if a parameter name should be kept, otherwise True.""" - if not param.showkey and not force_no_field: + if not param.showkey: dependents = [not after.showkey for after in self.params[i+1:]] if any(dependents): return False @@ -266,22 +266,23 @@ class Template(Node): self.params.append(param) return param - def remove(self, name, keep_field=False, force_no_field=False): + def remove(self, name, keep_field=False): """Remove a parameter from the template whose name is *name*. If *keep_field* is ``True``, we will keep the parameter's name, but blank its value. Otherwise, we will remove the parameter completely *unless* other parameters are dependent on it (e.g. removing ``bar`` from ``{{foo|bar|baz}}`` is unsafe because ``{{foo|baz}}`` is not what - we expected, so ``{{foo||baz}}`` will be produced instead), unless - *force_no_field* is also ``True``. If the parameter shows up multiple - times in the template, we will remove all instances of it (and keep - one if *keep_field* is ``True`` - that being the first instance if - none of the instances have dependents, otherwise that instance will be - kept). + we expected, so ``{{foo||baz}}`` will be produced instead). + + If the parameter shows up multiple times in the template, we will + remove all instances of it (and keep one if *keep_field* is ``True`` - + the first instance if none have dependents, otherwise the one with + dependents will be kept). 
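# The dependency rule from the docstring above as a standalone sketch; it
# mirrors _remove_without_field(), but the dict-based parameters are
# illustrative, not the library's objects:
def can_drop_field(params, i):
    """Report if params[i] can vanish without renumbering later positionals."""
    if not params[i]["showkey"]:
        return not any(not p["showkey"] for p in params[i + 1:])
    return True

bar, baz = {"showkey": False}, {"showkey": False}  # i.e. {{foo|bar|baz}}
assert not can_drop_field([bar, baz], 0)  # dropping "bar" would shift "baz"
assert can_drop_field([bar, baz], 1)      # "baz" has nothing after it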
""" name = name.strip() if isinstance(name, basestring) else str(name) removed = False + to_remove =[] for i, param in enumerate(self.params): if param.name.strip() == name: if keep_field: @@ -289,13 +290,15 @@ class Template(Node): self._blank_param_value(param.value) keep_field = False else: - self.params.remove(param) + to_remove.append(param) else: - if self._remove_without_field(param, i, force_no_field): - self.params.remove(param) + if self._remove_without_field(param, i): + to_remove.append(param) else: self._blank_param_value(param.value) if not removed: removed = True if not removed: raise ValueError(name) + for param in to_remove: + self.params.remove(param) diff --git a/tests/test_template.py b/tests/test_template.py index fde7522..ecac917 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -98,7 +98,7 @@ class TestTemplate(TreeEqualityTestCase): """test Template.has_param()""" node1 = Template(wrap([Text("foobar")])) node2 = Template(wrap([Text("foo")]), - [pgenh("1", "bar"), pgens("abc", "def")]) + [pgenh("1", "bar"), pgens("\nabc ", "def")]) node3 = Template(wrap([Text("foo")]), [pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")]) node4 = Template(wrap([Text("foo")]), @@ -108,7 +108,7 @@ class TestTemplate(TreeEqualityTestCase): self.assertTrue(node2.has_param("abc")) self.assertFalse(node2.has_param("def")) self.assertTrue(node3.has_param("1")) - self.assertTrue(node3.has_param("b")) + self.assertTrue(node3.has_param(" b ")) self.assertFalse(node4.has_param("b")) self.assertTrue(node3.has_param("b", False)) self.assertTrue(node4.has_param("b", False)) @@ -123,7 +123,7 @@ class TestTemplate(TreeEqualityTestCase): node3p2 = pgens("1", "d") node3 = Template(wrap([Text("foo")]), [pgenh("1", "a"), node3p1, node3p2]) - node4p1 = pgens("b", " ") + node4p1 = pgens(" b", " ") node4 = Template(wrap([Text("foo")]), [pgenh("1", "a"), node4p1]) self.assertRaises(ValueError, node1.get, "foobar") self.assertIs(node2p1, node2.get(1)) @@ -131,10 +131,56 @@ class TestTemplate(TreeEqualityTestCase): self.assertRaises(ValueError, node2.get, "def") self.assertIs(node3p1, node3.get("b")) self.assertIs(node3p2, node3.get("1")) - self.assertIs(node4p1, node4.get("b")) + self.assertIs(node4p1, node4.get("b ")) # add - # remove + + def test_remove(self): + """test Template.remove()""" + node1 = Template(wrap([Text("foobar")])) + node2 = Template(wrap([Text("foo")]), [pgenh("1", "bar"), + pgens("abc", "def")]) + node3 = Template(wrap([Text("foo")]), [pgenh("1", "bar"), + pgens("abc", "def")]) + node4 = Template(wrap([Text("foo")]), [pgenh("1", "bar"), + pgenh("2", "baz")]) + node5 = Template(wrap([Text("foo")]), [ + pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) + node6 = Template(wrap([Text("foo")]), [ + pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) + node7 = Template(wrap([Text("foo")]), [ + pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")]) + node8 = Template(wrap([Text("foo")]), [ + pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")]) + node9 = Template(wrap([Text("foo")]), [ + pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]) + node10 = Template(wrap([Text("foo")]), [ + pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]) + + node2.remove("1") + node2.remove("abc") + node3.remove(1, keep_field=True) + node3.remove("abc", keep_field=True) + node4.remove("1", keep_field=False) + node5.remove("a", keep_field=False) + node6.remove("a", keep_field=True) + node7.remove(1, keep_field=True) + node8.remove(1, keep_field=False) + node9.remove(1, keep_field=True) + 
node10.remove(1, keep_field=False) + + self.assertRaises(ValueError, node1.remove, 1) + self.assertRaises(ValueError, node1.remove, "a") + self.assertRaises(ValueError, node2.remove, "1") + self.assertEquals("{{foo}}", node2) + self.assertEquals("{{foo||abc=}}", node3) + self.assertEquals("{{foo||baz}}", node4) + self.assertEquals("{{foo|b=c}}", node5) + self.assertEquals("{{foo| a=|b=c}}", node6) + self.assertEquals("{{foo|1 =|2=c}}", node7) + self.assertEquals("{{foo|2=c}}", node8) + self.assertEquals("{{foo||c}}", node9) + self.assertEquals("{{foo||c}}", node10) if __name__ == "__main__": unittest.main(verbosity=2) From 6af2f3b0639ea515a343cbb36a38daff661f8e62 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 24 Apr 2013 17:46:53 -0400 Subject: [PATCH 139/180] assertEquals -> assertEqual --- tests/test_template.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/test_template.py b/tests/test_template.py index ecac917..0895219 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -172,15 +172,15 @@ class TestTemplate(TreeEqualityTestCase): self.assertRaises(ValueError, node1.remove, 1) self.assertRaises(ValueError, node1.remove, "a") self.assertRaises(ValueError, node2.remove, "1") - self.assertEquals("{{foo}}", node2) - self.assertEquals("{{foo||abc=}}", node3) - self.assertEquals("{{foo||baz}}", node4) - self.assertEquals("{{foo|b=c}}", node5) - self.assertEquals("{{foo| a=|b=c}}", node6) - self.assertEquals("{{foo|1 =|2=c}}", node7) - self.assertEquals("{{foo|2=c}}", node8) - self.assertEquals("{{foo||c}}", node9) - self.assertEquals("{{foo||c}}", node10) + self.assertEqual("{{foo}}", node2) + self.assertEqual("{{foo||abc=}}", node3) + self.assertEqual("{{foo||baz}}", node4) + self.assertEqual("{{foo|b=c}}", node5) + self.assertEqual("{{foo| a=|b=c}}", node6) + self.assertEqual("{{foo|1 =|2=c}}", node7) + self.assertEqual("{{foo|2=c}}", node8) + self.assertEqual("{{foo||c}}", node9) + self.assertEqual("{{foo||c}}", node10) if __name__ == "__main__": unittest.main(verbosity=2) From b46c98b0121d6b9bbb13720a658a3a8b0237932e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 25 Apr 2013 10:22:20 -0400 Subject: [PATCH 140/180] Clean up template.add(); add a before param but do not implement yet. --- mwparserfromhell/nodes/template.py | 47 +++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 751c2b1..4b74971 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -194,20 +194,30 @@ class Template(Node): return param raise ValueError(name) - def add(self, name, value, showkey=None, force_nonconformity=False): + def add(self, name, value, showkey=None, before=None, + preserve_spacing=True): """Add a parameter to the template with a given *name* and *value*. *name* and *value* can be anything parasable by - :py:func:`.utils.parse_anything`; pipes (and equal signs, if - appropriate) are automatically escaped from *value* where applicable. + :py:func:`.utils.parse_anything`; pipes and equal signs are + automatically escaped from *value* when appropriate. + If *showkey* is given, this will determine whether or not to show the parameter's name (e.g., ``{{foo|bar}}``'s parameter has a name of ``"1"`` but it is hidden); otherwise, we'll make a safe and intelligent guess. 
If *name* is already a parameter, we'll replace its value while - keeping the same spacing rules unless *force_nonconformity* is - ``True``. We will also try to guess the dominant spacing convention - when adding a new parameter using :py:meth:`_get_spacing_conventions` - unless *force_nonconformity* is ``True``. + keeping the same spacing rules. We will also try to guess the dominant + spacing convention when adding a new parameter using + :py:meth:`_get_spacing_conventions`. + + If *before* is given (either a :py:class:`~.Parameter` object or a + name), then we will place the parameter immediately before this one. + Otherwise, it will be added at the end. This is ignored if the + parameter already exists. + + If *preserve_spacing* is ``False``, we will avoid preserving spacing + conventions when changing the value of an existing parameter or when + adding a new one. """ name, value = parse_anything(name), parse_anything(value) self._surface_escape(value, "|") @@ -220,10 +230,10 @@ class Template(Node): self._surface_escape(value, "=") existing.showkey = showkey nodes = existing.value.nodes - if force_nonconformity: - existing.value = value - else: + if preserve_spacing: existing.value = parse_anything([nodes[0], value, nodes[1]]) + else: + existing.value = value return existing if showkey is None: @@ -245,22 +255,11 @@ class Template(Node): if not showkey: self._surface_escape(value, "=") - if not force_nonconformity: + if preserve_spacing: before_n, after_n = self._get_spacing_conventions(use_names=True) - if before_n and after_n: - name = parse_anything([before_n, name, after_n]) - elif before_n: - name = parse_anything([before_n, name]) - elif after_n: - name = parse_anything([name, after_n]) - before_v, after_v = self._get_spacing_conventions(use_names=False) - if before_v and after_v: - value = parse_anything([before_v, value, after_v]) - elif before_v: - value = parse_anything([before_v, value]) - elif after_v: - value = parse_anything([value, after_v]) + name = parse_anything([before_n, name, after_n]) + value = parse_anything([before_v, value, after_v]) param = Parameter(name, value, showkey) self.params.append(param) From 2ca3b2805e5a346600508e3e622bddad6be38f93 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 26 Apr 2013 10:39:53 -0400 Subject: [PATCH 141/180] Implement 'before' parameter for Template.add() (closes #21) --- mwparserfromhell/nodes/template.py | 11 ++++++++--- tests/test_template.py | 22 +++++++++++++++++++++- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 4b74971..9d28be4 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -226,9 +226,9 @@ class Template(Node): self.remove(name, keep_field=True) existing = self.get(name) if showkey is not None: - if not showkey: - self._surface_escape(value, "=") existing.showkey = showkey + if not existing.showkey: + self._surface_escape(value, "=") nodes = existing.value.nodes if preserve_spacing: existing.value = parse_anything([nodes[0], value, nodes[1]]) @@ -262,7 +262,12 @@ class Template(Node): value = parse_anything([before_v, value, after_v]) param = Parameter(name, value, showkey) - self.params.append(param) + if before: + if not isinstance(before, Parameter): + before = self.get(before) + self.params.insert(self.params.index(before), param) + else: + self.params.append(param) return param def remove(self, name, keep_field=False): diff --git a/tests/test_template.py 
b/tests/test_template.py index 0895219..a1661f2 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -133,7 +133,27 @@ class TestTemplate(TreeEqualityTestCase): self.assertIs(node3p2, node3.get("1")) self.assertIs(node4p1, node4.get("b ")) - # add + def test_add(self): + """test Template.add()""" + # add new param with showkey to end + # add new param without showkey to end + # add new param to end with an escapable | + # add new param with showkey to end with an escapable = + # add new param without showkey to end with an escapable = + # add new param with showkey to end preserving spacing (x3) + # add new param without showkey to end not preserving spacing + # add new param guessing showkey where key is to be shown + # add new param guessing showkey where key is to be shown with an escapable = + # add new param guessing showkey where key is not to be shown + # add new param guessing showkey where key is not to be shown with an escapable = + # add existing parameter without modifying showkey + # add existing parameter without modifying showkey with an escapable = + # add existing parameter with modifying showkey + # add existing parameter with modifying showkey with an escapable = + # add existing parameter preserving spacing (x3) + # add existing parameter not preserving spacing + # add existing parameter when there are multiple params involved + # add existing parameter when there are multiple params involved; params with dependencies def test_remove(self): """test Template.remove()""" From 81849013bc31b12b1a82a98ff0b4a25ccb597822 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 2 May 2013 11:01:13 -0400 Subject: [PATCH 142/180] Finishing tests for Templates; some fixes. --- mwparserfromhell/nodes/template.py | 21 ++- tests/test_template.py | 262 +++++++++++++++++++++++++++++-------- 2 files changed, 220 insertions(+), 63 deletions(-) diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 9d28be4..3834d41 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -81,7 +81,7 @@ class Template(Node): in parameter names or values so they are not mistaken for new parameters. """ - replacement = HTMLEntity(value=ord(char)) + replacement = str(HTMLEntity(value=ord(char))) for node in code.filter_text(recursive=False): if char in node: code.replace(node, node.replace(char, replacement)) @@ -107,7 +107,7 @@ class Template(Node): values = tuple(theories.values()) best = max(values) confidence = float(best) / sum(values) - if confidence > 0.75: + if confidence >= 0.75: return tuple(theories.keys())[values.index(best)] def _get_spacing_conventions(self, use_names): @@ -205,15 +205,19 @@ class Template(Node): If *showkey* is given, this will determine whether or not to show the parameter's name (e.g., ``{{foo|bar}}``'s parameter has a name of ``"1"`` but it is hidden); otherwise, we'll make a safe and intelligent - guess. If *name* is already a parameter, we'll replace its value while - keeping the same spacing rules. We will also try to guess the dominant - spacing convention when adding a new parameter using + guess. + + If *name* is already a parameter in the template, we'll replace its + value while keeping the same whitespace around it. We will also try to + guess the dominant spacing convention when adding a new parameter using :py:meth:`_get_spacing_conventions`. If *before* is given (either a :py:class:`~.Parameter` object or a name), then we will place the parameter immediately before this one. 
- Otherwise, it will be added at the end. This is ignored if the - parameter already exists. + Otherwise, it will be added at the end. If *before* is a name and + exists multiple times in the template, we will place it before the last + occurance. If *before* is not in the template, :py:exc:`ValueError` is + raised. The argument is ignored if the new parameter already exists. If *preserve_spacing* is ``False``, we will avoid preserving spacing conventions when changing the value of an existing parameter or when @@ -231,6 +235,9 @@ class Template(Node): self._surface_escape(value, "=") nodes = existing.value.nodes if preserve_spacing: + for i in range(2): # Ignore empty text nodes + if not nodes[i]: + nodes[i] = None existing.value = parse_anything([nodes[0], value, nodes[1]]) else: existing.value = value diff --git a/tests/test_template.py b/tests/test_template.py index a1661f2..3eb88ad 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -24,31 +24,32 @@ from __future__ import unicode_literals import unittest from mwparserfromhell.compat import str -from mwparserfromhell.nodes import Template, Text +from mwparserfromhell.nodes import HTMLEntity, Template, Text from mwparserfromhell.nodes.extras import Parameter from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode from ._test_tree_equality import TreeEqualityTestCase wrap = lambda L: Wikicode(SmartList(L)) -pgens = lambda k, v: Parameter(wrap([Text(k)]), wrap([Text(v)]), True) -pgenh = lambda k, v: Parameter(wrap([Text(k)]), wrap([Text(v)]), False) +wraptext = lambda t: wrap([Text(t)]) +pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True) +pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False) class TestTemplate(TreeEqualityTestCase): """Test cases for the Template node.""" def test_unicode(self): """test Template.__unicode__()""" - node = Template(wrap([Text("foobar")])) + node = Template(wraptext("foobar")) self.assertEqual("{{foobar}}", str(node)) - node2 = Template(wrap([Text("foo")]), + node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")]) self.assertEqual("{{foo|bar|abc=def}}", str(node2)) def test_strip(self): """test Template.__strip__()""" - node1 = Template(wrap([Text("foobar")])) - node2 = Template(wrap([Text("foo")]), + node1 = Template(wraptext("foobar")) + node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")]) for a in (True, False): for b in (True, False): @@ -61,8 +62,8 @@ class TestTemplate(TreeEqualityTestCase): getter, marker = object(), object() get = lambda code: output.append((getter, code)) mark = lambda: output.append(marker) - node1 = Template(wrap([Text("foobar")])) - node2 = Template(wrap([Text("foo")]), + node1 = Template(wraptext("foobar")) + node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")]) node1.__showtree__(output.append, get, mark) node2.__showtree__(output.append, get, mark) @@ -76,33 +77,32 @@ class TestTemplate(TreeEqualityTestCase): def test_name(self): """test getter/setter for the name attribute""" - name = wrap([Text("foobar")]) + name = wraptext("foobar") node1 = Template(name) node2 = Template(name, [pgenh("1", "bar")]) self.assertIs(name, node1.name) self.assertIs(name, node2.name) node1.name = "asdf" node2.name = "téstïng" - self.assertWikicodeEqual(wrap([Text("asdf")]), node1.name) - self.assertWikicodeEqual(wrap([Text("téstïng")]), node2.name) + self.assertWikicodeEqual(wraptext("asdf"), node1.name) + 
self.assertWikicodeEqual(wraptext("téstïng"), node2.name) def test_params(self): """test getter for the params attribute""" - node1 = Template(wrap([Text("foobar")])) + node1 = Template(wraptext("foobar")) plist = [pgenh("1", "bar"), pgens("abc", "def")] - node2 = Template(wrap([Text("foo")]), plist) + node2 = Template(wraptext("foo"), plist) self.assertEqual([], node1.params) self.assertIs(plist, node2.params) def test_has_param(self): """test Template.has_param()""" - node1 = Template(wrap([Text("foobar")])) - node2 = Template(wrap([Text("foo")]), + node1 = Template(wraptext("foobar")) + node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("\nabc ", "def")]) - node3 = Template(wrap([Text("foo")]), + node3 = Template(wraptext("foo"), [pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")]) - node4 = Template(wrap([Text("foo")]), - [pgenh("1", "a"), pgens("b", " ")]) + node4 = Template(wraptext("foo"), [pgenh("1", "a"), pgens("b", " ")]) self.assertFalse(node1.has_param("foobar")) self.assertTrue(node2.has_param(1)) self.assertTrue(node2.has_param("abc")) @@ -115,16 +115,15 @@ class TestTemplate(TreeEqualityTestCase): def test_get(self): """test Template.get()""" - node1 = Template(wrap([Text("foobar")])) + node1 = Template(wraptext("foobar")) node2p1 = pgenh("1", "bar") node2p2 = pgens("abc", "def") - node2 = Template(wrap([Text("foo")]), [node2p1, node2p2]) + node2 = Template(wraptext("foo"), [node2p1, node2p2]) node3p1 = pgens("b", "c") node3p2 = pgens("1", "d") - node3 = Template(wrap([Text("foo")]), - [pgenh("1", "a"), node3p1, node3p2]) + node3 = Template(wraptext("foo"), [pgenh("1", "a"), node3p1, node3p2]) node4p1 = pgens(" b", " ") - node4 = Template(wrap([Text("foo")]), [pgenh("1", "a"), node4p1]) + node4 = Template(wraptext("foo"), [pgenh("1", "a"), node4p1]) self.assertRaises(ValueError, node1.get, "foobar") self.assertIs(node2p1, node2.get(1)) self.assertIs(node2p2, node2.get("abc")) @@ -135,46 +134,197 @@ class TestTemplate(TreeEqualityTestCase): def test_add(self): """test Template.add()""" - # add new param with showkey to end - # add new param without showkey to end - # add new param to end with an escapable | - # add new param with showkey to end with an escapable = - # add new param without showkey to end with an escapable = - # add new param with showkey to end preserving spacing (x3) - # add new param without showkey to end not preserving spacing - # add new param guessing showkey where key is to be shown - # add new param guessing showkey where key is to be shown with an escapable = - # add new param guessing showkey where key is not to be shown - # add new param guessing showkey where key is not to be shown with an escapable = - # add existing parameter without modifying showkey - # add existing parameter without modifying showkey with an escapable = - # add existing parameter with modifying showkey - # add existing parameter with modifying showkey with an escapable = - # add existing parameter preserving spacing (x3) - # add existing parameter not preserving spacing - # add existing parameter when there are multiple params involved - # add existing parameter when there are multiple params involved; params with dependencies + node1 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node2 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node3 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node4 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node5 = Template(wraptext("a"), [pgens("b", "c"), + pgens(" d ", 
"e")]) + node6 = Template(wraptext("a"), [pgens("b", "c"), pgens("b", "d"), + pgens("b", "e")]) + node7 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node8p = pgenh("1", "d") + node8 = Template(wraptext("a"), [pgens("b", "c"), node8p]) + node9 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node10 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "e")]) + node11 = Template(wraptext("a"), [pgens("b", "c")]) + node12 = Template(wraptext("a"), [pgens("b", "c")]) + node13 = Template(wraptext("a"), [pgens("\nb ", " c"), + pgens("\nd ", " e"), + pgens("\nf ", " g")]) + node14 = Template(wraptext("a\n"), [pgens("b ", "c\n"), + pgens("d ", " e"), + pgens("f ", "g\n"), + pgens("h ", " i\n")]) + node15 = Template(wraptext("a"), [pgens("b ", " c\n"), + pgens("\nd ", " e"), + pgens("\nf ", "g ")]) + node16 = Template(wraptext("a"), [pgens("\nb ", " c"), + pgens("\nd ", " e"), + pgens("\nf ", " g")]) + node17 = Template(wraptext("a"), [pgens("\nb ", " c"), + pgens("\nd ", " e"), + pgens("\nf ", " g")]) + node18 = Template(wraptext("a\n"), [pgens("b ", "c\n"), + pgens("d ", " e"), + pgens("f ", "g\n"), + pgens("h ", " i\n")]) + node19 = Template(wraptext("a"), [pgens("b ", " c\n"), + pgens("\nd ", " e"), + pgens("\nf ", "g ")]) + node20 = Template(wraptext("a"), [pgens("\nb ", " c"), + pgens("\nd ", " e"), + pgens("\nf ", " g")]) + node21 = Template(wraptext("a"), [pgenh("1", "b")]) + node22 = Template(wraptext("a"), [pgenh("1", "b")]) + node23 = Template(wraptext("a"), [pgenh("1", "b")]) + node24 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), + pgenh("3", "d"), pgenh("4", "e")]) + node25 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), + pgens("4", "d"), pgens("5", "e")]) + node26 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), + pgens("4", "d"), pgens("5", "e")]) + node27 = Template(wraptext("a"), [pgenh("1", "b")]) + node28 = Template(wraptext("a"), [pgenh("1", "b")]) + node29 = Template(wraptext("a"), [pgens("b", "c")]) + node30 = Template(wraptext("a"), [pgenh("1", "b")]) + node31 = Template(wraptext("a"), [pgenh("1", "b")]) + node32 = Template(wraptext("a"), [pgens("1", "b")]) + node33 = Template(wraptext("a"), [pgens("\nb ", " c"), + pgens("\nd ", " e"), + pgens("\nf ", " g")]) + node34 = Template(wraptext("a\n"), [pgens("b ", "c\n"), + pgens("d ", " e"), + pgens("f ", "g\n"), + pgens("h ", " i\n")]) + node35 = Template(wraptext("a"), [pgens("b ", " c\n"), + pgens("\nd ", " e"), + pgens("\nf ", "g ")]) + node36 = Template(wraptext("a"), [pgens("\nb ", " c "), + pgens("\nd ", " e "), + pgens("\nf ", " g ")]) + node37 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), + pgens("b", "f"), pgens("b", "h"), + pgens("i", "j")]) + node37 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), + pgens("b", "f"), pgens("b", "h"), + pgens("i", "j")]) + node38 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"), + pgens("1", "c"), pgens("2", "d")]) + node39 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"), + pgenh("1", "c"), pgenh("2", "d")]) + node40 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), + pgens("f", "g")]) + + node1.add("e", "f", showkey=True) + node2.add(2, "g", showkey=False) + node3.add("e", "foo|bar", showkey=True) + node4.add("e", "f", showkey=True, before="b") + node5.add("f", "g", showkey=True, before=" d ") + node6.add("f", "g", showkey=True, before="b") + self.assertRaises(ValueError, node7.add, "e", "f", showkey=True, + before="q") + node8.add("e", "f", 
showkey=True, before=node8p) + node9.add("e", "f", showkey=True, before=pgenh("1", "d")) + self.assertRaises(ValueError, node10.add, "e", "f", showkey=True, + before=pgenh("1", "d")) + node11.add("d", "foo=bar", showkey=True) + node12.add("1", "foo=bar", showkey=False) + node13.add("h", "i", showkey=True) + node14.add("j", "k", showkey=True) + node15.add("h", "i", showkey=True) + node16.add("h", "i", showkey=True, preserve_spacing=False) + node17.add("h", "i", showkey=False) + node18.add("j", "k", showkey=False) + node19.add("h", "i", showkey=False) + node20.add("h", "i", showkey=False, preserve_spacing=False) + node21.add("2", "c") + node22.add("3", "c") + node23.add("c", "d") + node24.add("5", "f") + node25.add("3", "f") + node26.add("6", "f") + node27.add("c", "foo=bar") + node28.add("2", "foo=bar") + node29.add("b", "d") + node30.add("1", "foo=bar") + node31.add("1", "foo=bar", showkey=True) + node32.add("1", "foo=bar", showkey=False) + node33.add("d", "foo") + node34.add("f", "foo") + node35.add("f", "foo") + node36.add("d", "foo", preserve_spacing=False) + node37.add("b", "k") + node38.add("1", "e") + node39.add("1", "e") + node40.add("d", "h", before="b") + + self.assertEquals("{{a|b=c|d|e=f}}", node1) + self.assertEquals("{{a|b=c|d|g}}", node2) + self.assertEquals("{{a|b=c|d|e=foo|bar}}", node3) + self.assertIsInstance(node3.params[2].value.get(1), HTMLEntity) + self.assertEquals("{{a|e=f|b=c|d}}", node4) + self.assertEquals("{{a|b=c|f=g| d =e}}", node5) + self.assertEquals("{{a|b=c|b=d|f=g|b=e}}", node6) + self.assertEquals("{{a|b=c|d}}", node7) + self.assertEquals("{{a|b=c|e=f|d}}", node8) + self.assertEquals("{{a|b=c|e=f|d}}", node9) + self.assertEquals("{{a|b=c|e}}", node10) + self.assertEquals("{{a|b=c|d=foo=bar}}", node11) + self.assertEquals("{{a|b=c|foo=bar}}", node12) + self.assertIsInstance(node12.params[1].value.get(1), HTMLEntity) + self.assertEquals("{{a|\nb = c|\nd = e|\nf = g|\nh = i}}", node13) + self.assertEquals("{{a\n|b =c\n|d = e|f =g\n|h = i\n|j =k\n}}", node14) + self.assertEquals("{{a|b = c\n|\nd = e|\nf =g |h =i}}", node15) + self.assertEquals("{{a|\nb = c|\nd = e|\nf = g|h=i}}", node16) + self.assertEquals("{{a|\nb = c|\nd = e|\nf = g| i}}", node17) + self.assertEquals("{{a\n|b =c\n|d = e|f =g\n|h = i\n|k\n}}", node18) + self.assertEquals("{{a|b = c\n|\nd = e|\nf =g |i}}", node19) + self.assertEquals("{{a|\nb = c|\nd = e|\nf = g|i}}", node20) + self.assertEquals("{{a|b|c}}", node21) + self.assertEquals("{{a|b|3=c}}", node22) + self.assertEquals("{{a|b|c=d}}", node23) + self.assertEquals("{{a|b|c|d|e|f}}", node24) + self.assertEquals("{{a|b|c|4=d|5=e|f}}", node25) + self.assertEquals("{{a|b|c|4=d|5=e|6=f}}", node26) + self.assertEquals("{{a|b|c=foo=bar}}", node27) + self.assertEquals("{{a|b|foo=bar}}", node28) + self.assertIsInstance(node28.params[1].value.get(1), HTMLEntity) + self.assertEquals("{{a|b=d}}", node29) + self.assertEquals("{{a|foo=bar}}", node30) + self.assertIsInstance(node30.params[0].value.get(1), HTMLEntity) + self.assertEquals("{{a|1=foo=bar}}", node31) + self.assertEquals("{{a|foo=bar}}", node32) + self.assertIsInstance(node32.params[0].value.get(1), HTMLEntity) + self.assertEquals("{{a|\nb = c|\nd = foo|\nf = g}}", node33) + self.assertEquals("{{a\n|b =c\n|d = e|f =foo\n|h = i\n}}", node34) + self.assertEquals("{{a|b = c\n|\nd = e|\nf =foo }}", node35) + self.assertEquals("{{a|\nb = c |\nd =foo|\nf = g }}", node36) + self.assertEquals("{{a|b=k|d=e|i=j}}", node37) + self.assertEquals("{{a|1=e|x=y|2=d}}", node38) + 
self.assertEquals("{{a|x=y|e|d}}", node39) + self.assertEquals("{{a|b=c|d=h|f=g}}", node40) def test_remove(self): """test Template.remove()""" - node1 = Template(wrap([Text("foobar")])) - node2 = Template(wrap([Text("foo")]), [pgenh("1", "bar"), - pgens("abc", "def")]) - node3 = Template(wrap([Text("foo")]), [pgenh("1", "bar"), - pgens("abc", "def")]) - node4 = Template(wrap([Text("foo")]), [pgenh("1", "bar"), - pgenh("2", "baz")]) - node5 = Template(wrap([Text("foo")]), [ + node1 = Template(wraptext("foobar")) + node2 = Template(wraptext("foo"), [pgenh("1", "bar"), + pgens("abc", "def")]) + node3 = Template(wraptext("foo"), [pgenh("1", "bar"), + pgens("abc", "def")]) + node4 = Template(wraptext("foo"), [pgenh("1", "bar"), + pgenh("2", "baz")]) + node5 = Template(wraptext("foo"), [ pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) - node6 = Template(wrap([Text("foo")]), [ + node6 = Template(wraptext("foo"), [ pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) - node7 = Template(wrap([Text("foo")]), [ + node7 = Template(wraptext("foo"), [ pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")]) - node8 = Template(wrap([Text("foo")]), [ + node8 = Template(wraptext("foo"), [ pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")]) - node9 = Template(wrap([Text("foo")]), [ + node9 = Template(wraptext("foo"), [ pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]) - node10 = Template(wrap([Text("foo")]), [ + node10 = Template(wraptext("foo"), [ pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]) node2.remove("1") From 1d26c4b312207f956c29c224f34814e486607757 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 2 May 2013 22:40:35 -0400 Subject: [PATCH 143/180] Why do I always make this mistake? --- tests/test_template.py | 80 +++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/tests/test_template.py b/tests/test_template.py index 3eb88ad..b9fd6e8 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -259,51 +259,51 @@ class TestTemplate(TreeEqualityTestCase): node39.add("1", "e") node40.add("d", "h", before="b") - self.assertEquals("{{a|b=c|d|e=f}}", node1) - self.assertEquals("{{a|b=c|d|g}}", node2) - self.assertEquals("{{a|b=c|d|e=foo|bar}}", node3) + self.assertEqual("{{a|b=c|d|e=f}}", node1) + self.assertEqual("{{a|b=c|d|g}}", node2) + self.assertEqual("{{a|b=c|d|e=foo|bar}}", node3) self.assertIsInstance(node3.params[2].value.get(1), HTMLEntity) - self.assertEquals("{{a|e=f|b=c|d}}", node4) - self.assertEquals("{{a|b=c|f=g| d =e}}", node5) - self.assertEquals("{{a|b=c|b=d|f=g|b=e}}", node6) - self.assertEquals("{{a|b=c|d}}", node7) - self.assertEquals("{{a|b=c|e=f|d}}", node8) - self.assertEquals("{{a|b=c|e=f|d}}", node9) - self.assertEquals("{{a|b=c|e}}", node10) - self.assertEquals("{{a|b=c|d=foo=bar}}", node11) - self.assertEquals("{{a|b=c|foo=bar}}", node12) + self.assertEqual("{{a|e=f|b=c|d}}", node4) + self.assertEqual("{{a|b=c|f=g| d =e}}", node5) + self.assertEqual("{{a|b=c|b=d|f=g|b=e}}", node6) + self.assertEqual("{{a|b=c|d}}", node7) + self.assertEqual("{{a|b=c|e=f|d}}", node8) + self.assertEqual("{{a|b=c|e=f|d}}", node9) + self.assertEqual("{{a|b=c|e}}", node10) + self.assertEqual("{{a|b=c|d=foo=bar}}", node11) + self.assertEqual("{{a|b=c|foo=bar}}", node12) self.assertIsInstance(node12.params[1].value.get(1), HTMLEntity) - self.assertEquals("{{a|\nb = c|\nd = e|\nf = g|\nh = i}}", node13) - self.assertEquals("{{a\n|b =c\n|d = e|f =g\n|h = i\n|j =k\n}}", node14) - self.assertEquals("{{a|b = c\n|\nd 
= e|\nf =g |h =i}}", node15) - self.assertEquals("{{a|\nb = c|\nd = e|\nf = g|h=i}}", node16) - self.assertEquals("{{a|\nb = c|\nd = e|\nf = g| i}}", node17) - self.assertEquals("{{a\n|b =c\n|d = e|f =g\n|h = i\n|k\n}}", node18) - self.assertEquals("{{a|b = c\n|\nd = e|\nf =g |i}}", node19) - self.assertEquals("{{a|\nb = c|\nd = e|\nf = g|i}}", node20) - self.assertEquals("{{a|b|c}}", node21) - self.assertEquals("{{a|b|3=c}}", node22) - self.assertEquals("{{a|b|c=d}}", node23) - self.assertEquals("{{a|b|c|d|e|f}}", node24) - self.assertEquals("{{a|b|c|4=d|5=e|f}}", node25) - self.assertEquals("{{a|b|c|4=d|5=e|6=f}}", node26) - self.assertEquals("{{a|b|c=foo=bar}}", node27) - self.assertEquals("{{a|b|foo=bar}}", node28) + self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|\nh = i}}", node13) + self.assertEqual("{{a\n|b =c\n|d = e|f =g\n|h = i\n|j =k\n}}", node14) + self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |h =i}}", node15) + self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|h=i}}", node16) + self.assertEqual("{{a|\nb = c|\nd = e|\nf = g| i}}", node17) + self.assertEqual("{{a\n|b =c\n|d = e|f =g\n|h = i\n|k\n}}", node18) + self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |i}}", node19) + self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|i}}", node20) + self.assertEqual("{{a|b|c}}", node21) + self.assertEqual("{{a|b|3=c}}", node22) + self.assertEqual("{{a|b|c=d}}", node23) + self.assertEqual("{{a|b|c|d|e|f}}", node24) + self.assertEqual("{{a|b|c|4=d|5=e|f}}", node25) + self.assertEqual("{{a|b|c|4=d|5=e|6=f}}", node26) + self.assertEqual("{{a|b|c=foo=bar}}", node27) + self.assertEqual("{{a|b|foo=bar}}", node28) self.assertIsInstance(node28.params[1].value.get(1), HTMLEntity) - self.assertEquals("{{a|b=d}}", node29) - self.assertEquals("{{a|foo=bar}}", node30) + self.assertEqual("{{a|b=d}}", node29) + self.assertEqual("{{a|foo=bar}}", node30) self.assertIsInstance(node30.params[0].value.get(1), HTMLEntity) - self.assertEquals("{{a|1=foo=bar}}", node31) - self.assertEquals("{{a|foo=bar}}", node32) + self.assertEqual("{{a|1=foo=bar}}", node31) + self.assertEqual("{{a|foo=bar}}", node32) self.assertIsInstance(node32.params[0].value.get(1), HTMLEntity) - self.assertEquals("{{a|\nb = c|\nd = foo|\nf = g}}", node33) - self.assertEquals("{{a\n|b =c\n|d = e|f =foo\n|h = i\n}}", node34) - self.assertEquals("{{a|b = c\n|\nd = e|\nf =foo }}", node35) - self.assertEquals("{{a|\nb = c |\nd =foo|\nf = g }}", node36) - self.assertEquals("{{a|b=k|d=e|i=j}}", node37) - self.assertEquals("{{a|1=e|x=y|2=d}}", node38) - self.assertEquals("{{a|x=y|e|d}}", node39) - self.assertEquals("{{a|b=c|d=h|f=g}}", node40) + self.assertEqual("{{a|\nb = c|\nd = foo|\nf = g}}", node33) + self.assertEqual("{{a\n|b =c\n|d = e|f =foo\n|h = i\n}}", node34) + self.assertEqual("{{a|b = c\n|\nd = e|\nf =foo }}", node35) + self.assertEqual("{{a|\nb = c |\nd =foo|\nf = g }}", node36) + self.assertEqual("{{a|b=k|d=e|i=j}}", node37) + self.assertEqual("{{a|1=e|x=y|2=d}}", node38) + self.assertEqual("{{a|x=y|e|d}}", node39) + self.assertEqual("{{a|b=c|d=h|f=g}}", node40) def test_remove(self): """test Template.remove()""" From 3b78541eeb19cf0cb528cd856e8f3048d354fb4e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 3 May 2013 10:57:30 -0400 Subject: [PATCH 144/180] Clean up indentation. 
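As the previous patch shows, assertEquals is only a deprecated alias of
assertEqual, so the suite passes either way and nothing catches the slip
until review.  A small checker along these lines would flag the alias early
(a sketch only; find_deprecated_asserts() is not project tooling):

import io
import re

def find_deprecated_asserts(path):
    """Yield (line number, text) pairs that use a deprecated alias."""
    pattern = re.compile(r"\bassert(?:Not)?Equals\(")
    with io.open(path, encoding="utf-8") as fp:
        for num, line in enumerate(fp, 1):
            if pattern.search(line):
                yield num, line.rstrip()

Pointed at tests/test_template.py before patch 143, it would have reported
each of the lines that patch corrects.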
--- tests/test_template.py | 66 +++++++++++++++++++++----------------------------- 1 file changed, 27 insertions(+), 39 deletions(-) diff --git a/tests/test_template.py b/tests/test_template.py index b9fd6e8..31ed33b 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -149,32 +149,24 @@ class TestTemplate(TreeEqualityTestCase): node10 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "e")]) node11 = Template(wraptext("a"), [pgens("b", "c")]) node12 = Template(wraptext("a"), [pgens("b", "c")]) - node13 = Template(wraptext("a"), [pgens("\nb ", " c"), - pgens("\nd ", " e"), - pgens("\nf ", " g")]) - node14 = Template(wraptext("a\n"), [pgens("b ", "c\n"), - pgens("d ", " e"), - pgens("f ", "g\n"), - pgens("h ", " i\n")]) - node15 = Template(wraptext("a"), [pgens("b ", " c\n"), - pgens("\nd ", " e"), - pgens("\nf ", "g ")]) - node16 = Template(wraptext("a"), [pgens("\nb ", " c"), - pgens("\nd ", " e"), - pgens("\nf ", " g")]) - node17 = Template(wraptext("a"), [pgens("\nb ", " c"), - pgens("\nd ", " e"), - pgens("\nf ", " g")]) - node18 = Template(wraptext("a\n"), [pgens("b ", "c\n"), - pgens("d ", " e"), - pgens("f ", "g\n"), - pgens("h ", " i\n")]) - node19 = Template(wraptext("a"), [pgens("b ", " c\n"), - pgens("\nd ", " e"), - pgens("\nf ", "g ")]) - node20 = Template(wraptext("a"), [pgens("\nb ", " c"), - pgens("\nd ", " e"), - pgens("\nf ", " g")]) + node13 = Template(wraptext("a"), [ + pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) + node14 = Template(wraptext("a\n"), [ + pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), + pgens("h ", " i\n")]) + node15 = Template(wraptext("a"), [ + pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) + node16 = Template(wraptext("a"), [ + pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) + node17 = Template(wraptext("a"), [ + pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) + node18 = Template(wraptext("a\n"), [ + pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), + pgens("h ", " i\n")]) + node19 = Template(wraptext("a"), [ + pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) + node20 = Template(wraptext("a"), [ + pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) node21 = Template(wraptext("a"), [pgenh("1", "b")]) node22 = Template(wraptext("a"), [pgenh("1", "b")]) node23 = Template(wraptext("a"), [pgenh("1", "b")]) @@ -190,19 +182,15 @@ class TestTemplate(TreeEqualityTestCase): node30 = Template(wraptext("a"), [pgenh("1", "b")]) node31 = Template(wraptext("a"), [pgenh("1", "b")]) node32 = Template(wraptext("a"), [pgens("1", "b")]) - node33 = Template(wraptext("a"), [pgens("\nb ", " c"), - pgens("\nd ", " e"), - pgens("\nf ", " g")]) - node34 = Template(wraptext("a\n"), [pgens("b ", "c\n"), - pgens("d ", " e"), - pgens("f ", "g\n"), - pgens("h ", " i\n")]) - node35 = Template(wraptext("a"), [pgens("b ", " c\n"), - pgens("\nd ", " e"), - pgens("\nf ", "g ")]) - node36 = Template(wraptext("a"), [pgens("\nb ", " c "), - pgens("\nd ", " e "), - pgens("\nf ", " g ")]) + node33 = Template(wraptext("a"), [ + pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) + node34 = Template(wraptext("a\n"), [ + pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), + pgens("h ", " i\n")]) + node35 = Template(wraptext("a"), [ + pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) + node36 = Template(wraptext("a"), [ + pgens("\nb ", " c "), pgens("\nd ", " e "), pgens("\nf ", " g ")]) node37 = Template(wraptext("a"), 
[pgens("b", "c"), pgens("d", "e"), pgens("b", "f"), pgens("b", "h"), pgens("i", "j")]) From 7853e207451a69081573624856025f2a3f750f83 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 3 May 2013 23:43:57 -0400 Subject: [PATCH 145/180] Move wrap() and wraptext() TO _test_tree_equality. --- tests/_test_tree_equality.py | 4 ++++ tests/test_argument.py | 6 +----- tests/test_builder.py | 6 +----- tests/test_heading.py | 6 +----- tests/test_html_entity.py | 6 +----- tests/test_parameter.py | 6 +----- tests/test_template.py | 6 +----- tests/test_wikilink.py | 6 +----- 8 files changed, 11 insertions(+), 35 deletions(-) diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 758a72e..a12bd68 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -26,8 +26,12 @@ from unittest import TestCase from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, Tag, Template, Text, Wikilink) from mwparserfromhell.nodes.extras import Attribute, Parameter +from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode +wrap = lambda L: Wikicode(SmartList(L)) +wraptext = lambda t: wrap([Text(t)]) + class TreeEqualityTestCase(TestCase): """A base test case with support for comparing the equality of node trees. diff --git a/tests/test_argument.py b/tests/test_argument.py index e0524c4..ae5ae62 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -25,12 +25,8 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Argument, Text -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase - -wrap = lambda L: Wikicode(SmartList(L)) +from ._test_tree_equality import TreeEqualityTestCase, wrap class TestArgument(TreeEqualityTestCase): """Test cases for the Argument node.""" diff --git a/tests/test_builder.py b/tests/test_builder.py index 1e578ed..76917e8 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -28,12 +28,8 @@ from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, from mwparserfromhell.nodes.extras import Attribute, Parameter from mwparserfromhell.parser import tokens from mwparserfromhell.parser.builder import Builder -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase - -wrap = lambda L: Wikicode(SmartList(L)) +from ._test_tree_equality import TreeEqualityTestCase, wrap class TestBuilder(TreeEqualityTestCase): """Tests for the builder, which turns tokens into Wikicode objects.""" diff --git a/tests/test_heading.py b/tests/test_heading.py index a0e78e5..88603a8 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -25,12 +25,8 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Heading, Text -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase - -wrap = lambda L: Wikicode(SmartList(L)) +from ._test_tree_equality import TreeEqualityTestCase, wrap class TestHeading(TreeEqualityTestCase): """Test cases for the Heading node.""" diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index a7a9669..b6b4394 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -25,12 +25,8 @@ import unittest from mwparserfromhell.compat import str from 
mwparserfromhell.nodes import HTMLEntity -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase - -wrap = lambda L: Wikicode(SmartList(L)) +from ._test_tree_equality import TreeEqualityTestCase, wrap class TestHTMLEntity(TreeEqualityTestCase): """Test cases for the HTMLEntity node.""" diff --git a/tests/test_parameter.py b/tests/test_parameter.py index b46ad71..8e85eda 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -26,12 +26,8 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text from mwparserfromhell.nodes.extras import Parameter -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase - -wrap = lambda L: Wikicode(SmartList(L)) +from ._test_tree_equality import TreeEqualityTestCase, wrap class TestParameter(TreeEqualityTestCase): """Test cases for the Parameter node extra.""" diff --git a/tests/test_template.py b/tests/test_template.py index 31ed33b..81b7382 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -26,12 +26,8 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import HTMLEntity, Template, Text from mwparserfromhell.nodes.extras import Parameter -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext -wrap = lambda L: Wikicode(SmartList(L)) -wraptext = lambda t: wrap([Text(t)]) pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True) pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False) diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index 422489f..7c02744 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -25,12 +25,8 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text, Wikilink -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase - -wrap = lambda L: Wikicode(SmartList(L)) +from ._test_tree_equality import TreeEqualityTestCase, wrap class TestWikilink(TreeEqualityTestCase): """Test cases for the Wikilink node.""" From eea5c774e342752dae016d79782bf755ca48de53 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 3 May 2013 23:52:10 -0400 Subject: [PATCH 146/180] Clean up some repetitive lines. 
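
The repetition being removed: each __strip__() test asserted the same result
four times, once per combination of the two boolean arguments. Those collapse
into a pair of loops (taken from tests/test_comment.py in the diff below):

    # Before:
    self.assertIs(None, node.__strip__(True, True))
    self.assertIs(None, node.__strip__(True, False))
    self.assertIs(None, node.__strip__(False, True))
    self.assertIs(None, node.__strip__(False, False))

    # After:
    for a in (True, False):
        for b in (True, False):
            self.assertIs(None, node.__strip__(a, b))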
--- tests/test_argument.py | 13 ++++--------- tests/test_comment.py | 7 +++---- tests/test_heading.py | 7 +++---- tests/test_html_entity.py | 20 +++++++------------- tests/test_text.py | 7 +++---- tests/test_wikilink.py | 13 ++++--------- 6 files changed, 24 insertions(+), 43 deletions(-) diff --git a/tests/test_argument.py b/tests/test_argument.py index ae5ae62..3a959b6 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -41,16 +41,11 @@ class TestArgument(TreeEqualityTestCase): def test_strip(self): """test Argument.__strip__()""" node = Argument(wrap([Text("foobar")])) - self.assertIs(None, node.__strip__(True, True)) - self.assertIs(None, node.__strip__(True, False)) - self.assertIs(None, node.__strip__(False, True)) - self.assertIs(None, node.__strip__(False, False)) - node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) - self.assertEqual("bar", node2.__strip__(True, True)) - self.assertEqual("bar", node2.__strip__(True, False)) - self.assertEqual("bar", node2.__strip__(False, True)) - self.assertEqual("bar", node2.__strip__(False, False)) + for a in (True, False): + for b in (True, False): + self.assertIs(None, node.__strip__(a, b)) + self.assertEqual("bar", node2.__strip__(a, b)) def test_showtree(self): """test Argument.__showtree__()""" diff --git a/tests/test_comment.py b/tests/test_comment.py index 980f594..a7a3c4d 100644 --- a/tests/test_comment.py +++ b/tests/test_comment.py @@ -39,10 +39,9 @@ class TestComment(TreeEqualityTestCase): def test_strip(self): """test Comment.__strip__()""" node = Comment("foobar") - self.assertIs(None, node.__strip__(True, True)) - self.assertIs(None, node.__strip__(True, False)) - self.assertIs(None, node.__strip__(False, True)) - self.assertIs(None, node.__strip__(False, False)) + for a in (True, False): + for b in (True, False): + self.assertIs(None, node.__strip__(a, b)) def test_showtree(self): """test Comment.__showtree__()""" diff --git a/tests/test_heading.py b/tests/test_heading.py index 88603a8..79b0ebf 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -41,10 +41,9 @@ class TestHeading(TreeEqualityTestCase): def test_strip(self): """test Heading.__strip__()""" node = Heading(wrap([Text("foobar")]), 3) - self.assertEqual("foobar", node.__strip__(True, True)) - self.assertEqual("foobar", node.__strip__(True, False)) - self.assertEqual("foobar", node.__strip__(False, True)) - self.assertEqual("foobar", node.__strip__(False, False)) + for a in (True, False): + for b in (True, False): + self.assertEqual("foobar", node.__strip__(a, b)) def test_showtree(self): """test Heading.__showtree__()""" diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index b6b4394..d3d23bf 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -47,19 +47,13 @@ class TestHTMLEntity(TreeEqualityTestCase): node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) node2 = HTMLEntity("107", named=False, hexadecimal=False) node3 = HTMLEntity("e9", named=False, hexadecimal=True) - - self.assertEqual("\xa0", node1.__strip__(True, True)) - self.assertEqual("\xa0", node1.__strip__(True, False)) - self.assertEqual(" ", node1.__strip__(False, True)) - self.assertEqual(" ", node1.__strip__(False, False)) - self.assertEqual("k", node2.__strip__(True, True)) - self.assertEqual("k", node2.__strip__(True, False)) - self.assertEqual("k", node2.__strip__(False, True)) - self.assertEqual("k", node2.__strip__(False, False)) - self.assertEqual("é", node3.__strip__(True, True)) - self.assertEqual("é", 
node3.__strip__(True, False)) - self.assertEqual("é", node3.__strip__(False, True)) - self.assertEqual("é", node3.__strip__(False, False)) + for a in (True, False): + self.assertEqual("\xa0", node1.__strip__(True, a)) + self.assertEqual(" ", node1.__strip__(False, a)) + self.assertEqual("k", node2.__strip__(True, a)) + self.assertEqual("k", node2.__strip__(False, a)) + self.assertEqual("é", node3.__strip__(True, a)) + self.assertEqual("é", node3.__strip__(False, a)) def test_showtree(self): """test HTMLEntity.__showtree__()""" diff --git a/tests/test_text.py b/tests/test_text.py index 13636bf..f3649dd 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -39,10 +39,9 @@ class TestText(unittest.TestCase): def test_strip(self): """test Text.__strip__()""" node = Text("foobar") - self.assertIs(node, node.__strip__(True, True)) - self.assertIs(node, node.__strip__(True, False)) - self.assertIs(node, node.__strip__(False, True)) - self.assertIs(node, node.__strip__(False, False)) + for a in (True, False): + for b in (True, False): + self.assertIs(node, node.__strip__(a, b)) def test_showtree(self): """test Text.__showtree__()""" diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index 7c02744..09ca5b3 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -41,16 +41,11 @@ class TestWikilink(TreeEqualityTestCase): def test_strip(self): """test Wikilink.__strip__()""" node = Wikilink(wrap([Text("foobar")])) - self.assertEqual("foobar", node.__strip__(True, True)) - self.assertEqual("foobar", node.__strip__(True, False)) - self.assertEqual("foobar", node.__strip__(False, True)) - self.assertEqual("foobar", node.__strip__(False, False)) - node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) - self.assertEqual("bar", node2.__strip__(True, True)) - self.assertEqual("bar", node2.__strip__(True, False)) - self.assertEqual("bar", node2.__strip__(False, True)) - self.assertEqual("bar", node2.__strip__(False, False)) + for a in (True, False): + for b in (True, False): + self.assertEqual("foobar", node.__strip__(a, b)) + self.assertEqual("bar", node2.__strip__(a, b)) def test_showtree(self): """test Wikilink.__showtree__()""" From 06873ee6edcc88b6ee57d5ad57296655f2fb85c8 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 4 May 2013 15:50:48 -0400 Subject: [PATCH 147/180] Add tests for __iternodes__(); add a getnodes() function. --- tests/_test_tree_equality.py | 9 +++++++++ tests/test_argument.py | 19 ++++++++++++++++++- tests/test_comment.py | 7 +++++++ tests/test_heading.py | 12 +++++++++++- tests/test_html_entity.py | 7 +++++++ tests/test_template.py | 26 +++++++++++++++++++++++++- tests/test_text.py | 7 +++++++ tests/test_wikilink.py | 19 ++++++++++++++++++- 8 files changed, 102 insertions(+), 4 deletions(-) diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index a12bd68..6d9b26a 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -32,6 +32,15 @@ from mwparserfromhell.wikicode import Wikicode wrap = lambda L: Wikicode(SmartList(L)) wraptext = lambda t: wrap([Text(t)]) +def getnodes(code): + """Iterate over all child nodes of a given parent node. + + Imitates Wikicode._get_all_nodes(). + """ + for node in code.nodes: + for context, child in node.__iternodes__(getnodes): + yield child + class TreeEqualityTestCase(TestCase): """A base test case with support for comparing the equality of node trees. 
diff --git a/tests/test_argument.py b/tests/test_argument.py index 3a959b6..a9469d4 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -26,7 +26,7 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Argument, Text -from ._test_tree_equality import TreeEqualityTestCase, wrap +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap class TestArgument(TreeEqualityTestCase): """Test cases for the Argument node.""" @@ -38,6 +38,23 @@ class TestArgument(TreeEqualityTestCase): node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) self.assertEqual("{{{foo|bar}}}", str(node2)) + def test_iternodes(self): + """test Argument.__iternodes__()""" + node1n1 = Text("foobar") + node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("baz") + node1 = Argument(wrap([node1n1])) + node2 = Argument(wrap([node2n1]), wrap([node2n2, node2n3])) + gen1 = node1.__iternodes__(getnodes) + gen2 = node2.__iternodes__(getnodes) + self.assertEqual((None, node1), next(gen1)) + self.assertEqual((None, node2), next(gen2)) + self.assertEqual((node1.name, node1n1), next(gen1)) + self.assertEqual((node2.name, node2n1), next(gen2)) + self.assertEqual((node2.default, node2n2), next(gen2)) + self.assertEqual((node2.default, node2n3), next(gen2)) + self.assertRaises(StopIteration, next, gen1) + self.assertRaises(StopIteration, next, gen2) + def test_strip(self): """test Argument.__strip__()""" node = Argument(wrap([Text("foobar")])) diff --git a/tests/test_comment.py b/tests/test_comment.py index a7a3c4d..44225a2 100644 --- a/tests/test_comment.py +++ b/tests/test_comment.py @@ -36,6 +36,13 @@ class TestComment(TreeEqualityTestCase): node = Comment("foobar") self.assertEqual("", str(node)) + def test_iternodes(self): + """test Comment.__iternodes__()""" + node = Comment("foobar") + gen = node.__iternodes__(None) + self.assertEqual((None, node), next(gen)) + self.assertRaises(StopIteration, next, gen) + def test_strip(self): """test Comment.__strip__()""" node = Comment("foobar") diff --git a/tests/test_heading.py b/tests/test_heading.py index 79b0ebf..38f6545 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -26,7 +26,7 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Heading, Text -from ._test_tree_equality import TreeEqualityTestCase, wrap +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap class TestHeading(TreeEqualityTestCase): """Test cases for the Heading node.""" @@ -38,6 +38,16 @@ class TestHeading(TreeEqualityTestCase): node2 = Heading(wrap([Text(" zzz ")]), 5) self.assertEqual("===== zzz =====", str(node2)) + def test_iternodes(self): + """test Heading.__iternodes__()""" + text1, text2 = Text("foo"), Text("bar") + node = Heading(wrap([text1, text2]), 3) + gen = node.__iternodes__(getnodes) + self.assertEqual((None, node), next(gen)) + self.assertEqual((node.title, text1), next(gen)) + self.assertEqual((node.title, text2), next(gen)) + self.assertRaises(StopIteration, next, gen) + def test_strip(self): """test Heading.__strip__()""" node = Heading(wrap([Text("foobar")]), 3) diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index d3d23bf..d38e5ec 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -42,6 +42,13 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertEqual("k", str(node3)) self.assertEqual("l", str(node4)) + def test_iternodes(self): + """test HTMLEntity.__iternodes__()""" + node = HTMLEntity("nbsp", 
named=True, hexadecimal=False) + gen = node.__iternodes__(None) + self.assertEqual((None, node), next(gen)) + self.assertRaises(StopIteration, next, gen) + def test_strip(self): """test HTMLEntity.__strip__()""" node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) diff --git a/tests/test_template.py b/tests/test_template.py index 81b7382..28592df 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -26,7 +26,7 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import HTMLEntity, Template, Text from mwparserfromhell.nodes.extras import Parameter -from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True) pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False) @@ -42,6 +42,30 @@ class TestTemplate(TreeEqualityTestCase): [pgenh("1", "bar"), pgens("abc", "def")]) self.assertEqual("{{foo|bar|abc=def}}", str(node2)) + def test_iternodes(self): + """test Template.__iternodes__()""" + node1n1 = Text("foobar") + node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("abc") + node2n4, node2n5 = Text("def"), Text("ghi") + node2p1 = Parameter(wraptext("1"), wrap([node2n2]), showkey=False) + node2p2 = Parameter(wrap([node2n3]), wrap([node2n4, node2n5]), + showkey=True) + node1 = Template(wrap([node1n1])) + node2 = Template(wrap([node2n1]), [node2p1, node2p2]) + + gen1 = node1.__iternodes__(getnodes) + gen2 = node2.__iternodes__(getnodes) + self.assertEqual((None, node1), next(gen1)) + self.assertEqual((None, node2), next(gen2)) + self.assertEqual((node1.name, node1n1), next(gen1)) + self.assertEqual((node2.name, node2n1), next(gen2)) + self.assertEqual((node2.params[0].value, node2n2), next(gen2)) + self.assertEqual((node2.params[1].name, node2n3), next(gen2)) + self.assertEqual((node2.params[1].value, node2n4), next(gen2)) + self.assertEqual((node2.params[1].value, node2n5), next(gen2)) + self.assertRaises(StopIteration, next, gen1) + self.assertRaises(StopIteration, next, gen2) + def test_strip(self): """test Template.__strip__()""" node1 = Template(wraptext("foobar")) diff --git a/tests/test_text.py b/tests/test_text.py index f3649dd..35ac340 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -36,6 +36,13 @@ class TestText(unittest.TestCase): node2 = Text("fóóbar") self.assertEqual("fóóbar", str(node2)) + def test_iternodes(self): + """test Text.__iternodes__()""" + node = Text("foobar") + gen = node.__iternodes__(None) + self.assertEqual((None, node), next(gen)) + self.assertRaises(StopIteration, next, gen) + def test_strip(self): """test Text.__strip__()""" node = Text("foobar") diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index 09ca5b3..d4319c1 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -26,7 +26,7 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text, Wikilink -from ._test_tree_equality import TreeEqualityTestCase, wrap +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap class TestWikilink(TreeEqualityTestCase): """Test cases for the Wikilink node.""" @@ -38,6 +38,23 @@ class TestWikilink(TreeEqualityTestCase): node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) self.assertEqual("[[foo|bar]]", str(node2)) + def test_iternodes(self): + """test Wikilink.__iternodes__()""" + node1n1 = Text("foobar") + node2n1, node2n2, node2n3 = Text("foo"), 
Text("bar"), Text("baz") + node1 = Wikilink(wrap([node1n1])) + node2 = Wikilink(wrap([node2n1]), wrap([node2n2, node2n3])) + gen1 = node1.__iternodes__(getnodes) + gen2 = node2.__iternodes__(getnodes) + self.assertEqual((None, node1), next(gen1)) + self.assertEqual((None, node2), next(gen2)) + self.assertEqual((node1.title, node1n1), next(gen1)) + self.assertEqual((node2.title, node2n1), next(gen2)) + self.assertEqual((node2.text, node2n2), next(gen2)) + self.assertEqual((node2.text, node2n3), next(gen2)) + self.assertRaises(StopIteration, next, gen1) + self.assertRaises(StopIteration, next, gen2) + def test_strip(self): """test Wikilink.__strip__()""" node = Wikilink(wrap([Text("foobar")])) From 3fe629f54188a0f5ffde439811aab656a0418f0c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 4 May 2013 16:18:14 -0400 Subject: [PATCH 148/180] Condense usage of wrap([Text("foo")]) to just wraptext("foo"). --- tests/_test_tree_equality.py | 2 +- tests/test_argument.py | 30 ++++++++-------- tests/test_builder.py | 85 +++++++++++++++++++++----------------------- tests/test_heading.py | 20 +++++------ tests/test_parameter.py | 28 +++++++-------- tests/test_parser.py | 21 +++++------ tests/test_utils.py | 27 ++++++-------- tests/test_wikilink.py | 30 ++++++++-------- 8 files changed, 114 insertions(+), 129 deletions(-) diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 6d9b26a..52130ed 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -30,7 +30,7 @@ from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode wrap = lambda L: Wikicode(SmartList(L)) -wraptext = lambda t: wrap([Text(t)]) +wraptext = lambda *args: wrap([Text(t) for t in args]) def getnodes(code): """Iterate over all child nodes of a given parent node. 
diff --git a/tests/test_argument.py b/tests/test_argument.py index a9469d4..8191804 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -26,16 +26,16 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Argument, Text -from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext class TestArgument(TreeEqualityTestCase): """Test cases for the Argument node.""" def test_unicode(self): """test Argument.__unicode__()""" - node = Argument(wrap([Text("foobar")])) + node = Argument(wraptext("foobar")) self.assertEqual("{{{foobar}}}", str(node)) - node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) + node2 = Argument(wraptext("foo"), wraptext("bar")) self.assertEqual("{{{foo|bar}}}", str(node2)) def test_iternodes(self): @@ -57,8 +57,8 @@ class TestArgument(TreeEqualityTestCase): def test_strip(self): """test Argument.__strip__()""" - node = Argument(wrap([Text("foobar")])) - node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) + node = Argument(wraptext("foobar")) + node2 = Argument(wraptext("foo"), wraptext("bar")) for a in (True, False): for b in (True, False): self.assertIs(None, node.__strip__(a, b)) @@ -70,8 +70,8 @@ class TestArgument(TreeEqualityTestCase): getter, marker = object(), object() get = lambda code: output.append((getter, code)) mark = lambda: output.append(marker) - node1 = Argument(wrap([Text("foobar")])) - node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) + node1 = Argument(wraptext("foobar")) + node2 = Argument(wraptext("foo"), wraptext("bar")) node1.__showtree__(output.append, get, mark) node2.__showtree__(output.append, get, mark) valid = [ @@ -81,26 +81,26 @@ class TestArgument(TreeEqualityTestCase): def test_name(self): """test getter/setter for the name attribute""" - name = wrap([Text("foobar")]) + name = wraptext("foobar") node1 = Argument(name) - node2 = Argument(name, wrap([Text("baz")])) + node2 = Argument(name, wraptext("baz")) self.assertIs(name, node1.name) self.assertIs(name, node2.name) node1.name = "héhehé" node2.name = "héhehé" - self.assertWikicodeEqual(wrap([Text("héhehé")]), node1.name) - self.assertWikicodeEqual(wrap([Text("héhehé")]), node2.name) + self.assertWikicodeEqual(wraptext("héhehé"), node1.name) + self.assertWikicodeEqual(wraptext("héhehé"), node2.name) def test_default(self): """test getter/setter for the default attribute""" - default = wrap([Text("baz")]) - node1 = Argument(wrap([Text("foobar")])) - node2 = Argument(wrap([Text("foobar")]), default) + default = wraptext("baz") + node1 = Argument(wraptext("foobar")) + node2 = Argument(wraptext("foobar"), default) self.assertIs(None, node1.default) self.assertIs(default, node2.default) node1.default = "buzz" node2.default = None - self.assertWikicodeEqual(wrap([Text("buzz")]), node1.default) + self.assertWikicodeEqual(wraptext("buzz"), node1.default) self.assertIs(None, node2.default) if __name__ == "__main__": diff --git a/tests/test_builder.py b/tests/test_builder.py index 76917e8..903d144 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -29,7 +29,7 @@ from mwparserfromhell.nodes.extras import Attribute, Parameter from mwparserfromhell.parser import tokens from mwparserfromhell.parser.builder import Builder -from ._test_tree_equality import TreeEqualityTestCase, wrap +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestBuilder(TreeEqualityTestCase): """Tests for the builder, which 
turns tokens into Wikicode objects.""" @@ -40,10 +40,10 @@ class TestBuilder(TreeEqualityTestCase): def test_text(self): """tests for building Text nodes""" tests = [ - ([tokens.Text(text="foobar")], wrap([Text("foobar")])), - ([tokens.Text(text="fóóbar")], wrap([Text("fóóbar")])), + ([tokens.Text(text="foobar")], wraptext("foobar")), + ([tokens.Text(text="fóóbar")], wraptext("fóóbar")), ([tokens.Text(text="spam"), tokens.Text(text="eggs")], - wrap([Text("spam"), Text("eggs")])), + wraptext("spam", "eggs")), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -53,25 +53,24 @@ class TestBuilder(TreeEqualityTestCase): tests = [ ([tokens.TemplateOpen(), tokens.Text(text="foobar"), tokens.TemplateClose()], - wrap([Template(wrap([Text("foobar")]))])), + wrap([Template(wraptext("foobar"))])), ([tokens.TemplateOpen(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.TemplateClose()], - wrap([Template(wrap([Text("spam"), Text("eggs")]))])), + wrap([Template(wraptext("spam", "eggs"))])), ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateClose()], - wrap([Template(wrap([Text("foo")]), params=[ - Parameter(wrap([Text("1")]), wrap([Text("bar")]), - showkey=False)])])), + wrap([Template(wraptext("foo"), params=[ + Parameter(wraptext("1"), wraptext("bar"), showkey=False)])])), ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateParamEquals(), tokens.Text(text="baz"), tokens.TemplateClose()], - wrap([Template(wrap([Text("foo")]), params=[ - Parameter(wrap([Text("bar")]), wrap([Text("baz")]))])])), + wrap([Template(wraptext("foo"), params=[ + Parameter(wraptext("bar"), wraptext("baz"))])])), ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), @@ -82,14 +81,12 @@ class TestBuilder(TreeEqualityTestCase): tokens.TemplateParamEquals(), tokens.Text(text="buff"), tokens.TemplateParamSeparator(), tokens.Text(text="baff"), tokens.TemplateClose()], - wrap([Template(wrap([Text("foo")]), params=[ - Parameter(wrap([Text("bar")]), wrap([Text("baz")])), - Parameter(wrap([Text("1")]), wrap([Text("biz")]), - showkey=False), - Parameter(wrap([Text("2")]), wrap([Text("buzz")]), - showkey=False), - Parameter(wrap([Text("3")]), wrap([Text("buff")])), - Parameter(wrap([Text("3")]), wrap([Text("baff")]), + wrap([Template(wraptext("foo"), params=[ + Parameter(wraptext("bar"), wraptext("baz")), + Parameter(wraptext("1"), wraptext("biz"), showkey=False), + Parameter(wraptext("2"), wraptext("buzz"), showkey=False), + Parameter(wraptext("3"), wraptext("buff")), + Parameter(wraptext("3"), wraptext("baff"), showkey=False)])])), ] for test, valid in tests: @@ -100,23 +97,22 @@ class TestBuilder(TreeEqualityTestCase): tests = [ ([tokens.ArgumentOpen(), tokens.Text(text="foobar"), tokens.ArgumentClose()], - wrap([Argument(wrap([Text("foobar")]))])), + wrap([Argument(wraptext("foobar"))])), ([tokens.ArgumentOpen(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.ArgumentClose()], - wrap([Argument(wrap([Text("spam"), Text("eggs")]))])), + wrap([Argument(wraptext("spam", "eggs"))])), ([tokens.ArgumentOpen(), tokens.Text(text="foo"), tokens.ArgumentSeparator(), tokens.Text(text="bar"), tokens.ArgumentClose()], - wrap([Argument(wrap([Text("foo")]), wrap([Text("bar")]))])), + wrap([Argument(wraptext("foo"), wraptext("bar"))])), ([tokens.ArgumentOpen(), tokens.Text(text="foo"), 
tokens.Text(text="bar"), tokens.ArgumentSeparator(), tokens.Text(text="baz"), tokens.Text(text="biz"), tokens.ArgumentClose()], - wrap([Argument(wrap([Text("foo"), Text("bar")]), - wrap([Text("baz"), Text("biz")]))])), + wrap([Argument(wraptext("foo", "bar"), wraptext("baz", "biz"))])), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -126,23 +122,22 @@ class TestBuilder(TreeEqualityTestCase): tests = [ ([tokens.WikilinkOpen(), tokens.Text(text="foobar"), tokens.WikilinkClose()], - wrap([Wikilink(wrap([Text("foobar")]))])), + wrap([Wikilink(wraptext("foobar"))])), ([tokens.WikilinkOpen(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.WikilinkClose()], - wrap([Wikilink(wrap([Text("spam"), Text("eggs")]))])), + wrap([Wikilink(wraptext("spam", "eggs"))])), ([tokens.WikilinkOpen(), tokens.Text(text="foo"), tokens.WikilinkSeparator(), tokens.Text(text="bar"), tokens.WikilinkClose()], - wrap([Wikilink(wrap([Text("foo")]), wrap([Text("bar")]))])), + wrap([Wikilink(wraptext("foo"), wraptext("bar"))])), ([tokens.WikilinkOpen(), tokens.Text(text="foo"), tokens.Text(text="bar"), tokens.WikilinkSeparator(), tokens.Text(text="baz"), tokens.Text(text="biz"), tokens.WikilinkClose()], - wrap([Wikilink(wrap([Text("foo"), Text("bar")]), - wrap([Text("baz"), Text("biz")]))])), + wrap([Wikilink(wraptext("foo", "bar"), wraptext("baz", "biz"))])), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -172,11 +167,11 @@ class TestBuilder(TreeEqualityTestCase): tests = [ ([tokens.HeadingStart(level=2), tokens.Text(text="foobar"), tokens.HeadingEnd()], - wrap([Heading(wrap([Text("foobar")]), 2)])), + wrap([Heading(wraptext("foobar"), 2)])), ([tokens.HeadingStart(level=4), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.HeadingEnd()], - wrap([Heading(wrap([Text("spam"), Text("eggs")]), 4)])), + wrap([Heading(wraptext("spam", "eggs"), 4)])), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -186,11 +181,11 @@ class TestBuilder(TreeEqualityTestCase): tests = [ ([tokens.CommentStart(), tokens.Text(text="foobar"), tokens.CommentEnd()], - wrap([Comment(wrap([Text("foobar")]))])), + wrap([Comment(wraptext("foobar"))])), ([tokens.CommentStart(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.CommentEnd()], - wrap([Comment(wrap([Text("spam"), Text("eggs")]))])), + wrap([Comment(wraptext("spam", "eggs"))])), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -214,10 +209,10 @@ class TestBuilder(TreeEqualityTestCase): tokens.TemplateOpen(), tokens.Text(text="bin"), tokens.TemplateClose(), tokens.TemplateClose()] valid = wrap( - [Template(wrap([Template(wrap([Template(wrap([Template(wrap([Text( - "foo")])), Text("bar")]), params=[Parameter(wrap([Text("baz")]), - wrap([Text("biz")]))]), Text("buzz")])), Text("usr")]), params=[ - Parameter(wrap([Text("1")]), wrap([Template(wrap([Text("bin")]))]), + [Template(wrap([Template(wrap([Template(wrap([Template(wraptext( + "foo")), Text("bar")]), params=[Parameter(wraptext("baz"), + wraptext("biz"))]), Text("buzz")])), Text("usr")]), params=[ + Parameter(wraptext("1"), wrap([Template(wraptext("bin"))]), showkey=False)])]) self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -243,14 +238,14 @@ class TestBuilder(TreeEqualityTestCase): tokens.Text(text="nbsp"), tokens.HTMLEntityEnd(), tokens.TemplateClose()] valid = wrap( - [Template(wrap([Text("a")]), params=[Parameter(wrap([Text("1")]), - 
wrap([Text("b")]), showkey=False), Parameter(wrap([Text("2")]), - wrap([Template(wrap([Text("c")]), params=[Parameter(wrap([Text("1") - ]), wrap([Wikilink(wrap([Text("d")])), Argument(wrap([Text("e")]))] - ), showkey=False)])]), showkey=False)]), Wikilink(wrap([Text("f")] - ), wrap([Argument(wrap([Text("g")])), Comment(wrap([Text("h")]))]) - ), Template(wrap([Text("i")]), params=[Parameter(wrap([Text("j")]), - wrap([HTMLEntity("nbsp", named=True)]))])]) + [Template(wraptext("a"), params=[Parameter(wraptext("1"), wraptext( + "b"), showkey=False), Parameter(wraptext("2"), wrap([Template( + wraptext("c"), params=[Parameter(wraptext("1"), wrap([Wikilink( + wraptext("d")), Argument(wraptext("e"))]), showkey=False)])]), + showkey=False)]), Wikilink(wraptext("f"), wrap([Argument(wraptext( + "g")), Comment(wraptext("h"))])), Template(wraptext("i"), params=[ + Parameter(wraptext("j"), wrap([HTMLEntity("nbsp", + named=True)]))])]) self.assertWikicodeEqual(valid, self.builder.build(test)) if __name__ == "__main__": diff --git a/tests/test_heading.py b/tests/test_heading.py index 38f6545..7a65872 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -26,16 +26,16 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Heading, Text -from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext class TestHeading(TreeEqualityTestCase): """Test cases for the Heading node.""" def test_unicode(self): """test Heading.__unicode__()""" - node = Heading(wrap([Text("foobar")]), 2) + node = Heading(wraptext("foobar"), 2) self.assertEqual("==foobar==", str(node)) - node2 = Heading(wrap([Text(" zzz ")]), 5) + node2 = Heading(wraptext(" zzz "), 5) self.assertEqual("===== zzz =====", str(node2)) def test_iternodes(self): @@ -50,7 +50,7 @@ class TestHeading(TreeEqualityTestCase): def test_strip(self): """test Heading.__strip__()""" - node = Heading(wrap([Text("foobar")]), 3) + node = Heading(wraptext("foobar"), 3) for a in (True, False): for b in (True, False): self.assertEqual("foobar", node.__strip__(a, b)) @@ -60,8 +60,8 @@ class TestHeading(TreeEqualityTestCase): output = [] getter = object() get = lambda code: output.append((getter, code)) - node1 = Heading(wrap([Text("foobar")]), 3) - node2 = Heading(wrap([Text(" baz ")]), 4) + node1 = Heading(wraptext("foobar"), 3) + node2 = Heading(wraptext(" baz "), 4) node1.__showtree__(output.append, get, None) node2.__showtree__(output.append, get, None) valid = ["===", (getter, node1.title), "===", @@ -70,20 +70,18 @@ class TestHeading(TreeEqualityTestCase): def test_title(self): """test getter/setter for the title attribute""" - title = wrap([Text("foobar")]) + title = wraptext("foobar") node = Heading(title, 3) self.assertIs(title, node.title) node.title = "héhehé" - self.assertWikicodeEqual(wrap([Text("héhehé")]), node.title) + self.assertWikicodeEqual(wraptext("héhehé"), node.title) def test_level(self): """test getter/setter for the level attribute""" - node = Heading(wrap([Text("foobar")]), 3) + node = Heading(wraptext("foobar"), 3) self.assertEqual(3, node.level) node.level = 5 self.assertEqual(5, node.level) - node.level = True - self.assertEqual(1, node.level) self.assertRaises(ValueError, setattr, node, "level", 0) self.assertRaises(ValueError, setattr, node, "level", 7) self.assertRaises(ValueError, setattr, node, "level", "abc") diff --git a/tests/test_parameter.py b/tests/test_parameter.py index 8e85eda..4786e12 100644 
--- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -27,43 +27,43 @@ from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text from mwparserfromhell.nodes.extras import Parameter -from ._test_tree_equality import TreeEqualityTestCase, wrap +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestParameter(TreeEqualityTestCase): """Test cases for the Parameter node extra.""" def test_unicode(self): """test Parameter.__unicode__()""" - node = Parameter(wrap([Text("1")]), wrap([Text("foo")]), showkey=False) + node = Parameter(wraptext("1"), wraptext("foo"), showkey=False) self.assertEqual("foo", str(node)) - node2 = Parameter(wrap([Text("foo")]), wrap([Text("bar")])) + node2 = Parameter(wraptext("foo"), wraptext("bar")) self.assertEqual("foo=bar", str(node2)) def test_name(self): """test getter/setter for the name attribute""" - name1 = wrap([Text("1")]) - name2 = wrap([Text("foobar")]) - node1 = Parameter(name1, wrap([Text("foobar")]), showkey=False) - node2 = Parameter(name2, wrap([Text("baz")])) + name1 = wraptext("1") + name2 = wraptext("foobar") + node1 = Parameter(name1, wraptext("foobar"), showkey=False) + node2 = Parameter(name2, wraptext("baz")) self.assertIs(name1, node1.name) self.assertIs(name2, node2.name) node1.name = "héhehé" node2.name = "héhehé" - self.assertWikicodeEqual(wrap([Text("héhehé")]), node1.name) - self.assertWikicodeEqual(wrap([Text("héhehé")]), node2.name) + self.assertWikicodeEqual(wraptext("héhehé"), node1.name) + self.assertWikicodeEqual(wraptext("héhehé"), node2.name) def test_value(self): """test getter/setter for the value attribute""" - value = wrap([Text("bar")]) - node = Parameter(wrap([Text("foo")]), value) + value = wraptext("bar") + node = Parameter(wraptext("foo"), value) self.assertIs(value, node.value) node.value = "héhehé" - self.assertWikicodeEqual(wrap([Text("héhehé")]), node.value) + self.assertWikicodeEqual(wraptext("héhehé"), node.value) def test_showkey(self): """test getter/setter for the showkey attribute""" - node1 = Parameter(wrap([Text("1")]), wrap([Text("foo")]), showkey=False) - node2 = Parameter(wrap([Text("foo")]), wrap([Text("bar")])) + node1 = Parameter(wraptext("1"), wraptext("foo"), showkey=False) + node2 = Parameter(wraptext("foo"), wraptext("bar")) self.assertFalse(node1.showkey) self.assertTrue(node2.showkey) node1.showkey = True diff --git a/tests/test_parser.py b/tests/test_parser.py index 9d2c969..ec5f065 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -26,10 +26,8 @@ import unittest from mwparserfromhell import parser from mwparserfromhell.nodes import Template, Text, Wikilink from mwparserfromhell.nodes.extras import Parameter -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext from .compat import range class TestParser(TreeEqualityTestCase): @@ -45,18 +43,17 @@ class TestParser(TreeEqualityTestCase): def test_parsing(self): """integration test for parsing overall""" text = "this is text; {{this|is=a|template={{with|[[links]]|in}}it}}" - wrap = lambda L: Wikicode(SmartList(L)) expected = wrap([ Text("this is text; "), - Template(wrap([Text("this")]), [ - Parameter(wrap([Text("is")]), wrap([Text("a")])), - Parameter(wrap([Text("template")]), wrap([ - Template(wrap([Text("with")]), [ - Parameter(wrap([Text("1")]), - wrap([Wikilink(wrap([Text("links")]))]), + 
Template(wraptext("this"), [ + Parameter(wraptext("is"), wraptext("a")), + Parameter(wraptext("template"), wrap([ + Template(wraptext("with"), [ + Parameter(wraptext("1"), + wrap([Wikilink(wraptext("links"))]), showkey=False), - Parameter(wrap([Text("2")]), - wrap([Text("in")]), showkey=False) + Parameter(wraptext("2"), + wraptext("in"), showkey=False) ]), Text("it") ])) diff --git a/tests/test_utils.py b/tests/test_utils.py index c088530..80a0e5e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -24,33 +24,28 @@ from __future__ import unicode_literals import unittest from mwparserfromhell.nodes import Template, Text -from mwparserfromhell.smart_list import SmartList from mwparserfromhell.utils import parse_anything -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestUtils(TreeEqualityTestCase): """Tests for the utils module, which provides parse_anything().""" def test_parse_anything_valid(self): """tests for valid input to utils.parse_anything()""" - wrap = lambda L: Wikicode(SmartList(L)) - textify = lambda L: wrap([Text(item) for item in L]) tests = [ - (wrap([Text("foobar")]), textify(["foobar"])), - (Template(wrap([Text("spam")])), - wrap([Template(textify(["spam"]))])), - ("fóóbar", textify(["fóóbar"])), - (b"foob\xc3\xa1r", textify(["foobár"])), - (123, textify(["123"])), - (True, textify(["True"])), + (wraptext("foobar"), wraptext("foobar")), + (Template(wraptext("spam")), wrap([Template(wraptext("spam"))])), + ("fóóbar", wraptext("fóóbar")), + (b"foob\xc3\xa1r", wraptext("foobár")), + (123, wraptext("123")), + (True, wraptext("True")), (None, wrap([])), ([Text("foo"), Text("bar"), Text("baz")], - textify(["foo", "bar", "baz"])), - ([wrap([Text("foo")]), Text("bar"), "baz", 123, 456], - textify(["foo", "bar", "baz", "123", "456"])), - ([[[([[((("foo",),),)], "bar"],)]]], textify(["foo", "bar"])) + wraptext("foo", "bar", "baz")), + ([wraptext("foo"), Text("bar"), "baz", 123, 456], + wraptext("foo", "bar", "baz", "123", "456")), + ([[[([[((("foo",),),)], "bar"],)]]], wraptext("foo", "bar")) ] for test, valid in tests: self.assertWikicodeEqual(valid, parse_anything(test)) diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index d4319c1..7851032 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -26,16 +26,16 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text, Wikilink -from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext class TestWikilink(TreeEqualityTestCase): """Test cases for the Wikilink node.""" def test_unicode(self): """test Wikilink.__unicode__()""" - node = Wikilink(wrap([Text("foobar")])) + node = Wikilink(wraptext("foobar")) self.assertEqual("[[foobar]]", str(node)) - node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) + node2 = Wikilink(wraptext("foo"), wraptext("bar")) self.assertEqual("[[foo|bar]]", str(node2)) def test_iternodes(self): @@ -57,8 +57,8 @@ class TestWikilink(TreeEqualityTestCase): def test_strip(self): """test Wikilink.__strip__()""" - node = Wikilink(wrap([Text("foobar")])) - node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) + node = Wikilink(wraptext("foobar")) + node2 = Wikilink(wraptext("foo"), wraptext("bar")) for a in (True, False): for b in (True, False): self.assertEqual("foobar", node.__strip__(a, b)) @@ -70,8 +70,8 @@ 
class TestWikilink(TreeEqualityTestCase): getter, marker = object(), object() get = lambda code: output.append((getter, code)) mark = lambda: output.append(marker) - node1 = Wikilink(wrap([Text("foobar")])) - node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) + node1 = Wikilink(wraptext("foobar")) + node2 = Wikilink(wraptext("foo"), wraptext("bar")) node1.__showtree__(output.append, get, mark) node2.__showtree__(output.append, get, mark) valid = [ @@ -81,26 +81,26 @@ class TestWikilink(TreeEqualityTestCase): def test_title(self): """test getter/setter for the title attribute""" - title = wrap([Text("foobar")]) + title = wraptext("foobar") node1 = Wikilink(title) - node2 = Wikilink(title, wrap([Text("baz")])) + node2 = Wikilink(title, wraptext("baz")) self.assertIs(title, node1.title) self.assertIs(title, node2.title) node1.title = "héhehé" node2.title = "héhehé" - self.assertWikicodeEqual(wrap([Text("héhehé")]), node1.title) - self.assertWikicodeEqual(wrap([Text("héhehé")]), node2.title) + self.assertWikicodeEqual(wraptext("héhehé"), node1.title) + self.assertWikicodeEqual(wraptext("héhehé"), node2.title) def test_text(self): """test getter/setter for the text attribute""" - text = wrap([Text("baz")]) - node1 = Wikilink(wrap([Text("foobar")])) - node2 = Wikilink(wrap([Text("foobar")]), text) + text = wraptext("baz") + node1 = Wikilink(wraptext("foobar")) + node2 = Wikilink(wraptext("foobar"), text) self.assertIs(None, node1.text) self.assertIs(text, node2.text) node1.text = "buzz" node2.text = None - self.assertWikicodeEqual(wrap([Text("buzz")]), node1.text) + self.assertWikicodeEqual(wraptext("buzz"), node1.text) self.assertIs(None, node2.text) if __name__ == "__main__": From 852c5ff9af1c91aef34b85b10afa59623a117271 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 4 May 2013 21:06:17 -0400 Subject: [PATCH 149/180] Start TestWikicode; make Wikicode.nodes's setter handle more inputs. --- mwparserfromhell/wikicode.py | 2 + tests/test_wikicode.py | 117 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100644 tests/test_wikicode.py diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index f2d9c89..c295fd6 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -162,6 +162,8 @@ class Wikicode(StringMixIn): @nodes.setter def nodes(self, value): + if not isinstance(value, list): + value = parse_anything(value).nodes self._nodes = value def get(self, index): diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py new file mode 100644 index 0000000..421a714 --- /dev/null +++ b/tests/test_wikicode.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, + Tag, Template, Text, Wikilink) +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode +from mwparserfromhell import parse +from mwparserfromhell.compat import str + +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext + +class TestWikicode(TreeEqualityTestCase): + """Tests for the Wikicode class, which manages a list of nodes.""" + + def test_unicode(self): + """test Wikicode.__unicode__()""" + code1 = parse("foobar") + code2 = parse("Have a {{template}} and a [[page|link]]") + self.assertEqual("foobar", str(code1)) + self.assertEqual("Have a {{template}} and a [[page|link]]", str(code2)) + + def test_nodes(self): + """test getter/setter for the nodes attribute""" + code = parse("Have a {{template}}") + self.assertEqual(["Have a ", "{{template}}"], code.nodes) + L1 = SmartList([Text("foobar"), Template(wraptext("abc"))]) + L2 = [Text("barfoo"), Template(wraptext("cba"))] + L3 = "abc{{def}}" + code.nodes = L1 + self.assertIs(L1, code.nodes) + code.nodes = L2 + self.assertIs(L2, code.nodes) + code.nodes = L3 + self.assertEqual(["abc", "{{def}}"], code.nodes) + self.assertRaises(ValueError, setattr, code, "nodes", object) + + def test_get(self): + """test Wikicode.get()""" + code = parse("Have a {{template}} and a [[page|link]]") + self.assertIs(code.nodes[0], code.get(0)) + self.assertIs(code.nodes[2], code.get(2)) + self.assertRaises(IndexError, code.get, 4) + + def test_set(self): + """test Wikicode.set()""" + pass + + def test_index(self): + """test Wikicode.index()""" + pass + + def test_insert(self): + """test Wikicode.insert()""" + pass + + def test_insert_before(self): + """test Wikicode.insert_before()""" + pass + + def test_insert_after(self): + """test Wikicode.insert_after()""" + pass + + def test_replace(self): + """test Wikicode.replace()""" + pass + + def test_append(self): + """test Wikicode.append()""" + pass + + def test_remove(self): + """test Wikicode.remove()""" + pass + + def test_filter_family(self): + """test the Wikicode.i?filter() family of functions""" + pass + + def test_get_sections(self): + """test Wikicode.get_sections()""" + pass + + def test_strip_code(self): + """test Wikicode.strip_code()""" + pass + + def test_get_tree(self): + """test Wikicode.get_tree()""" + pass + + +if __name__ == "__main__": + unittest.main(verbosity=2) From ee99e6eceb5e77dae0b786422a48893e4255a76c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 6 May 2013 22:29:02 -0400 Subject: [PATCH 150/180] Fix a bug in Wikicode.set(); implement test_set() and test_index() --- mwparserfromhell/wikicode.py | 3 ++- tests/test_wikicode.py | 27 +++++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index c295fd6..e9bd133 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -184,9 +184,10 @@ class Wikicode(StringMixIn): raise ValueError("Cannot coerce multiple nodes into one index") if index >= len(self.nodes) or -1 * index > len(self.nodes): raise IndexError("List assignment index out of range") - 
self.nodes.pop(index) if nodes: self.nodes[index] = nodes[0] + else: + self.nodes.pop(index) def index(self, obj, recursive=False): """Return the index of *obj* in the list of nodes. diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 421a714..485ab99 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -66,11 +66,34 @@ class TestWikicode(TreeEqualityTestCase): def test_set(self): """test Wikicode.set()""" - pass + code = parse("Have a {{template}} and a [[page|link]]") + code.set(1, "{{{argument}}}") + self.assertEqual("Have a {{{argument}}} and a [[page|link]]", code) + self.assertIsInstance(code.get(1), Argument) + code.set(2, None) + self.assertEqual("Have a {{{argument}}}[[page|link]]", code) + code.set(-3, "This is an ") + self.assertEqual("This is an {{{argument}}}[[page|link]]", code) + self.assertRaises(ValueError, code.set, 1, "foo {{bar}}") + self.assertRaises(IndexError, code.set, 3, "{{baz}}") + self.assertRaises(IndexError, code.set, -4, "{{baz}}") def test_index(self): """test Wikicode.index()""" - pass + code = parse("Have a {{template}} and a [[page|link]]") + self.assertEqual(0, code.index("Have a ")) + self.assertEqual(3, code.index("[[page|link]]")) + self.assertEqual(1, code.index(code.get(1))) + self.assertRaises(ValueError, code.index, "foo") + + code = parse("{{foo}}{{bar|{{baz}}}}") + self.assertEqual(1, code.index("{{bar|{{baz}}}}")) + self.assertEqual(1, code.index("{{baz}}", recursive=True)) + self.assertEqual(1, code.index(code.get(1).get(1).value, + recursive=True)) + self.assertRaises(ValueError, code.index, "{{baz}}", recursive=False) + self.assertRaises(ValueError, code.index, + code.get(1).get(1).value, recursive=False) def test_insert(self): """test Wikicode.insert()""" From 3095a4203f7e7ca680da861d8b243a2284acfc93 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 8 May 2013 11:03:04 -0400 Subject: [PATCH 151/180] Finish tests for Wikicode's list-like methods; fix a bug. --- mwparserfromhell/wikicode.py | 2 +- tests/test_wikicode.py | 101 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 96 insertions(+), 7 deletions(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index e9bd133..b704590 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -68,7 +68,7 @@ class Wikicode(StringMixIn): Raises ``ValueError`` if *obj* is not within *node*. 
""" for context, child in node.__iternodes__(self._get_all_nodes): - if child is obj: + if self._is_equivalent(obj, child): return context raise ValueError(obj) diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 485ab99..179d588 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -97,27 +97,116 @@ class TestWikicode(TreeEqualityTestCase): def test_insert(self): """test Wikicode.insert()""" - pass + code = parse("Have a {{template}} and a [[page|link]]") + code.insert(1, "{{{argument}}}") + self.assertEqual( + "Have a {{{argument}}}{{template}} and a [[page|link]]", code) + self.assertIsInstance(code.get(1), Argument) + code.insert(2, None) + self.assertEqual( + "Have a {{{argument}}}{{template}} and a [[page|link]]", code) + code.insert(-3, Text("foo")) + self.assertEqual( + "Have a {{{argument}}}foo{{template}} and a [[page|link]]", code) + + code2 = parse("{{foo}}{{bar}}{{baz}}") + code2.insert(1, "abc{{def}}ghi[[jk]]") + self.assertEqual("{{foo}}abc{{def}}ghi[[jk]]{{bar}}{{baz}}", code2) + self.assertEqual(["{{foo}}", "abc", "{{def}}", "ghi", "[[jk]]", + "{{bar}}", "{{baz}}"], code2.nodes) + + code3 = parse("{{foo}}bar") + code3.insert(1000, "[[baz]]") + code3.insert(-1000, "derp") + self.assertEqual("derp{{foo}}bar[[baz]]", code3) def test_insert_before(self): """test Wikicode.insert_before()""" - pass + code = parse("{{a}}{{b}}{{c}}{{d}}") + code.insert_before("{{b}}", "x", recursive=True) + code.insert_before("{{d}}", "[[y]]", recursive=False) + self.assertEqual("{{a}}x{{b}}{{c}}[[y]]{{d}}", code) + code.insert_before(code.get(2), "z") + self.assertEqual("{{a}}xz{{b}}{{c}}[[y]]{{d}}", code) + self.assertRaises(ValueError, code.insert_before, "{{r}}", "n", + recursive=True) + self.assertRaises(ValueError, code.insert_before, "{{r}}", "n", + recursive=False) + + code2 = parse("{{a|{{b}}|{{c|d={{f}}}}}}") + code2.insert_before(code2.get(0).params[0].value.get(0), "x", + recursive=True) + code2.insert_before("{{f}}", "y", recursive=True) + self.assertEqual("{{a|x{{b}}|{{c|d=y{{f}}}}}}", code2) + self.assertRaises(ValueError, code2.insert_before, "{{f}}", "y", + recursive=False) def test_insert_after(self): """test Wikicode.insert_after()""" - pass + code = parse("{{a}}{{b}}{{c}}{{d}}") + code.insert_after("{{b}}", "x", recursive=True) + code.insert_after("{{d}}", "[[y]]", recursive=False) + self.assertEqual("{{a}}{{b}}x{{c}}{{d}}[[y]]", code) + code.insert_after(code.get(2), "z") + self.assertEqual("{{a}}{{b}}xz{{c}}{{d}}[[y]]", code) + self.assertRaises(ValueError, code.insert_after, "{{r}}", "n", + recursive=True) + self.assertRaises(ValueError, code.insert_after, "{{r}}", "n", + recursive=False) + + code2 = parse("{{a|{{b}}|{{c|d={{f}}}}}}") + code2.insert_after(code2.get(0).params[0].value.get(0), "x", + recursive=True) + code2.insert_after("{{f}}", "y", recursive=True) + self.assertEqual("{{a|{{b}}x|{{c|d={{f}}y}}}}", code2) + self.assertRaises(ValueError, code2.insert_after, "{{f}}", "y", + recursive=False) def test_replace(self): """test Wikicode.replace()""" - pass + code = parse("{{a}}{{b}}{{c}}{{d}}") + code.replace("{{b}}", "x", recursive=True) + code.replace("{{d}}", "[[y]]", recursive=False) + self.assertEqual("{{a}}x{{c}}[[y]]", code) + code.replace(code.get(1), "z") + self.assertEqual("{{a}}z{{c}}[[y]]", code) + self.assertRaises(ValueError, code.replace, "{{r}}", "n", + recursive=True) + self.assertRaises(ValueError, code.replace, "{{r}}", "n", + recursive=False) + + code2 = parse("{{a|{{b}}|{{c|d={{f}}}}}}") + 
code2.replace(code2.get(0).params[0].value.get(0), "x", recursive=True) + code2.replace("{{f}}", "y", recursive=True) + self.assertEqual("{{a|x|{{c|d=y}}}}", code2) + self.assertRaises(ValueError, code2.replace, "y", "z", recursive=False) def test_append(self): """test Wikicode.append()""" - pass + code = parse("Have a {{template}}") + code.append("{{{argument}}}") + self.assertEqual("Have a {{template}}{{{argument}}}", code) + self.assertIsInstance(code.get(2), Argument) + code.append(None) + self.assertEqual("Have a {{template}}{{{argument}}}", code) + code.append(Text(" foo")) + self.assertEqual("Have a {{template}}{{{argument}}} foo", code) + self.assertRaises(ValueError, code.append, slice(0, 1)) def test_remove(self): """test Wikicode.remove()""" - pass + code = parse("{{a}}{{b}}{{c}}{{d}}") + code.remove("{{b}}", recursive=True) + code.remove(code.get(1), recursive=True) + self.assertEqual("{{a}}{{d}}", code) + self.assertRaises(ValueError, code.remove, "{{r}}", recursive=True) + self.assertRaises(ValueError, code.remove, "{{r}}", recursive=False) + + code2 = parse("{{a|{{b}}|{{c|d={{f}}{{h}}}}}}") + code2.remove(code2.get(0).params[0].value.get(0), recursive=True) + code2.remove("{{f}}", recursive=True) + self.assertEqual("{{a||{{c|d={{h}}}}}}", code2) + self.assertRaises(ValueError, code2.remove, "{{h}}", recursive=False) def test_filter_family(self): """test the Wikicode.i?filter() family of functions""" From 17ac79e79660e3775e3e06dde254d122515a08da Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 11 May 2013 15:58:45 -0400 Subject: [PATCH 152/180] Build filter methods dynamically. --- mwparserfromhell/wikicode.py | 97 ++++++++++++++------------------------------ 1 file changed, 31 insertions(+), 66 deletions(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index b704590..4750094 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -23,7 +23,7 @@ from __future__ import unicode_literals import re -from .compat import maxsize, str +from .compat import maxsize, py3k, str from .nodes import Heading, Node, Tag, Template, Text, Wikilink from .string_mixin import StringMixIn from .utils import parse_anything @@ -291,46 +291,36 @@ class Wikicode(StringMixIn): *flags*. If *forcetype* is given, only nodes that are instances of this type are yielded. """ - if recursive: - nodes = self._get_all_nodes(self) - else: - nodes = self.nodes - for node in nodes: + for node in (self._get_all_nodes(self) if recursive else self.nodes): if not forcetype or isinstance(node, forcetype): if not matches or re.search(matches, str(node), flags): yield node - def ifilter_links(self, recursive=False, matches=None, flags=FLAGS): - """Iterate over wikilink nodes. - - This is equivalent to :py:meth:`ifilter` with *forcetype* set to - :py:class:`~.Wikilink`. - """ - return self.ifilter(recursive, matches, flags, forcetype=Wikilink) - - def ifilter_templates(self, recursive=False, matches=None, flags=FLAGS): - """Iterate over template nodes. - - This is equivalent to :py:meth:`ifilter` with *forcetype* set to - :py:class:`~.Template`. - """ - return self.filter(recursive, matches, flags, forcetype=Template) - - def ifilter_text(self, recursive=False, matches=None, flags=FLAGS): - """Iterate over text nodes. - - This is equivalent to :py:meth:`ifilter` with *forcetype* set to - :py:class:`~.nodes.Text`. + @classmethod + def _build_filter_methods(cls, meths): + """Given a dict of Node types, build corresponding i?filter shortcuts. 
+ + The dict should be given as keys storing the method's base name paired + with values storing the corresponding :py:class:`~.Node` type. For + example, the dict may contain the pair ``("templates", Template)``, + which will produce the methods :py:meth:`ifilter_templates` and + :py:meth:`filter_templates`, which are shortcuts for + :py:meth:`ifilter(forcetype=Template) ` and + :py:meth:`filter(forcetype=Template) `, respectively. These + shortcuts are added to the class itself, with an appropriate docstring. """ - return self.filter(recursive, matches, flags, forcetype=Text) - - def ifilter_tags(self, recursive=False, matches=None, flags=FLAGS): - """Iterate over tag nodes. + doc = """Iterate over {0}. - This is equivalent to :py:meth:`ifilter` with *forcetype* set to - :py:class:`~.Tag`. + This is equivalent to :py:meth:`{1}` with *forcetype* set to + :py:class:`~.{2}`. """ - return self.ifilter(recursive, matches, flags, forcetype=Tag) + for name, forcetype in (meths.items() if py3k else meths.iteritems()): + ifil = lambda self, **kw: self.ifilter(forcetype=forcetype, **kw) + fil = lambda self, **kw: self.filter(forcetype=forcetype, **kw) + ifil.__doc__ = doc.format(name, "ifilter", forcetype) + fil.__doc__ = doc.format(name, "filter", forcetype) + setattr(cls, "ifilter_" + name, ifil) + setattr(cls, "filter_" + name, fil) def filter(self, recursive=False, matches=None, flags=FLAGS, forcetype=None): @@ -340,38 +330,6 @@ class Wikicode(StringMixIn): """ return list(self.ifilter(recursive, matches, flags, forcetype)) - def filter_links(self, recursive=False, matches=None, flags=FLAGS): - """Return a list of wikilink nodes. - - This is equivalent to calling :py:func:`list` on - :py:meth:`ifilter_links`. - """ - return list(self.ifilter_links(recursive, matches, flags)) - - def filter_templates(self, recursive=False, matches=None, flags=FLAGS): - """Return a list of template nodes. - - This is equivalent to calling :py:func:`list` on - :py:meth:`ifilter_templates`. - """ - return list(self.ifilter_templates(recursive, matches, flags)) - - def filter_text(self, recursive=False, matches=None, flags=FLAGS): - """Return a list of text nodes. - - This is equivalent to calling :py:func:`list` on - :py:meth:`ifilter_text`. - """ - return list(self.ifilter_text(recursive, matches, flags)) - - def filter_tags(self, recursive=False, matches=None, flags=FLAGS): - """Return a list of tag nodes. - - This is equivalent to calling :py:func:`list` on - :py:meth:`ifilter_tags`. - """ - return list(self.ifilter_tags(recursive, matches, flags)) - def get_sections(self, flat=True, matches=None, levels=None, flags=FLAGS, include_headings=True): """Return a list of sections within the page. @@ -470,3 +428,10 @@ class Wikicode(StringMixIn): """ marker = object() # Random object we can find with certainty in a list return "\n".join(self._get_tree(self, [], marker, 0)) + +Wikicode._build_filter_methods({ + "links": Wikilink, + "templates": Template, + "text": Text, + "tag": Tag + }) From f700914caf895ff7a6ac628797e7a337ee53e4be Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 11 May 2013 19:21:24 -0400 Subject: [PATCH 153/180] Cleanup Wikicode's filter functions; implement test_filter_family(). 
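
Note: the version of _build_filter_methods() introduced in the previous patch
binds its lambdas directly inside the loop, so every generated shortcut
closes over the same loop variable and ends up filtering on whichever type
was registered last. The cleanup below routes each type through a factory
function instead. A minimal sketch of the pitfall, illustrative only and not
project code:

    # Each lambda sees the loop variable itself, which is 2 by the time
    # any of them runs:
    funcs = [lambda: i for i in range(3)]
    print([f() for f in funcs])   # [2, 2, 2]

    # A factory function binds the *current* value instead:
    make = lambda n: (lambda: n)
    funcs = [make(i) for i in range(3)]
    print([f() for f in funcs])   # [0, 1, 2]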
---
 mwparserfromhell/wikicode.py | 69 +++++++++++++++++++++++---------------------
 tests/test_wikicode.py       | 65 +++++++++++++++++++++++++++++++++++++++--
 2 files changed, 99 insertions(+), 35 deletions(-)

diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py
index 4750094..365eab7 100644
--- a/mwparserfromhell/wikicode.py
+++ b/mwparserfromhell/wikicode.py
@@ -24,7 +24,8 @@ from __future__ import unicode_literals
 import re
 
 from .compat import maxsize, py3k, str
-from .nodes import Heading, Node, Tag, Template, Text, Wikilink
+from .nodes import (Argument, Comment, Heading, HTMLEntity, Node, Tag,
+                    Template, Text, Wikilink)
 from .string_mixin import StringMixIn
 from .utils import parse_anything
 
@@ -151,6 +152,36 @@ class Wikicode(StringMixIn):
             node.__showtree__(write, get, mark)
         return lines
 
+    @classmethod
+    def _build_filter_methods(cls, **meths):
+        """Given Node types, build the corresponding i?filter shortcuts.
+
+        The methods should be given as keyword arguments pairing each
+        method's base name with the corresponding :py:class:`~.Node` type.
+        For example, the pair ``templates=Template`` will produce the methods
+        :py:meth:`ifilter_templates` and :py:meth:`filter_templates`, which
+        are shortcuts for :py:meth:`ifilter(forcetype=Template) <ifilter>`
+        and :py:meth:`filter(forcetype=Template) <filter>`, respectively.
+        These shortcuts are added to the class itself, with an appropriate
+        docstring.
+        """
+        doc = """Iterate over {0}.
+
+        This is equivalent to :py:meth:`{1}` with *forcetype* set to
+        :py:class:`~.{2}`.
+        """
+        make_ifilter = lambda ftype: (lambda self, **kw:
+                                      self.ifilter(forcetype=ftype, **kw))
+        make_filter = lambda ftype: (lambda self, **kw:
+                                     self.filter(forcetype=ftype, **kw))
+        for name, ftype in (meths.items() if py3k else meths.iteritems()):
+            ifilter = make_ifilter(ftype)
+            filter = make_filter(ftype)
+            ifilter.__doc__ = doc.format(name, "ifilter", ftype.__name__)
+            filter.__doc__ = doc.format(name, "filter", ftype.__name__)
+            setattr(cls, "ifilter_" + name, ifilter)
+            setattr(cls, "filter_" + name, filter)
+
     @property
     def nodes(self):
         """A list of :py:class:`~.Node` objects.
@@ -296,32 +327,6 @@ class Wikicode(StringMixIn):
             if not matches or re.search(matches, str(node), flags):
                 yield node
 
-    @classmethod
-    def _build_filter_methods(cls, meths):
-        """Given a dict of Node types, build corresponding i?filter shortcuts.
-
-        The dict should be given as keys storing the method's base name paired
-        with values storing the corresponding :py:class:`~.Node` type. For
-        example, the dict may contain the pair ``("templates", Template)``,
-        which will produce the methods :py:meth:`ifilter_templates` and
-        :py:meth:`filter_templates`, which are shortcuts for
-        :py:meth:`ifilter(forcetype=Template) <ifilter>` and
-        :py:meth:`filter(forcetype=Template) <filter>`, respectively. These
-        shortcuts are added to the class itself, with an appropriate docstring.
-        """
-        doc = """Iterate over {0}.
-
-        This is equivalent to :py:meth:`{1}` with *forcetype* set to
-        :py:class:`~.{2}`.
- """ - for name, forcetype in (meths.items() if py3k else meths.iteritems()): - ifil = lambda self, **kw: self.ifilter(forcetype=forcetype, **kw) - fil = lambda self, **kw: self.filter(forcetype=forcetype, **kw) - ifil.__doc__ = doc.format(name, "ifilter", forcetype) - fil.__doc__ = doc.format(name, "filter", forcetype) - setattr(cls, "ifilter_" + name, ifil) - setattr(cls, "filter_" + name, fil) - def filter(self, recursive=False, matches=None, flags=FLAGS, forcetype=None): """Return a list of nodes within our list matching certain conditions. @@ -429,9 +434,7 @@ class Wikicode(StringMixIn): marker = object() # Random object we can find with certainty in a list return "\n".join(self._get_tree(self, [], marker, 0)) -Wikicode._build_filter_methods({ - "links": Wikilink, - "templates": Template, - "text": Text, - "tag": Tag - }) +Wikicode._build_filter_methods( + arguments=Argument, comments=Comment, headings=Heading, + html_entities=HTMLEntity, tags=Tag, templates=Template, text=Text, + wikilinks=Wikilink) diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 179d588..69600c4 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -21,6 +21,8 @@ # SOFTWARE. from __future__ import unicode_literals +import re +from types import GeneratorType import unittest from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, @@ -210,7 +212,67 @@ class TestWikicode(TreeEqualityTestCase): def test_filter_family(self): """test the Wikicode.i?filter() family of functions""" - pass + def genlist(gen): + self.assertIsInstance(gen, GeneratorType) + return list(gen) + ifilter = lambda code: (lambda **kw: genlist(code.ifilter(**kw))) + + code = parse("a{{b}}c[[d]]{{{e}}}{{f}}[[g]]") + for func in (code.filter, ifilter(code)): + self.assertEqual(["a", "{{b}}", "c", "[[d]]", "{{{e}}}", "{{f}}", + "[[g]]"], func()) + self.assertEqual(["{{{e}}}"], func(forcetype=Argument)) + self.assertIs(code.get(4), func(forcetype=Argument)[0]) + self.assertEqual(["a", "c"], func(forcetype=Text)) + self.assertEqual([], func(forcetype=Heading)) + self.assertRaises(TypeError, func, forcetype=True) + + funcs = [ + lambda name, **kw: getattr(code, "filter_" + name)(**kw), + lambda name, **kw: genlist(getattr(code, "ifilter_" + name)(**kw)) + ] + for get_filter in funcs: + self.assertEqual(["{{{e}}}"], get_filter("arguments")) + self.assertIs(code.get(4), get_filter("arguments")[0]) + self.assertEqual([], get_filter("comments")) + self.assertEqual([], get_filter("headings")) + self.assertEqual([], get_filter("html_entities")) + self.assertEqual([], get_filter("tags")) + self.assertEqual(["{{b}}", "{{f}}"], get_filter("templates")) + self.assertEqual(["a", "c"], get_filter("text")) + self.assertEqual(["[[d]]", "[[g]]"], get_filter("wikilinks")) + + code2 = parse("{{a|{{b}}|{{c|d={{f}}{{h}}}}}}") + for func in (code2.filter, ifilter(code2)): + self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"], + func(recursive=False, forcetype=Template)) + self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}", + "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"], + func(recursive=True, forcetype=Template)) + + code3 = parse("{{foobar}}{{FOO}}{{baz}}{{bz}}") + for func in (code3.filter, ifilter(code3)): + self.assertEqual(["{{foobar}}", "{{FOO}}"], func(matches=r"foo")) + self.assertEqual(["{{foobar}}", "{{FOO}}"], + func(matches=r"^{{foo.*?}}")) + self.assertEqual(["{{foobar}}"], + func(matches=r"^{{foo.*?}}", flags=re.UNICODE)) + self.assertEqual(["{{baz}}", "{{bz}}"], func(matches=r"^{{b.*?z")) + 
self.assertEqual(["{{baz}}"], func(matches=r"^{{b.+?z}}")) + + self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"], + code2.filter_templates(recursive=False)) + self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}", + "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"], + code2.filter_templates(recursive=True)) + self.assertEqual(["{{baz}}", "{{bz}}"], + code3.filter_templates(matches=r"^{{b.*?z")) + self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z")) + self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z", flags=0)) + + self.assertRaises(TypeError, code.filter_templates, 100) + self.assertRaises(TypeError, code.filter_templates, a=42) + self.assertRaises(TypeError, code.filter_templates, forcetype=Template) def test_get_sections(self): """test Wikicode.get_sections()""" @@ -224,6 +286,5 @@ class TestWikicode(TreeEqualityTestCase): """test Wikicode.get_tree()""" pass - if __name__ == "__main__": unittest.main(verbosity=2) From 0b56f2e2673339c2a096928168f64666b595ab53 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 13 May 2013 02:39:00 -0400 Subject: [PATCH 154/180] Improve Wikicode.get_sections(); implement test_get_tree(); part of test_get_sections() --- mwparserfromhell/wikicode.py | 32 +++++++++++++++++--------------- tests/test_wikicode.py | 44 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 57 insertions(+), 19 deletions(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 365eab7..f258921 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -335,34 +335,36 @@ class Wikicode(StringMixIn): """ return list(self.ifilter(recursive, matches, flags, forcetype)) - def get_sections(self, flat=True, matches=None, levels=None, flags=FLAGS, - include_headings=True): + def get_sections(self, levels=None, matches=None, flags=FLAGS, + include_lead=True, include_headings=True): """Return a list of sections within the page. Sections are returned as :py:class:`~.Wikicode` objects with a shared node list (implemented using :py:class:`~.SmartList`) so that changes to sections are reflected in the parent Wikicode object. - With *flat* as ``True``, each returned section contains all of its - subsections within the :py:class:`~.Wikicode`; otherwise, the returned - sections contain only the section up to the next heading, regardless of - its size. If *matches* is given, it should be a regex to be matched - against the titles of section headings; only sections whose headings - match the regex will be included. If *levels* is given, it should be a - iterable of integers; only sections whose heading levels are within it - will be returned. If *include_headings* is ``True``, the section's - beginning :py:class:`~.Heading` object will be included in returned - :py:class:`~.Wikicode` objects; otherwise, this is skipped. + Each section contains all of its subsections. If *levels* is given, it + should be a iterable of integers; only sections whose heading levels + are within it will be returned.If *matches* is given, it should be a + regex to be matched against the titles of section headings; only + sections whose headings match the regex will be included. *flags* can + be used to override the default regex flags (see :py:meth:`ifilter`) if + *matches* is used. + + If *include_lead* is ``True``, the first, lead section (without a + heading) will be included in the list. If *include_headings* is + ``True``, the section's beginning :py:class:`~.Heading` object will be + included; otherwise, this is skipped. 
""" if matches: matches = r"^(=+?)\s*" + matches + r"\s*\1$" - headings = self.filter(recursive=True, matches=matches, flags=flags, - forcetype=Heading) + headings = self.filter_headings(recursive=True, matches=matches, + flags=flags) if levels: headings = [head for head in headings if head.level in levels] sections = [] - buffers = [(maxsize, 0)] + buffers = [(maxsize, 0)] if include_lead else [] i = 0 while i < len(self.nodes): if self.nodes[i] in headings: diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 69600c4..4aa07f1 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -26,11 +26,11 @@ from types import GeneratorType import unittest from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, - Tag, Template, Text, Wikilink) + Node, Tag, Template, Text, Wikilink) from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode from mwparserfromhell import parse -from mwparserfromhell.compat import str +from mwparserfromhell.compat import py3k, str from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext @@ -276,7 +276,37 @@ class TestWikicode(TreeEqualityTestCase): def test_get_sections(self): """test Wikicode.get_sections()""" - pass + page1 = "" + page2 = "==Heading==" + page3 = "===Heading===\nFoo bar baz\n====Gnidaeh====\n" + page4 = """ +This is a lead. +== Section I == +Section I body. {{and a|template}} +=== Section I.A === +Section I.A [[body]]. +=== Section I.B === +==== Section I.B.1 ==== +Section I.B.1 body. + +•Some content. + +== Section II == +Section II body. + +== Section III == +=== Section III.A === +Text. +===== Section III.A.1.a ===== +More text. +==== Section III.A.2 ==== +Even more text. +======= section III.A.2.a.i.1 ======= +An invalid section!""" + + self.assertEqual([], parse(page1).get_sections()) + self.assertEqual(["==Heading=="], parse(page2).get_sections()) + self.assertEqual(["===Heading===\nFoo bar baz\n", "====Gnidaeh====\n"], parse(page2).get_sections()) def test_strip_code(self): """test Wikicode.strip_code()""" @@ -284,7 +314,13 @@ class TestWikicode(TreeEqualityTestCase): def test_get_tree(self): """test Wikicode.get_tree()""" - pass + # Since individual nodes have test cases for their __showtree___ + # methods, and the docstring covers all possibilities, this doesn't + # need to test anything other than it: + code = parse("Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}") + expected = "Lorem ipsum \n{{\n\t foo\n\t| 1\n\t= bar\n\t| 2\n\t= " + \ + "{{\n\t\t\tbaz\n\t }}\n\t| spam\n\t= eggs\n}}" + self.assertEqual(expected.expandtabs(4), code.get_tree()) if __name__ == "__main__": unittest.main(verbosity=2) From 35acc1b812edf46bebcd19c753e170a288c20dc3 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 13 May 2013 18:10:06 -0400 Subject: [PATCH 155/180] Fix a couple bugs. 
--- mwparserfromhell/wikicode.py | 2 +- tests/test_wikicode.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index f258921..1d5de5d 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -370,7 +370,7 @@ class Wikicode(StringMixIn): if self.nodes[i] in headings: this = self.nodes[i].level for (level, start) in buffers: - if not flat or this <= level: + if this <= level: buffers.remove((level, start)) sections.append(Wikicode(self.nodes[start:i])) buffers.append((this, i)) diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 4aa07f1..1eacb11 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -305,8 +305,8 @@ Even more text. An invalid section!""" self.assertEqual([], parse(page1).get_sections()) - self.assertEqual(["==Heading=="], parse(page2).get_sections()) - self.assertEqual(["===Heading===\nFoo bar baz\n", "====Gnidaeh====\n"], parse(page2).get_sections()) + self.assertEqual(["", "==Heading=="], parse(page2).get_sections()) + self.assertEqual(["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n", "====Gnidaeh====\n"], parse(page3).get_sections()) def test_strip_code(self): """test Wikicode.strip_code()""" From 9ede1121ba4caa547a85a9e71eac4171f95eefa3 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 13 May 2013 18:44:21 -0400 Subject: [PATCH 156/180] Fix tokenizer.c on Windows; add another template test (#25) Mostly by @gdooms, with tweaks. --- mwparserfromhell/parser/tokenizer.c | 47 +++++++++++++++++++++++-------------- mwparserfromhell/parser/tokenizer.h | 1 + tests/tokenizer/templates.mwtest | 9 ++++++- 3 files changed, 39 insertions(+), 18 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 875263c..1fd4804 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -23,6 +23,11 @@ SOFTWARE. 
#include "tokenizer.h" +double log2(double n) +{ + return log(n) / log(2); +} + static PyObject* Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds) { @@ -52,8 +57,9 @@ Textbuffer_new(void) static void Tokenizer_dealloc(Tokenizer* self) { - Py_XDECREF(self->text); struct Stack *this = self->topstack, *next; + Py_XDECREF(self->text); + while (this) { Py_DECREF(this->stack); Textbuffer_dealloc(this->textbuffer); @@ -139,20 +145,21 @@ Textbuffer_render(struct Textbuffer* self) static int Tokenizer_push_textbuffer(Tokenizer* self) { + PyObject *text, *kwargs, *token; struct Textbuffer* buffer = self->topstack->textbuffer; if (buffer->size == 0 && !buffer->next) return 0; - PyObject* text = Textbuffer_render(buffer); + text = Textbuffer_render(buffer); if (!text) return -1; - PyObject* kwargs = PyDict_New(); + kwargs = PyDict_New(); if (!kwargs) { Py_DECREF(text); return -1; } PyDict_SetItemString(kwargs, "text", text); Py_DECREF(text); - PyObject* token = PyObject_Call(Text, NOARGS, kwargs); + token = PyObject_Call(Text, NOARGS, kwargs); Py_DECREF(kwargs); if (!token) return -1; @@ -185,9 +192,10 @@ Tokenizer_delete_top_of_stack(Tokenizer* self) static PyObject* Tokenizer_pop(Tokenizer* self) { + PyObject* stack; if (Tokenizer_push_textbuffer(self)) return NULL; - PyObject* stack = self->topstack->stack; + stack = self->topstack->stack; Py_INCREF(stack); Tokenizer_delete_top_of_stack(self); return stack; @@ -200,11 +208,13 @@ Tokenizer_pop(Tokenizer* self) static PyObject* Tokenizer_pop_keeping_context(Tokenizer* self) { + PyObject* stack; + int context; if (Tokenizer_push_textbuffer(self)) return NULL; - PyObject* stack = self->topstack->stack; + stack = self->topstack->stack; Py_INCREF(stack); - int context = self->topstack->context; + context = self->topstack->context; Tokenizer_delete_top_of_stack(self); self->topstack->context = context; return stack; @@ -376,9 +386,10 @@ Tokenizer_read(Tokenizer* self, Py_ssize_t delta) static PyObject* Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) { + Py_ssize_t index; if (delta > self->head) return EMPTY; - Py_ssize_t index = self->head - delta; + index = self->head - delta; return PyList_GET_ITEM(self->text, index); } @@ -392,7 +403,7 @@ Tokenizer_parse_template_or_argument(Tokenizer* self) PyObject *tokenlist; self->head += 2; - while (Tokenizer_READ(self, 0) == *"{") { + while (Tokenizer_READ(self, 0) == *"{" && braces < MAX_BRACES) { self->head++; braces++; } @@ -423,8 +434,8 @@ Tokenizer_parse_template_or_argument(Tokenizer* self) if (Tokenizer_parse_template(self)) return -1; if (BAD_ROUTE) { + char text[MAX_BRACES]; RESET_ROUTE(); - char text[braces + 1]; for (i = 0; i < braces; i++) text[i] = *"{"; text[braces] = *""; if (Tokenizer_write_text_then_stack(self, text)) { @@ -635,9 +646,10 @@ Tokenizer_handle_template_end(Tokenizer* self) static int Tokenizer_handle_argument_separator(Tokenizer* self) { + PyObject* token; self->topstack->context ^= LC_ARGUMENT_NAME; self->topstack->context |= LC_ARGUMENT_DEFAULT; - PyObject* token = PyObject_CallObject(ArgumentSeparator, NULL); + token = PyObject_CallObject(ArgumentSeparator, NULL); if (!token) return -1; if (Tokenizer_write(self, token)) { @@ -654,8 +666,8 @@ Tokenizer_handle_argument_separator(Tokenizer* self) static PyObject* Tokenizer_handle_argument_end(Tokenizer* self) { - self->head += 2; PyObject* stack = Tokenizer_pop(self); + self->head += 2; return stack; } @@ -716,9 +728,10 @@ Tokenizer_parse_wikilink(Tokenizer* self) static int 
 Tokenizer_handle_wikilink_separator(Tokenizer* self)
 {
+    PyObject* token;
     self->topstack->context ^= LC_WIKILINK_TITLE;
     self->topstack->context |= LC_WIKILINK_TEXT;
-    PyObject* token = PyObject_CallObject(WikilinkSeparator, NULL);
+    token = PyObject_CallObject(WikilinkSeparator, NULL);
     if (!token)
         return -1;
     if (Tokenizer_write(self, token)) {
@@ -735,8 +748,8 @@
 static PyObject*
 Tokenizer_handle_wikilink_end(Tokenizer* self)
 {
-    self->head += 1;
     PyObject* stack = Tokenizer_pop(self);
+    self->head += 1;
     return stack;
 }
@@ -1093,9 +1106,9 @@ Tokenizer_parse_comment(Tokenizer* self)
     self->head += 4;
     comment = Tokenizer_parse(self, LC_COMMENT);
     if (BAD_ROUTE) {
+        const char* text = "<!--";

+output: [CommentStart(), CommentEnd()]
+
+---
+
+name: basic
+label: a basic comment
+input: "<!-- comment -->"
+output: [CommentStart(), Text(text=" comment "), CommentEnd()]
+
+---
+
+name: tons_of_nonsense
+label: a comment with tons of ignorable garbage in it
+input: ""
+output: [CommentStart(), Text(text=" foo{{bar}}[[basé\n\n]{}{}{}{}]{{{{{{haha{{--a>aabsp;"
+
+---
+
+input: "&n<!--foo-->bsp;"
+output: [Text(text="&n"), CommentStart(), Text(text="foo"), CommentEnd(), Text(text="bsp;")]
+
+---
+
+name: wildcard
+label: a wildcard assortment of various things
+input: "{{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}"
+output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), Text(text="biz"), TemplateClose(), Text(text="buzz"), TemplateClose(), Text(text="usr"), TemplateParamSeparator(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()]
+
+---
+
+name: wildcard_redux
+label: an even wilder assortment of various things
+input: "{{a|b|{{c|[[d]]{{{e}}}}}}}[[f|{{{g}}}<!--h-->]]{{i|j=&nbsp;}}"
+output: [TemplateOpen(), Text(text="a"), TemplateParamSeparator(), Text(text="b"), TemplateParamSeparator(), TemplateOpen(), Text(text="c"), TemplateParamSeparator(), WikilinkOpen(), Text(text="d"), WikilinkClose(), ArgumentOpen(), Text(text="e"), ArgumentClose(), TemplateClose(), TemplateClose(), WikilinkOpen(), Text(text="f"), WikilinkSeparator(), ArgumentOpen(), Text(text="g"), ArgumentClose(), CommentStart(), Text(text="h"), CommentEnd(), WikilinkClose(), TemplateOpen(), Text(text="i"), TemplateParamSeparator(), Text(text="j"), TemplateParamEquals(), HTMLEntityStart(), Text(text="nbsp"), HTMLEntityEnd(), TemplateClose()]

From 22e869b1429dabd30976e4bdb8b819ed240c3f29 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Sun, 19 May 2013 01:45:09 -0400
Subject: [PATCH 169/180] Fix a failing HTML entity test in the C tokenizer.
 Remove some extraneous whitespace in string_mixin.py.
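
The failing test involves a numeric HTML entity padded with leading zeroes
(e.g. "&#0000000107;" -- the exact test string here is illustrative). The C
tokenizer skips those zeroes while validating the digits, so the entity's
text came back without them. The fix below counts the skipped zeroes and
glues them back on; the gist of it, sketched in Python (not project code):

    def rebuild_entity_digits(digits, zeroes):
        # Re-attach the leading zeroes skipped during scanning so the
        # original wikitext round-trips unchanged:
        return "0" * zeroes + digits

    assert rebuild_entity_digits("107", 7) == "0000000107"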
--- mwparserfromhell/parser/tokenizer.c | 19 +++++++++++++++++-- mwparserfromhell/string_mixin.py | 1 - 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index df0882e..939f30c 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -911,8 +911,8 @@ Tokenizer_really_parse_entity(Tokenizer* self) { PyObject *token, *kwargs, *textobj; Py_UNICODE this; - int numeric, hexadecimal, i, j, test; - char *valid, *text, *def; + int numeric, hexadecimal, i, j, zeroes, test; + char *valid, *text, *buffer, *def; #define FAIL_ROUTE_AND_EXIT() { \ Tokenizer_fail_route(self); \ @@ -984,6 +984,7 @@ Tokenizer_really_parse_entity(Tokenizer* self) return -1; } i = 0; + zeroes = 0; while (1) { this = Tokenizer_READ(self, 0); if (this == *";") { @@ -992,6 +993,7 @@ Tokenizer_really_parse_entity(Tokenizer* self) break; } if (i == 0 && this == *"0") { + zeroes++; self->head++; continue; } @@ -1029,6 +1031,19 @@ Tokenizer_really_parse_entity(Tokenizer* self) i++; } } + if (zeroes) { + buffer = calloc(strlen(text) + zeroes + 1, sizeof(char)); + if (!buffer) { + free(text); + PyErr_NoMemory(); + return -1; + } + for (i = 0; i < zeroes; i++) + strcat(buffer, "0"); + strcat(buffer, text); + free(text); + text = buffer; + } textobj = PyUnicode_FromString(text); if (!textobj) { free(text); diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 6bee9c4..89c1bc0 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -40,7 +40,6 @@ def inheritdoc(method): method.__doc__ = getattr(str, method.__name__).__doc__ return method - class StringMixIn(object): """Implement the interface for ``unicode``/``str`` in a dynamic manner. From ac9b64bf60741232c9d1f2210d287f2e1d481e80 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 1 Jun 2013 15:35:40 -0400 Subject: [PATCH 170/180] Travis integration (closes #33) --- .travis.yml | 6 ++++++ README.rst | 2 ++ 2 files changed, 8 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..71b8eb6 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,6 @@ +language: python +python: + - "2.7" + - "3.3" +install: +script: python setup.py test -q diff --git a/README.rst b/README.rst index 9847c33..0be8fd5 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,8 @@ mwparserfromhell ================ +[![Build Status](https://secure.travis-ci.org/earwig/mwparserfromhell.png?branch=develop)](http://travis-ci.org/earwig/mwparserfromhell) + **mwparserfromhell** (the *MediaWiki Parser from Hell*) is a Python package that provides an easy-to-use and outrageously powerful parser for MediaWiki_ wikicode. It supports Python 2 and Python 3. From b2f0f23ea45cb79967e11acc1a14ba4418411913 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 1 Jun 2013 15:48:37 -0400 Subject: [PATCH 171/180] Fix README. 
--- .travis.yml | 2 +- README.rst | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 71b8eb6..7a9920d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,5 +2,5 @@ language: python python: - "2.7" - "3.3" -install: +install: python setup.py build script: python setup.py test -q diff --git a/README.rst b/README.rst index 0be8fd5..267f7ea 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,9 @@ mwparserfromhell ================ -[![Build Status](https://secure.travis-ci.org/earwig/mwparserfromhell.png?branch=develop)](http://travis-ci.org/earwig/mwparserfromhell) +..image:: https://travis-ci.org/earwig/mwparserfromhell.png?branch=develop + :alt: Build Status + :target: http://travis-ci.org/earwig/mwparserfromhell **mwparserfromhell** (the *MediaWiki Parser from Hell*) is a Python package that provides an easy-to-use and outrageously powerful parser for MediaWiki_ From ce252f69ecb49864b0c4ec98e9b13ca55ce896b8 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 1 Jun 2013 16:00:40 -0400 Subject: [PATCH 172/180] Missed a space. --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 267f7ea..77c01eb 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ mwparserfromhell ================ -..image:: https://travis-ci.org/earwig/mwparserfromhell.png?branch=develop +.. image:: https://travis-ci.org/earwig/mwparserfromhell.png?branch=develop :alt: Build Status :target: http://travis-ci.org/earwig/mwparserfromhell From 81954c50acd88aba523c5064e63a8316692997fb Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 19 Jun 2013 00:22:30 -0400 Subject: [PATCH 173/180] Removing a useless, skipped test. --- tests/test_builder.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index 903d144..2d44b6c 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -190,11 +190,6 @@ class TestBuilder(TreeEqualityTestCase): for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) - @unittest.skip("holding this until feature/html_tags is ready") - def test_tag(self): - """tests for building Tag nodes""" - pass - def test_integration(self): """a test for building a combination of templates together""" # {{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}} From 3fb8f3214c91bcd63b5fe4e3a0206a05f8038c39 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 19 Jun 2013 00:39:46 -0400 Subject: [PATCH 174/180] Fix StringMixIn.maketrans() on Py3k. - Make a test in Py3k actually use StringMixIn instead of str. - Minor cosmetic fix. 
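
The underlying bug: maketrans() was declared as a @staticmethod but still
listed `self` as its first parameter, so the caller's first real argument
was swallowed by `self`. A minimal sketch of the failure mode, illustrative
only and not project code:

    class Broken(object):
        @staticmethod
        def maketrans(self, x, y=None):
            # With @staticmethod, `self` is just an ordinary parameter:
            return (self, x, y)

    # "abc" lands in `self`, shifting every argument over by one:
    print(Broken.maketrans("abc", "xyz"))   # ('abc', 'xyz', None)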
--- mwparserfromhell/nodes/template.py | 2 +- mwparserfromhell/string_mixin.py | 8 ++++---- tests/test_string_mixin.py | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 3834d41..6dfc4f0 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -293,7 +293,7 @@ class Template(Node): """ name = name.strip() if isinstance(name, basestring) else str(name) removed = False - to_remove =[] + to_remove = [] for i, param in enumerate(self.params): if param.name.strip() == name: if keep_field: diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 89c1bc0..a406401 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -253,12 +253,12 @@ class StringMixIn(object): if py3k: @staticmethod @inheritdoc - def maketrans(self, x, y=None, z=None): + def maketrans(x, y=None, z=None): if z is None: if y is None: - return self.__unicode__.maketrans(x) - return self.__unicode__.maketrans(x, y) - return self.__unicode__.maketrans(x, y, z) + return str.maketrans(x) + return str.maketrans(x, y) + return str.maketrans(x, y, z) @inheritdoc def partition(self, sep): diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 306f2fd..b829bb2 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -414,10 +414,10 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual("Fake String", str1.title()) if py3k: - table1 = str.maketrans({97: "1", 101: "2", 105: "3", 111: "4", - 117: "5"}) - table2 = str.maketrans("aeiou", "12345") - table3 = str.maketrans("aeiou", "12345", "rts") + table1 = StringMixIn.maketrans({97: "1", 101: "2", 105: "3", + 111: "4", 117: "5"}) + table2 = StringMixIn.maketrans("aeiou", "12345") + table3 = StringMixIn.maketrans("aeiou", "12345", "rts") self.assertEqual("f1k2 str3ng", str1.translate(table1)) self.assertEqual("f1k2 str3ng", str1.translate(table2)) self.assertEqual("f1k2 3ng", str1.translate(table3)) From 22d7995d9b6c47407e0f130df8146debe03c6066 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 19 Jun 2013 21:07:41 -0400 Subject: [PATCH 175/180] Fix newline behavior when loading test files on Windows. --- tests/_test_tokenizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 382a9bf..c1d49cb 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -109,7 +109,7 @@ class TokenizerTestCase(object): def build(cls): """Load and install all tests from the 'tokenizer' directory.""" def load_file(filename): - with open(filename, "r") as fp: + with open(filename, "rU") as fp: text = fp.read() if not py3k: text = text.decode("utf8") From 25a9f4fe327d5fc95a5b1fb8302934a2b1d03294 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 19 Jun 2013 21:08:34 -0400 Subject: [PATCH 176/180] Add .dll to .gitignore for builds on Windows. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index ec4e8ca..4068716 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ *.pyc *.so +*.dll *.egg *.egg-info .DS_Store From a68946757758a7c6936dbe8c8c9295ef263ca97d Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 20 Jun 2013 16:17:39 -0400 Subject: [PATCH 177/180] Replace broken log2 function; add a missing comment. 
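
The hand-rolled log2() from patch 156 presumably clashes with the C
library's own log2() where one exists; in any case floating-point math is
unnecessary, since the heading level is encoded as a single bit in the
context field. Equivalent logic in Python (a sketch; the constant's value
below is a placeholder, not the real one from tokenizer.h):

    LC_HEADING_LEVEL_1 = 1 << 10   # placeholder bit position

    def heading_level_from_context(context):
        n = context // LC_HEADING_LEVEL_1
        level = 1
        while n > 1:
            n >>= 1
            level += 1
        return level

    # Level 4 is stored three bits above level 1:
    assert heading_level_from_context(LC_HEADING_LEVEL_1 << 3) == 4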
--- mwparserfromhell/parser/tokenizer.c | 16 +++++++++++++--- mwparserfromhell/parser/tokenizer.h | 1 + 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 939f30c..df65d0e 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -23,9 +23,16 @@ SOFTWARE. #include "tokenizer.h" -double log2(double n) +/* + Given a context, return the heading level encoded within it. +*/ +static int heading_level_from_context(int n) { - return log(n) / log(2); + int level; + n /= LC_HEADING_LEVEL_1; + for (level = 1; n > 1; n >>= 1) + level++; + return level; } static PyObject* @@ -175,6 +182,9 @@ Tokenizer_push_textbuffer(Tokenizer* self) return 0; } +/* + Pop and deallocate the top token stack/context/textbuffer. +*/ static void Tokenizer_delete_top_of_stack(Tokenizer* self) { @@ -858,7 +868,7 @@ Tokenizer_handle_heading_end(Tokenizer* self) best++; self->head++; } - current = log2(self->topstack->context / LC_HEADING_LEVEL_1) + 1; + current = heading_level_from_context(self->topstack->context); level = current > best ? (best > 6 ? 6 : best) : (current > 6 ? 6 : current); after = (HeadingData*) Tokenizer_parse(self, self->topstack->context); diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index cdc0cca..1f58c49 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -181,6 +181,7 @@ typedef struct { /* Function prototypes: */ +static int heading_level_from_context(int); static PyObject* Tokenizer_new(PyTypeObject*, PyObject*, PyObject*); static struct Textbuffer* Textbuffer_new(void); static void Tokenizer_dealloc(Tokenizer*); From 72473b433a8219c28245c0d560e9bb30f4df30de Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 20 Jun 2013 17:47:13 -0400 Subject: [PATCH 178/180] Adding a changelog (closes #23) --- CHANGELOG | 33 +++++++++++++++++++++++++++++++ docs/changelog.rst | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ docs/index.rst | 1 + 3 files changed, 92 insertions(+) create mode 100644 CHANGELOG create mode 100644 docs/changelog.rst diff --git a/CHANGELOG b/CHANGELOG new file mode 100644 index 0000000..9772f8b --- /dev/null +++ b/CHANGELOG @@ -0,0 +1,33 @@ +v0.1.1 (19da4d2144) to v0.2: + +- The parser now fully supports Python 3 in addition to Python 2.7. +- Added a C tokenizer extension that is significantly faster than its Python + equivalent. It is enabled by default (if available) and can be toggled by + setting `mwparserfromhell.parser.use_c` to a boolean value. +- Added a complete set of unit tests covering parsing and wikicode + manipulation. +- Renamed Wikicode.filter_links() to filter_wikilinks() (applies to ifilter as + well). +- Added filter methods for Arguments, Comments, Headings, and HTMLEntities. +- Added 'before' param to Template.add(); renamed 'force_nonconformity' to + 'preserve_spacing'. +- Added 'include_lead' param to Wikicode.get_sections(). +- Removed 'flat' param from Wikicode.get_sections(). +- Removed 'force_no_field' param from Template.remove(). +- Added support for Travis CI. +- Added note about Windows build issue in the README. +- The tokenizer will limit itself to a realistic recursion depth to prevent + errors and unreasonably long parse times. +- Fixed how some nodes' attribute setters handle input. +- Fixed multiple bugs in the tokenizer's handling of invalid markup. +- Fixed bugs in the implementation of SmartList and StringMixIn. 
+- Fixed some broken example code in the README; other copyedits.
+- Other bugfixes and code cleanup.
+
+v0.1 (ba94938fe8) to v0.1.1 (19da4d2144):
+
+- Added support for Comments (<!-- -->) and Wikilinks ([[foo]]).
+- Added corresponding ifilter_links() and filter_links() methods to Wikicode.
+- Fixed a bug when parsing incomplete templates.
+- Fixed strip_code() to affect the contents of headings.
+- Various copyedits in documentation and comments.
diff --git a/docs/changelog.rst b/docs/changelog.rst
new file mode 100644
index 0000000..0e8bbef
--- /dev/null
+++ b/docs/changelog.rst
@@ -0,0 +1,58 @@
+Changelog
+=========
+
+v0.2
+----
+
+19da4d2144_ to master_ (released June 20, 2013)
+
+- The parser now fully supports Python 3 in addition to Python 2.7.
+- Added a C tokenizer extension that is significantly faster than its Python
+  equivalent. It is enabled by default (if available) and can be toggled by
+  setting :py:attr:`mwparserfromhell.parser.use_c` to a boolean value.
+- Added a complete set of unit tests covering parsing and wikicode
+  manipulation.
+- Renamed :py:meth:`.filter_links` to :py:meth:`.filter_wikilinks` (applies to
+  :py:meth:`.ifilter` as well).
+- Added filter methods for :py:class:`Arguments <.Argument>`,
+  :py:class:`Comments <.Comment>`, :py:class:`Headings <.Heading>`, and
+  :py:class:`HTMLEntities <.HTMLEntity>`.
+- Added *before* param to :py:meth:`Template.add() <.Template.add>`; renamed
+  *force_nonconformity* to *preserve_spacing*.
+- Added *include_lead* param to :py:meth:`Wikicode.get_sections()
+  <.get_sections>`.
+- Removed *flat* param from :py:meth:`.get_sections`.
+- Removed *force_no_field* param from :py:meth:`Template.remove()
+  <.Template.remove>`.
+- Added support for Travis CI.
+- Added note about Windows build issue in the README.
+- The tokenizer will limit itself to a realistic recursion depth to prevent
+  errors and unreasonably long parse times.
+- Fixed how some nodes' attribute setters handle input.
+- Fixed multiple bugs in the tokenizer's handling of invalid markup.
+- Fixed bugs in the implementation of :py:class:`.SmartList` and
+  :py:class:`.StringMixIn`.
+- Fixed some broken example code in the README; other copyedits.
+- Other bugfixes and code cleanup.
+
+v0.1.1
+------
+
+ba94938fe8_ to 19da4d2144_ (released September 21, 2012)
+
+- Added support for :py:class:`Comments <.Comment>` (``<!-- -->``) and
+  :py:class:`Wikilinks <.Wikilink>` (``[[foo]]``).
+- Added corresponding :py:meth:`.ifilter_links` and :py:meth:`.filter_links`
+  methods to :py:class:`.Wikicode`.
+- Fixed a bug when parsing incomplete templates.
+- Fixed :py:meth:`.strip_code` to affect the contents of headings.
+- Various copyedits in documentation and comments.
+
+v0.1
+----
+
+ba94938fe8_ (released August 23, 2012)
+
+.. _master: https://github.com/earwig/mwparserfromhell/tree/v0.2
+.. _19da4d2144: https://github.com/earwig/mwparserfromhell/tree/v0.1.1
+.. _ba94938fe8: https://github.com/earwig/mwparserfromhell/tree/v0.1
diff --git a/docs/index.rst b/docs/index.rst
index 4b4c392..4355b61 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -41,6 +41,7 @@ Contents
 
    usage
    integration
+   changelog
 
 API Reference
From bbaf09dbf8fc2795c424f0934e4dce9924edb009 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Thu, 20 Jun 2013 18:07:41 -0400
Subject: [PATCH 179/180] Fix docstrings of generated filter methods.
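
The generated docstrings previously interpolated only the type's bare name,
producing references like :py:class:`~.Template` that Sphinx cannot always
resolve; passing the class object itself lets str.format() pull both the
module and the name via attribute access. A quick sketch of the mechanism,
illustrative only and not project code:

    class Template(object):
        pass

    doc = "equivalent to :py:class:`~{0.__module__}.{0.__name__}`"
    print(doc.format(Template))
    # -> equivalent to :py:class:`~__main__.Template`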
--- mwparserfromhell/wikicode.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 581707d..4ec889e 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -168,7 +168,7 @@ class Wikicode(StringMixIn): doc = """Iterate over {0}. This is equivalent to :py:meth:`{1}` with *forcetype* set to - :py:class:`~.{2}`. + :py:class:`~{2.__module__}.{2.__name__}`. """ make_ifilter = lambda ftype: (lambda self, **kw: self.ifilter(forcetype=ftype, **kw)) @@ -177,8 +177,8 @@ class Wikicode(StringMixIn): for name, ftype in (meths.items() if py3k else meths.iteritems()): ifilter = make_ifilter(ftype) filter = make_filter(ftype) - ifilter.__doc__ = doc.format(name, "ifilter", ftype.__name__) - filter.__doc__ = doc.format(name, "filter", ftype.__name__) + ifilter.__doc__ = doc.format(name, "ifilter", ftype) + filter.__doc__ = doc.format(name, "filter", ftype) setattr(cls, "ifilter_" + name, ifilter) setattr(cls, "filter_" + name, filter) From edf6a3a8a6ad4c31cf8649a273b4e4d0e275003a Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 20 Jun 2013 18:13:52 -0400 Subject: [PATCH 180/180] release/0.2 --- mwparserfromhell/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index 99bc0c2..5db2d4c 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -31,7 +31,7 @@ from __future__ import unicode_literals __author__ = "Ben Kurtovic" __copyright__ = "Copyright (C) 2012, 2013 Ben Kurtovic" __license__ = "MIT License" -__version__ = "0.2.dev" +__version__ = "0.2" __email__ = "ben.kurtovic@verizon.net" from . import compat, nodes, parser, smart_list, string_mixin, utils, wikicode
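
With that version bump the series closes out the 0.2 release described in
the changelog above. A minimal end-to-end sketch of the API this series
finished, assuming the package is built and installed:

    import mwparserfromhell

    code = mwparserfromhell.parse("Hello {{foo|bar}} and [[world]]!")
    print([str(t) for t in code.filter_templates()])    # ['{{foo|bar}}']
    print([str(w) for w in code.filter_wikilinks()])    # ['[[world]]']

    # The i-prefixed variants are generators over the same nodes:
    print([str(t) for t in code.ifilter_templates()])   # ['{{foo|bar}}']
    print(mwparserfromhell.__version__)                 # '0.2'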