From 99cf0a4412139abf3255ca3fe5123aca3c7c14b4 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 22 Apr 2014 16:44:14 -0400 Subject: [PATCH 001/102] Version bump to 0.4.dev. --- CHANGELOG | 4 ++++ docs/changelog.rst | 8 ++++++++ mwparserfromhell/__init__.py | 2 +- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 9faf6b7..6be48c6 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,7 @@ +v0.4 (unreleased): + +- + v0.3.3 (released April 22, 2014): - Added support for Python 2.6 and 3.4. diff --git a/docs/changelog.rst b/docs/changelog.rst index 9efc022..3f2ba0e 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,14 @@ Changelog ========= +v0.4 +---- + +Unreleased +(`changes `__): + +- + v0.3.3 ------ diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index 469e9a6..e7459e3 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -31,7 +31,7 @@ from __future__ import unicode_literals __author__ = "Ben Kurtovic" __copyright__ = "Copyright (C) 2012, 2013, 2014 Ben Kurtovic" __license__ = "MIT License" -__version__ = "0.3.3" +__version__ = "0.4.dev" __email__ = "ben.kurtovic@gmail.com" from . import (compat, definitions, nodes, parser, smart_list, string_mixin, From 2fe8826a9dec3d1015ff7a69857fb282617d3a45 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 22 Apr 2014 18:31:00 -0400 Subject: [PATCH 002/102] Added a script to test for memory leaks in scripts/memtest.py. --- CHANGELOG | 2 +- docs/changelog.rst | 2 +- scripts/memtest.py | 170 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 172 insertions(+), 2 deletions(-) create mode 100644 scripts/memtest.py diff --git a/CHANGELOG b/CHANGELOG index 6be48c6..564b09c 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,6 @@ v0.4 (unreleased): -- +- Added a script to test for memory leaks in scripts/memtest.py. v0.3.3 (released April 22, 2014): diff --git a/docs/changelog.rst b/docs/changelog.rst index 3f2ba0e..5a59be0 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -7,7 +7,7 @@ v0.4 Unreleased (`changes `__): -- +- Added a script to test for memory leaks in :file:`scripts/memtest.py`. v0.3.3 ------ diff --git a/scripts/memtest.py b/scripts/memtest.py new file mode 100644 index 0000000..e6b8011 --- /dev/null +++ b/scripts/memtest.py @@ -0,0 +1,170 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2014 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +""" +Tests for memory leaks in the CTokenizer. Python 2 and 3 compatible. + +This appears to work mostly fine under Linux, but gives an absurd number of +false positives on OS X. I'm not sure why. Running the tests multiple times +yields different results (tests don't always leak, and the amount they leak by +varies). Increasing the number of loops results in a smaller bytes/loop value, +too, indicating the increase in memory usage might be due to something else. +Actual memory leaks typically leak very large amounts of memory (megabytes) +and scale with the number of loops. +""" + +from __future__ import unicode_literals, print_function +from locale import LC_ALL, setlocale +from multiprocessing import Process, Pipe +from os import listdir, path +import sys + +import psutil + +from mwparserfromhell.compat import py3k +from mwparserfromhell.parser._tokenizer import CTokenizer + +if sys.version_info[0] == 2: + range = xrange + +LOOPS = 10000 + +class Color(object): + GRAY = "\x1b[30;1m" + GREEN = "\x1b[92m" + YELLOW = "\x1b[93m" + RESET = "\x1b[0m" + + +class MemoryTest(object): + """Manages a memory test.""" + + def __init__(self): + self._tests = [] + self._load() + + def _parse_file(self, name, text): + tests = text.split("\n---\n") + counter = 1 + digits = len(str(len(tests))) + for test in tests: + data = {"name": None, "label": None, "input": None, "output": None} + for line in test.strip().splitlines(): + if line.startswith("name:"): + data["name"] = line[len("name:"):].strip() + elif line.startswith("label:"): + data["label"] = line[len("label:"):].strip() + elif line.startswith("input:"): + raw = line[len("input:"):].strip() + if raw[0] == '"' and raw[-1] == '"': + raw = raw[1:-1] + raw = raw.encode("raw_unicode_escape") + data["input"] = raw.decode("unicode_escape") + number = str(counter).zfill(digits) + fname = "test_{0}{1}_{2}".format(name, number, data["name"]) + self._tests.append((fname, data["input"])) + counter += 1 + + def _load(self): + def load_file(filename): + with open(filename, "rU") as fp: + text = fp.read() + if not py3k: + text = text.decode("utf8") + name = path.split(filename)[1][:0-len(extension)] + self._parse_file(name, text) + + root = path.split(path.dirname(path.abspath(__file__)))[0] + directory = path.join(root, "tests", "tokenizer") + extension = ".mwtest" + if len(sys.argv) > 2 and sys.argv[1] == "--use": + for name in sys.argv[2:]: + load_file(path.join(directory, name + extension)) + sys.argv = [sys.argv[0]] # So unittest doesn't try to load these + else: + for filename in listdir(directory): + if not filename.endswith(extension): + continue + load_file(path.join(directory, filename)) + + @staticmethod + def _print_results(info1, info2): + r1, r2 = info1.rss, info2.rss + buff = 8192 + if r2 - buff > r1: + d = r2 - r1 + p = float(d) / r1 + bpt = d // LOOPS + tmpl = "{0}LEAKING{1}: {2:n} bytes, {3:.2%} inc ({4:n} bytes/loop)" + sys.stdout.write(tmpl.format(Color.YELLOW, Color.RESET, d, p, bpt)) + else: + sys.stdout.write("{0}OK{1}".format(Color.GREEN, Color.RESET)) + + def run(self): + """Run the memory test suite.""" + width = 1 + for (name, _) in self._tests: + if len(name) > width: + width = len(name) + + tmpl = "{0}[{1:03}/{2}]{3} {4}: " + for i, (name, text) in enumerate(self._tests, 1): + sys.stdout.write(tmpl.format(Color.GRAY, i, len(self._tests), + Color.RESET, name.ljust(width))) + sys.stdout.flush() + parent, child = Pipe() + p = Process(target=_runner, args=(text, child)) + p.start() + try: + proc = psutil.Process(p.pid) + parent.recv() 
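+            # The child signals over the pipe once its 250-iteration warm-up
+            # loop is done, so info1 below is a post-warm-up baseline; info2
+            # is sampled after the main LOOPS-iteration run, and
+            # _print_results() compares the two readings.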
+ parent.send("OK") + parent.recv() + info1 = proc.get_memory_info() + sys.stdout.flush() + parent.send("OK") + parent.recv() + info2 = proc.get_memory_info() + self._print_results(info1, info2) + sys.stdout.flush() + parent.send("OK") + finally: + proc.kill() + print() + + +def _runner(text, child): + r1, r2 = range(250), range(LOOPS) + for i in r1: + CTokenizer().tokenize(text) + child.send("OK") + child.recv() + child.send("OK") + child.recv() + for i in r2: + CTokenizer().tokenize(text) + child.send("OK") + child.recv() + +if __name__ == "__main__": + setlocale(LC_ALL, "") + MemoryTest().run() From 5d08e9e316f826e3b1c52e6424b583b6e4dd41c9 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 22 Apr 2014 22:35:45 -0400 Subject: [PATCH 003/102] Created a release script; added a MANIFEST.in file. --- CHANGELOG | 1 + MANIFEST.in | 2 + docs/changelog.rst | 1 + scripts/release.sh | 155 +++++++++++++++++++++++++++++++++++++++++++++++++++++ setup.py | 3 +- 5 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 MANIFEST.in create mode 100755 scripts/release.sh diff --git a/CHANGELOG b/CHANGELOG index 564b09c..98a1f96 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,7 @@ v0.4 (unreleased): - Added a script to test for memory leaks in scripts/memtest.py. +- Added a script to do releases in scripts/release.sh. v0.3.3 (released April 22, 2014): diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..27e8a54 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include LICENSE CHANGELOG +recursive-include tests *.py *.mwtest diff --git a/docs/changelog.rst b/docs/changelog.rst index 5a59be0..3ce507e 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -8,6 +8,7 @@ Unreleased (`changes `__): - Added a script to test for memory leaks in :file:`scripts/memtest.py`. +- Added a script to do releases in :file:`scripts/release.sh`. v0.3.3 ------ diff --git a/scripts/release.sh b/scripts/release.sh new file mode 100755 index 0000000..c10871d --- /dev/null +++ b/scripts/release.sh @@ -0,0 +1,155 @@ +#! /usr/bin/env bash + +if [[ -z "$1" ]]; then + echo "usage: $0 1.2.3" + exit 1 +fi + +VERSION=$1 +SCRIPT_DIR=$(dirname "$0") +RELEASE_DATE=$(date +"%B %d, %Y") + +check_git() { + if [[ -n "$(git status --porcelain --untracked-files=no)" ]]; then + echo "Aborting: dirty working directory." + exit 1 + fi + if [[ "$(git rev-parse --abbrev-ref HEAD)" != "develop" ]]; then + echo "Aborting: not on develop." + exit 1 + fi + echo -n "Are you absolutely ready to release? [yN] " + read confirm + if [[ ${confirm,,} != "y" ]]; then + exit 1 + fi +} + +update_version() { + echo -n "Updating mwparserfromhell.__version__..." + sed -e 's/__version__ = .*/__version__ = "'$VERSION'"/' -i "" mwparserfromhell/__init__.py + echo " done." +} + +update_changelog() { + filename="CHANGELOG" + echo -n "Updating $filename..." + sed -e '1s/.*/v'$VERSION' (released '$RELEASE_DATE'):/' -i "" $filename + echo " done." +} + +update_docs_changelog() { + filename="docs/changelog.rst" + echo -n "Updating $filename..." + dashes=$(seq 1 $(expr ${#VERSION} + 1) | sed 's/.*/-/' | tr -d '\n') + previous_lineno=$(expr $(grep -n -e "^---" $filename | sed '2q;d' | cut -d ':' -f 1) - 1) + previous_version=$(sed $previous_lineno'q;d' $filename) + sed \ + -e '4s/.*/v'$VERSION \ + -e '5s/.*/'$dashes \ + -e '7s/.*/`Released '$RELEASE_DATE' `_/' \ + -e '8s/.*/(`changes `__):/' \ + -i "" $filename + echo " done." +} + +do_git_stuff() { + echo -n "Git: committing, tagging, and merging release..." 
+ git commit -qam "release/$VERSION" + git tag v$VERSION -s -m "version $VERSION" + git checkout -q master + git merge -q --no-ff develop -m "Merge branch 'develop'" + echo -n " pushing..." + git push -q --tags origin master + git checkout -q develop + git push -q origin develop + echo " done." +} + +build_sdist() { + echo -n "Uploading to PyPI..." + python setup.py register sdist upload -s + python setup.py upload_docs + echo " done." +} + +post_release() { + echo + echo "*** Release completed." + echo "*** Update: https://github.com/earwig/mwparserfromhell/releases/tag/v$VERSION" + echo "*** Verify: https://pypi.python.org/pypi/mwparserfromhell" + echo "*** Verify: https://mwparserfromhell.readthedocs.org" + echo "*** Press enter to sanity-check the release." + read +} + +test_release() { + echo + echo "Checking mwparserfromhell v$VERSION..." + echo -n "Creating a virtualenv..." + virtdir="mwparser-test-env" + virtualenv -q $virtdir + cd $virtdir + source bin/activate + echo " done." + echo -n "Installing mwparserfromhell with pip..." + pip -q install mwparserfromhell + echo " done." + echo -n "Checking version..." + reported_version=$(python -c 'print __import__("mwparserfromhell").__version__') + if [[ "$reported_version" != "$VERSION" ]]; then + echo " error." + echo "*** ERROR: mwparserfromhell is reporting its version as $reported_version, not $VERSION!" + deactivate + cd .. + rm -rf $virtdir + exit 1 + else + echo " done." + fi + pip -q uninstall -y mwparserfromhell + echo -n "Downloading mwparserfromhell source tarball and GPG signature..." + curl -sL "https://pypi.python.org/packages/source/m/mwparserfromhell/mwparserfromhell-$VERSION.tar.gz" -o "mwparserfromhell.tar.gz" + curl -sL "https://pypi.python.org/packages/source/m/mwparserfromhell/mwparserfromhell-$VERSION.tar.gz.asc" -o "mwparserfromhell.tar.gz.asc" + echo " done." + echo "Verifying tarball..." + gpg --verify mwparserfromhell.tar.gz.asc + if [[ "$?" != "0" ]]; then + echo "*** ERROR: GPG signature verification failed!" + deactivate + cd .. + rm -rf $virtdir + exit 1 + fi + tar -xf mwparserfromhell.tar.gz + rm mwparserfromhell.tar.gz mwparserfromhell.tar.gz.asc + cd mwparserfromhell-$VERSION + echo "Running unit tests..." + python setup.py -q test + if [[ "$?" != "0" ]]; then + echo "*** ERROR: Unit tests failed!" + deactivate + cd ../.. + rm -rf $virtdir + exit 1 + fi + echo -n "Everything looks good. Cleaning up..." + deactivate + cd ../.. + rm -rf $virtdir + echo " done." +} + +echo "Preparing mwparserfromhell v$VERSION..." +cd "$SCRIPT_DIR/.." + +check_git +update_version +update_changelog +update_docs_changelog +do_git_stuff +post_release +test_release + +echo "All done." 
+exit 0 diff --git a/setup.py b/setup.py index 5a45902..6dbe783 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,8 @@ with open("README.rst") as fp: long_docs = fp.read() tokenizer = Extension("mwparserfromhell.parser._tokenizer", - sources = ["mwparserfromhell/parser/tokenizer.c"]) + sources = ["mwparserfromhell/parser/tokenizer.c"], + depends = ["mwparserfromhell/parser/tokenizer.h"]) setup( name = "mwparserfromhell", From d342831af8fd976bee6793fde6c3a781bc9fbb46 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 22 May 2014 21:53:33 -0400 Subject: [PATCH 004/102] Allow passing skip_style_tags to parse() (fixes #73) --- mwparserfromhell/parser/__init__.py | 2 +- mwparserfromhell/utils.py | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index 093e501..67f6eeb 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -57,7 +57,7 @@ class Parser(object): """Parse *text*, returning a :py:class:`~.Wikicode` object tree. If *skip_style_tags* is ``True``, then ``''`` and ``'''`` will not be - parsed, but instead be treated as plain text. + parsed, but instead will be treated as plain text. """ tokens = self._tokenizer.tokenize(text, context, skip_style_tags) code = self._builder.build(tokens) diff --git a/mwparserfromhell/utils.py b/mwparserfromhell/utils.py index 486170d..c6fd627 100644 --- a/mwparserfromhell/utils.py +++ b/mwparserfromhell/utils.py @@ -33,7 +33,7 @@ from .smart_list import SmartList __all__ = ["parse_anything"] -def parse_anything(value, context=0): +def parse_anything(value, context=0, skip_style_tags=False): """Return a :py:class:`~.Wikicode` for *value*, allowing multiple types. This differs from :py:meth:`.Parser.parse` in that we accept more than just @@ -50,6 +50,9 @@ def parse_anything(value, context=0): For example, :py:class:`~.ExternalLink`\ 's :py:attr:`~.ExternalLink.url` setter sets *context* to :py:mod:`contexts.EXT_LINK_URI <.contexts>` to prevent the URL itself from becoming an :py:class:`~.ExternalLink`. + + If *skip_style_tags* is ``True``, then ``''`` and ``'''`` will not be + parsed, but instead will be treated as plain text. 
""" from .parser import Parser from .wikicode import Wikicode @@ -59,17 +62,17 @@ def parse_anything(value, context=0): elif isinstance(value, Node): return Wikicode(SmartList([value])) elif isinstance(value, str): - return Parser().parse(value, context) + return Parser().parse(value, context, skip_style_tags) elif isinstance(value, bytes): - return Parser().parse(value.decode("utf8"), context) + return Parser().parse(value.decode("utf8"), context, skip_style_tags) elif isinstance(value, int): - return Parser().parse(str(value), context) + return Parser().parse(str(value), context, skip_style_tags) elif value is None: return Wikicode(SmartList()) try: nodelist = SmartList() for item in value: - nodelist += parse_anything(item, context).nodes + nodelist += parse_anything(item, context, skip_style_tags).nodes except TypeError: error = "Needs string, Node, Wikicode, int, None, or iterable of these, but got {0}: {1}" raise ValueError(error.format(type(value).__name__, value)) From 9108d49d68e6024c751e723fcf8e118cdee77af0 Mon Sep 17 00:00:00 2001 From: Ricordisamoa Date: Sat, 24 May 2014 03:08:06 +0200 Subject: [PATCH 005/102] =?UTF-8?q?fix=20some=20typos:=20occurance=20?= =?UTF-8?q?=E2=86=92=20occurrence,=20parasable=20=E2=86=92=20parsable?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mwparserfromhell/nodes/tag.py | 2 +- mwparserfromhell/nodes/template.py | 6 +++--- mwparserfromhell/wikicode.py | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index 661304e..f283d46 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -240,7 +240,7 @@ class Tag(Node): pad_before_eq="", pad_after_eq=""): """Add an attribute with the given *name* and *value*. - *name* and *value* can be anything parasable by + *name* and *value* can be anything parsable by :py:func:`.utils.parse_anything`; *value* can be omitted if the attribute is valueless. *quoted* is a bool telling whether to wrap the *value* in double quotes (this is recommended). *pad_first*, diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index d1a0b0e..3b5b35c 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -95,7 +95,7 @@ class Template(Node): def _select_theory(self, theories): """Return the most likely spacing convention given different options. - Given a dictionary of convention options as keys and their occurance as + Given a dictionary of convention options as keys and their occurrence as values, return the convention that occurs the most, or ``None`` if there is no clear preferred style. """ @@ -208,7 +208,7 @@ class Template(Node): preserve_spacing=True): """Add a parameter to the template with a given *name* and *value*. - *name* and *value* can be anything parasable by + *name* and *value* can be anything parsable by :py:func:`.utils.parse_anything`; pipes and equal signs are automatically escaped from *value* when appropriate. @@ -226,7 +226,7 @@ class Template(Node): name), then we will place the parameter immediately before this one. Otherwise, it will be added at the end. If *before* is a name and exists multiple times in the template, we will place it before the last - occurance. If *before* is not in the template, :py:exc:`ValueError` is + occurrence. If *before* is not in the template, :py:exc:`ValueError` is raised. The argument is ignored if the new parameter already exists. 
If *preserve_spacing* is ``False``, we will avoid preserving spacing diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 44515a6..f728248 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -294,7 +294,7 @@ class Wikicode(StringMixIn): def insert(self, index, value): """Insert *value* at *index* in the list of nodes. - *value* can be anything parasable by :py:func:`.parse_anything`, which + *value* can be anything parsable by :py:func:`.parse_anything`, which includes strings or other :py:class:`~.Wikicode` or :py:class:`~.Node` objects. """ @@ -309,7 +309,7 @@ class Wikicode(StringMixIn): :py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, for example). If *obj* is a string, we will operate on all instances of that string within the code, otherwise only on the specific instance - given. *value* can be anything parasable by :py:func:`.parse_anything`. + given. *value* can be anything parsable by :py:func:`.parse_anything`. If *recursive* is ``True``, we will try to find *obj* within our child nodes even if it is not a direct descendant of this :py:class:`~.Wikicode` object. If *obj* is not found, @@ -333,7 +333,7 @@ class Wikicode(StringMixIn): :py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, for example). If *obj* is a string, we will operate on all instances of that string within the code, otherwise only on the specific instance - given. *value* can be anything parasable by :py:func:`.parse_anything`. + given. *value* can be anything parsable by :py:func:`.parse_anything`. If *recursive* is ``True``, we will try to find *obj* within our child nodes even if it is not a direct descendant of this :py:class:`~.Wikicode` object. If *obj* is not found, @@ -357,7 +357,7 @@ class Wikicode(StringMixIn): :py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, for example). If *obj* is a string, we will operate on all instances of that string within the code, otherwise only on the specific instance - given. *value* can be anything parasable by :py:func:`.parse_anything`. + given. *value* can be anything parsable by :py:func:`.parse_anything`. If *recursive* is ``True``, we will try to find *obj* within our child nodes even if it is not a direct descendant of this :py:class:`~.Wikicode` object. If *obj* is not found, @@ -380,7 +380,7 @@ class Wikicode(StringMixIn): def append(self, value): """Insert *value* at the end of the list of nodes. - *value* can be anything parasable by :py:func:`.parse_anything`. + *value* can be anything parsable by :py:func:`.parse_anything`. 
""" nodes = parse_anything(value).nodes for node in nodes: From 0497b54f03072effb42ac81dd9e1480042c03c76 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 24 May 2014 21:13:26 -0400 Subject: [PATCH 006/102] Fix _handle_single_tag_end()'s token search order (fixes #74) --- mwparserfromhell/parser/tokenizer.c | 2 +- mwparserfromhell/parser/tokenizer.py | 8 +++++--- tests/tokenizer/tags.mwtest | 7 +++++++ 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index de58e72..d8a505f 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1899,7 +1899,7 @@ static PyObject* Tokenizer_handle_single_tag_end(Tokenizer* self) int is_instance; len = PyList_GET_SIZE(self->topstack->stack); - for (index = 0; index < len; index++) { + for (index = len - 1; index >= 0; index--) { token = PyList_GET_ITEM(self->topstack->stack, index); is_instance = PyObject_IsInstance(token, TagCloseOpen); if (is_instance == -1) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 29a7e25..93d53e7 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -21,6 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals +from itertools import izip from math import log import re @@ -751,11 +752,12 @@ class Tokenizer(object): def _handle_single_tag_end(self): """Handle the stream end when inside a single-supporting HTML tag.""" - gen = enumerate(self._stack) + stack = self._stack + gen = izip(xrange(len(stack) - 1, -1, -1), reversed(stack)) index = next(i for i, t in gen if isinstance(t, tokens.TagCloseOpen)) - padding = self._stack[index].padding + padding = stack[index].padding token = tokens.TagCloseSelfclose(padding=padding, implicit=True) - self._stack[index] = token + stack[index] = token return self._pop() def _really_parse_tag(self): diff --git a/tests/tokenizer/tags.mwtest b/tests/tokenizer/tags.mwtest index a8ca2f0..26e569b 100644 --- a/tests/tokenizer/tags.mwtest +++ b/tests/tokenizer/tags.mwtest @@ -124,6 +124,13 @@ output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before --- +name: nested_tag_selfclosing +label: a tag nested within the attributes of another; outer tag implicitly self-closing +input: "
<li <b></b>test" +output: [TagOpenOpen(), Text(text="li"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), TagOpenOpen(), Text(text="b"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="b"), TagCloseClose(), Text(text="test"), TagCloseSelfclose(padding="", implicit=True)] From 0101c038fbc6e9048256d165e488ddf9218a4660 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 24 May 2014 21:21:47 -0400 Subject: [PATCH 007/102] Python 3, grr. --- mwparserfromhell/compat.py | 2 ++ mwparserfromhell/parser/tokenizer.py | 5 ++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index 4384ace..94e0db3 100644 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -20,6 +20,7 @@ if py3k: range = range maxsize = sys.maxsize import html.entities as htmlentities + zip = zip else: bytes = str @@ -27,5 +28,6 @@ else: range = xrange maxsize = sys.maxint import htmlentitydefs as htmlentities + from itertools import izip as zip del sys diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 93d53e7..33722fa 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -21,12 +21,11 @@ # SOFTWARE. from __future__ import unicode_literals -from itertools import izip from math import log import re from . import contexts, tokens -from ..compat import htmlentities, range +from ..compat import htmlentities, range, zip from ..definitions import (get_html_tag, is_parsable, is_single, is_single_only, is_scheme) @@ -753,7 +752,7 @@ class Tokenizer(object): def _handle_single_tag_end(self): """Handle the stream end when inside a single-supporting HTML tag.""" stack = self._stack - gen = izip(xrange(len(stack) - 1, -1, -1), reversed(stack)) + gen = zip(range(len(stack) - 1, -1, -1), reversed(stack)) index = next(i for i, t in gen if isinstance(t, tokens.TagCloseOpen)) padding = stack[index].padding token = tokens.TagCloseSelfclose(padding=padding, implicit=True) - self._stack[index] = token + stack[index] = token return self._pop() From b4b62026f810b4eacfd13c5e503d757b96bcdb8a Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 25 May 2014 15:50:12 -0400 Subject: [PATCH 008/102] Update changelog, docs. --- CHANGELOG | 4 ++++ docs/changelog.rst | 4 ++++ mwparserfromhell/parser/__init__.py | 7 +++++++ mwparserfromhell/utils.py | 9 +-------- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 98a1f96..289c413 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,10 @@ v0.4 (unreleased): - Added a script to test for memory leaks in scripts/memtest.py. - Added a script to do releases in scripts/release.sh. +- skip_style_tags can now be passed to mwparserfromhell.parse() (previously, + only Parser().parse() allowed it). +- Fixed a parser bug involving nested tags. +- Updated and fixed some documentation. v0.3.3 (released April 22, 2014): diff --git a/docs/changelog.rst b/docs/changelog.rst index 3ce507e..21f0629 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -9,6 +9,10 @@ Unreleased - Added a script to test for memory leaks in :file:`scripts/memtest.py`. - Added a script to do releases in :file:`scripts/release.sh`. +- *skip_style_tags* can now be passed to :py:func:`mwparserfromhell.parse() <.parse_anything>` (previously, only :py:meth:`.Parser.parse` allowed it). +- Fixed a parser bug involving nested tags. +- Updated and fixed some documentation.
v0.3.3 ------ diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index 67f6eeb..8bac295 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -56,6 +56,13 @@ class Parser(object): def parse(self, text, context=0, skip_style_tags=False): """Parse *text*, returning a :py:class:`~.Wikicode` object tree. + If given, *context* will be passed as a starting context to the parser. + This is helpful when this function is used inside node attribute + setters. For example, :py:class:`~.ExternalLink`\ 's + :py:attr:`~.ExternalLink.url` setter sets *context* to + :py:mod:`contexts.EXT_LINK_URI <.contexts>` to prevent the URL itself + from becoming an :py:class:`~.ExternalLink`. + If *skip_style_tags* is ``True``, then ``''`` and ``'''`` will not be parsed, but instead will be treated as plain text. """ diff --git a/mwparserfromhell/utils.py b/mwparserfromhell/utils.py index c6fd627..fd54ad0 100644 --- a/mwparserfromhell/utils.py +++ b/mwparserfromhell/utils.py @@ -45,14 +45,7 @@ def parse_anything(value, context=0, skip_style_tags=False): :py:class:`~.Template`, such as :py:meth:`wikicode.insert() <.Wikicode.insert>` or setting :py:meth:`template.name <.Template.name>`. - If given, *context* will be passed as a starting context to the parser. - This is helpful when this function is used inside node attribute setters. - For example, :py:class:`~.ExternalLink`\ 's :py:attr:`~.ExternalLink.url` - setter sets *context* to :py:mod:`contexts.EXT_LINK_URI <.contexts>` to - prevent the URL itself from becoming an :py:class:`~.ExternalLink`. - - If *skip_style_tags* is ``True``, then ``''`` and ``'''`` will not be - parsed, but instead will be treated as plain text. + Additional arguments are passed directly to :py:meth:`.Parser.parse`. """ from .parser import Parser from .wikicode import Wikicode From c95802f9cc124cdd8e5b87a733a673dcaf20c2da Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 25 May 2014 22:27:31 -0400 Subject: [PATCH 009/102] Allow recursing through everything except the forced type (fixes #70) --- CHANGELOG | 4 ++++ docs/changelog.rst | 5 ++++ mwparserfromhell/wikicode.py | 56 +++++++++++++++++++++++++++----------------- tests/test_wikicode.py | 21 ++++++++++++----- 4 files changed, 59 insertions(+), 27 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 289c413..7da4968 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -4,6 +4,10 @@ v0.4 (unreleased): - Added a script to do releases in scripts/release.sh. - skip_style_tags can now be passed to mwparserfromhell.parse() (previously, only Parser().parse() allowed it). +- The 'recursive' argument to Wikicode's filter methods now accepts a third + option, RECURSE_OTHERS, which recurses over all children except instances of + 'forcetype' (for example, `code.filter_templates(code.RECURSE_OTHERS)` + returns all un-nested templates). - Fixed a parser bug involving nested tags. - Updated and fixed some documentation. diff --git a/docs/changelog.rst b/docs/changelog.rst index 21f0629..8416204 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -11,6 +11,11 @@ Unreleased - Added a script to do releases in :file:`scripts/release.sh`. - *skip_style_tags* can now be passed to :py:func:`mwparserfromhell.parse() <.parse_anything>` (previously, only :py:meth:`.Parser.parse` allowed it). 
+- The *recursive* argument to :py:class:`Wikicode's <.Wikicode>` + :py:meth:`.filter` methods now accepts a third option, ``RECURSE_OTHERS``, + which recurses over all children except instances of *forcetype* (for + example, ``code.filter_templates(code.RECURSE_OTHERS)`` returns all un-nested + templates). - Fixed a parser bug involving nested tags. - Updated and fixed some documentation. diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index f728248..d7736ff 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -44,6 +44,7 @@ class Wikicode(StringMixIn): ` series of functions is very useful for extracting and iterating over, for example, all of the templates in the object. """ + RECURSE_OTHERS = 2 def __init__(self, nodes): super(Wikicode, self).__init__() @@ -53,12 +54,15 @@ class Wikicode(StringMixIn): return "".join([str(node) for node in self.nodes]) @staticmethod - def _get_children(node, contexts=False, parent=None): + def _get_children(node, contexts=False, restrict=None, parent=None): """Iterate over all child :py:class:`.Node`\ s of a given *node*.""" yield (parent, node) if contexts else node + if restrict and isinstance(node, restrict): + return for code in node.__children__(): for child in code.nodes: - for result in Wikicode._get_children(child, contexts, code): + sub = Wikicode._get_children(child, contexts, restrict, code) + for result in sub: yield result @staticmethod @@ -79,7 +83,7 @@ class Wikicode(StringMixIn): if matches: if callable(matches): return matches - return lambda obj: re.search(matches, str(obj), flags) # r + return lambda obj: re.search(matches, str(obj), flags) return lambda obj: True def _indexed_ifilter(self, recursive=True, matches=None, flags=FLAGS, @@ -93,8 +97,9 @@ class Wikicode(StringMixIn): """ match = self._build_matcher(matches, flags) if recursive: + restrict = forcetype if recursive == self.RECURSE_OTHERS else None def getter(i, node): - for ch in self._get_children(node): + for ch in self._get_children(node, restrict=restrict): yield (i, ch) inodes = chain(*(getter(i, n) for i, n in enumerate(self.nodes))) else: @@ -222,10 +227,10 @@ class Wikicode(StringMixIn): This is equivalent to :py:meth:`{1}` with *forcetype* set to :py:class:`~{2.__module__}.{2.__name__}`. """ - make_ifilter = lambda ftype: (lambda self, **kw: - self.ifilter(forcetype=ftype, **kw)) - make_filter = lambda ftype: (lambda self, **kw: - self.filter(forcetype=ftype, **kw)) + make_ifilter = lambda ftype: (lambda self, *a, **kw: + self.ifilter(forcetype=ftype, *a, **kw)) + make_filter = lambda ftype: (lambda self, *a, **kw: + self.filter(forcetype=ftype, *a, **kw)) for name, ftype in (meths.items() if py3k else meths.iteritems()): ifilter = make_ifilter(ftype) filter = make_filter(ftype) @@ -435,27 +440,36 @@ class Wikicode(StringMixIn): forcetype=None): """Iterate over nodes in our list matching certain conditions. - If *recursive* is ``True``, we will iterate over our children and all - of their descendants, otherwise just our immediate children. If - *forcetype* is given, only nodes that are instances of this type are - yielded. *matches* can be used to further restrict the nodes, either as - a function (taking a single :py:class:`.Node` and returning a boolean) - or a regular expression (matched against the node's string - representation with :py:func:`re.search`). 
If *matches* is a regex, the - flags passed to :py:func:`re.search` are :py:const:`re.IGNORECASE`, + If *forcetype* is given, only nodes that are instances of this type (or + tuple of types) are yielded. Setting *recursive* to ``True`` will + iterate over all children and their descendants. ``RECURSE_OTHERS`` + will only iterate over children that are not the instances of + *forcetype*. ``False`` will only iterate over immediate children. + + ``RECURSE_OTHERS`` can be used to iterate over all un-nested templates, + even if they are inside of HTML tags, like so: + + >>> code = mwparserfromhell.parse("{{foo}}{{foo|{{bar}}}}") + >>> code.filter_templates(code.RECURSE_OTHERS) + ["{{foo}}", "{{foo|{{bar}}}}"] + + *matches* can be used to further restrict the nodes, either as a + function (taking a single :py:class:`.Node` and returning a boolean) or + a regular expression (matched against the node's string representation + with :py:func:`re.search`). If *matches* is a regex, the flags passed + to :py:func:`re.search` are :py:const:`re.IGNORECASE`, :py:const:`re.DOTALL`, and :py:const:`re.UNICODE`, but custom flags can be specified by passing *flags*. """ - return (node for i, node in - self._indexed_ifilter(recursive, matches, flags, forcetype)) + gen = self._indexed_ifilter(recursive, matches, flags, forcetype) + return (node for i, node in gen) - def filter(self, recursive=True, matches=None, flags=FLAGS, - forcetype=None): + def filter(self, *args, **kwargs): """Return a list of nodes within our list matching certain conditions. This is equivalent to calling :py:func:`list` on :py:meth:`ifilter`. """ - return list(self.ifilter(recursive, matches, flags, forcetype)) + return list(self.ifilter(*args, **kwargs)) def get_sections(self, levels=None, matches=None, flags=FLAGS, flat=False, include_lead=None, include_headings=True): diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 9ff5949..a7c3eb3 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -319,11 +319,14 @@ class TestWikicode(TreeEqualityTestCase): self.assertEqual(["{{baz}}", "{{bz}}"], func(matches=r"^{{b.*?z")) self.assertEqual(["{{baz}}"], func(matches=r"^{{b.+?z}}")) - self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"], - code2.filter_templates(recursive=False)) - self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}", - "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"], - code2.filter_templates(recursive=True)) + exp_rec = ["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}", + "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"] + exp_unrec = ["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"] + self.assertEqual(exp_rec, code2.filter_templates()) + self.assertEqual(exp_unrec, code2.filter_templates(recursive=False)) + self.assertEqual(exp_rec, code2.filter_templates(recursive=True)) + self.assertEqual(exp_rec, code2.filter_templates(True)) + self.assertEqual(exp_unrec, code2.filter_templates(False)) self.assertEqual(["{{foobar}}"], code3.filter_templates( matches=lambda node: node.name.matches("Foobar"))) @@ -332,9 +335,15 @@ class TestWikicode(TreeEqualityTestCase): self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z")) self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z", flags=0)) - self.assertRaises(TypeError, code.filter_templates, 100) self.assertRaises(TypeError, code.filter_templates, a=42) self.assertRaises(TypeError, code.filter_templates, forcetype=Template) + self.assertRaises(TypeError, code.filter_templates, 1, 0, 0, Template) + + code4 = parse("{{foo}}{{foo|{{bar}}}}") + actual1 = 
code4.filter_templates(recursive=code4.RECURSE_OTHERS) + actual2 = code4.filter_templates(code4.RECURSE_OTHERS) + self.assertEqual(["{{foo}}", "{{foo|{{bar}}}}"], actual1) + self.assertEqual(["{{foo}}", "{{foo|{{bar}}}}"], actual2) def test_get_sections(self): """test Wikicode.get_sections()""" From 51df09ccf0b0b6c0c0cb6d47e64f3937437f8bc5 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 31 May 2014 21:02:16 -0400 Subject: [PATCH 010/102] Really minor documentation fixes. --- README.rst | 6 ++++-- docs/integration.rst | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 5b4cfe1..1b25c42 100644 --- a/README.rst +++ b/README.rst @@ -123,19 +123,21 @@ If you're using Pywikipedia_, your code might look like this:: import mwparserfromhell import wikipedia as pywikibot + def parse(title): site = pywikibot.getSite() page = pywikibot.Page(site, title) text = page.get() return mwparserfromhell.parse(text) -If you're not using a library, you can parse templates in any page using the -following code (via the API_):: +If you're not using a library, you can parse any page using the following code +(via the API_):: import json import urllib import mwparserfromhell API_URL = "http://en.wikipedia.org/w/api.php" + def parse(title): data = {"action": "query", "prop": "revisions", "rvlimit": 1, "rvprop": "content", "format": "json", "titles": title} diff --git a/docs/integration.rst b/docs/integration.rst index 78810b8..a09334d 100644 --- a/docs/integration.rst +++ b/docs/integration.rst @@ -11,19 +11,21 @@ If you're using Pywikipedia_, your code might look like this:: import mwparserfromhell import wikipedia as pywikibot + def parse(title): site = pywikibot.getSite() page = pywikibot.Page(site, title) text = page.get() return mwparserfromhell.parse(text) -If you're not using a library, you can parse templates in any page using the -following code (via the API_):: +If you're not using a library, you can parse any page using the following code +(via the API_):: import json import urllib import mwparserfromhell API_URL = "http://en.wikipedia.org/w/api.php" + def parse(title): raw = urllib.urlopen(API_URL, data).read() res = json.loads(raw) From 34a6c7cc4c8ee6bc9c2b5628a2e81ec2f971884c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 15 Jun 2014 22:20:28 -0400 Subject: [PATCH 011/102] Typo fix. --- mwparserfromhell/parser/tokenizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 33722fa..aa7499a 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -112,7 +112,7 @@ class Tokenizer(object): self._textbuffer = [] def _pop(self, keep_context=False): - """Pop the current stack/context/textbuffer, returing the stack. + """Pop the current stack/context/textbuffer, returning the stack. If *keep_context* is ``True``, then we will replace the underlying stack's context with the current stack's. From 02eff0fc490d6f46309a96d24e338f4ee69b8381 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 23 Jun 2014 23:32:47 -0400 Subject: [PATCH 012/102] Fully fix #74. Add another tokenizer test. 
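The earlier attempt (9108d49) searched the token stack in reverse for a TagCloseOpen, but when another tag is nested inside the attributes of the unclosed tag, the token found that way can belong to the inner tag rather than the outer one. Track nesting depth instead, so we find the TagCloseOpen that matches the outer TagOpenOpen at the start of the stack. In outline (this mirrors the Python tokenizer change below; the C tokenizer gets the same treatment):

    depth = 1
    for index, token in enumerate(stack[2:], 2):
        if isinstance(token, tokens.TagOpenOpen):
            depth += 1
        elif isinstance(token, tokens.TagCloseOpen):
            depth -= 1
            if depth == 0:
                break
    padding = stack[index].padding
    stack[index] = tokens.TagCloseSelfclose(padding=padding, implicit=True)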
--- mwparserfromhell/compat.py | 2 -- mwparserfromhell/parser/tokenizer.c | 18 +++++++++++++----- mwparserfromhell/parser/tokenizer.py | 17 ++++++++++++----- tests/tokenizer/integration.mwtest | 7 +++++++ 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index 94e0db3..4384ace 100644 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -20,7 +20,6 @@ if py3k: range = range maxsize = sys.maxsize import html.entities as htmlentities - zip = zip else: bytes = str @@ -28,6 +27,5 @@ else: range = xrange maxsize = sys.maxint import htmlentitydefs as htmlentities - from itertools import izip as zip del sys diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index d8a505f..41ce5ac 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1896,18 +1896,26 @@ static PyObject* Tokenizer_handle_single_tag_end(Tokenizer* self) { PyObject *token = 0, *padding, *kwargs; Py_ssize_t len, index; - int is_instance; + int depth = 1, is_instance; len = PyList_GET_SIZE(self->topstack->stack); - for (index = len - 1; index >= 0; index--) { + for (index = 2; index < len; index++) { token = PyList_GET_ITEM(self->topstack->stack, index); - is_instance = PyObject_IsInstance(token, TagCloseOpen); + is_instance = PyObject_IsInstance(token, TagOpenOpen); if (is_instance == -1) return NULL; else if (is_instance == 1) - break; + depth++; + is_instance = PyObject_IsInstance(token, TagCloseOpen); + if (is_instance == -1) + return NULL; + else if (is_instance == 1) { + depth--; + if (depth == 0) + break; + } } - if (!token) + if (!token || depth > 0) return NULL; padding = PyObject_GetAttrString(token, "padding"); if (!padding) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index aa7499a..e69a823 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -25,7 +25,7 @@ from math import log import re from . 
import contexts, tokens -from ..compat import htmlentities, range, zip +from ..compat import htmlentities, range from ..definitions import (get_html_tag, is_parsable, is_single, is_single_only, is_scheme) @@ -752,11 +752,18 @@ class Tokenizer(object): def _handle_single_tag_end(self): """Handle the stream end when inside a single-supporting HTML tag.""" stack = self._stack - gen = zip(range(len(stack) - 1, -1, -1), reversed(stack)) - index = next(i for i, t in gen if isinstance(t, tokens.TagCloseOpen)) + # We need to find the index of the TagCloseOpen token corresponding to + # the TagOpenOpen token located at index 0: + depth = 1 + for index, token in enumerate(stack[2:], 2): + if isinstance(token, tokens.TagOpenOpen): + depth += 1 + elif isinstance(token, tokens.TagCloseOpen): + depth -= 1 + if depth == 0: + break padding = stack[index].padding - token = tokens.TagCloseSelfclose(padding=padding, implicit=True) - stack[index] = token + stack[index] = tokens.TagCloseSelfclose(padding=padding, implicit=True) return self._pop() def _really_parse_tag(self): diff --git a/tests/tokenizer/integration.mwtest b/tests/tokenizer/integration.mwtest index bf19f4d..5e1a409 100644 --- a/tests/tokenizer/integration.mwtest +++ b/tests/tokenizer/integration.mwtest @@ -178,3 +178,10 @@ name: external_link_inside_wikilink_title label: an external link inside a wikilink title, which is invalid input: "[[File:Example.png http://example.com]]" output: [WikilinkOpen(), Text(text="File:Example.png http://example.com"), WikilinkClose()] + +--- + +name: italics_inside_external_link_inside_incomplete_list +label: italic text inside an external link inside an incomplete list +input: "
<li>[http://www.example.com ''example'']" +output: [TagOpenOpen(), Text(text="li"), TagCloseSelfclose(padding="", implicit=True), ExternalLinkOpen(brackets=True), Text(text="http://www.example.com"), ExternalLinkSeparator(), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="example"), TagOpenClose(), Text(text="i"), TagCloseClose(), ExternalLinkClose()] From 9412579d862451e2b8d14f0010f16df7ecce61f5 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 27 Jun 2014 15:17:48 -0400 Subject: [PATCH 013/102] Remove unnecessary unicode_literals. --- mwparserfromhell/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index e7459e3..9c29fd2 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -26,8 +26,6 @@ Parser from Hell) is a Python package that provides an easy-to-use and outrageously powerful parser for `MediaWiki <http://mediawiki.org/>`_ wikicode. """ -from __future__ import unicode_literals - __author__ = "Ben Kurtovic" __copyright__ = "Copyright (C) 2012, 2013, 2014 Ben Kurtovic" __license__ = "MIT License" From 3dd29097e4d436f4fa7a01e2c4213c528168b242 Mon Sep 17 00:00:00 2001 From: Merlijn van Deen Date: Fri, 27 Jun 2014 15:21:16 -0700 Subject: [PATCH 014/102] _test_tokenizer: force utf-8 file encoding --- tests/_test_tokenizer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 7487241..313b959 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -25,6 +25,8 @@ from os import listdir, path import sys from mwparserfromhell.compat import py3k +if not py3k: + from codecs import open from mwparserfromhell.parser import tokens class _TestParseError(Exception): @@ -109,10 +111,8 @@ class TokenizerTestCase(object): def build(cls): """Load and install all tests from the 'tokenizer' directory.""" def load_file(filename): - with open(filename, "rU") as fp: + with open(filename, "rU", encoding='utf8') as fp: text = fp.read() - if not py3k: - text = text.decode("utf8") name = path.split(filename)[1][:0-len(extension)] cls._load_tests(filename, name, text) From b135e8e473837909c6847f8a52711527409b5224 Mon Sep 17 00:00:00 2001 From: Merlijn van Deen Date: Fri, 27 Jun 2014 15:21:37 -0700 Subject: [PATCH 015/102] Add windows build tools --- tools/build_mwpfh.py | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 tools/build_mwpfh.py diff --git a/tools/build_mwpfh.py b/tools/build_mwpfh.py new file mode 100644 index 0000000..a090b4d --- /dev/null +++ b/tools/build_mwpfh.py @@ -0,0 +1,43 @@ +from __future__ import print_function + +import subprocess +import sys +import os + +path = os.path.split(__file__)[0] +if path: + os.chdir(path) + +environments = ['26', '27', '32', '33', '34'] + +target = "pypi" if "--push" in sys.argv else "test" + +returnvalues = {} + +def run(pyver, cmds, target=None): + cmd = [r"C:\Python%s\Python.exe" % pyver, "setup.py"] + cmds + if target: + cmd += ["-r", target] + + print(" ".join(cmd), end=" ") + retval = subprocess.call(cmd, stdout=open("%s%s.log" % (cmds[0], pyver), 'w'), stderr=subprocess.STDOUT, cwd="..") + if not retval: + print("[OK]") + else: + print("[FAILED (%i)]" % retval) + return retval + +run("27", ["register"], target) + +if 'failed' in open('register27.log').read(): + raise Exception + +for pyver in environments: + print() + try: + os.unlink('mwparserfromhell/parser/_tokenizer.pyd') + except WindowsError: + pass
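+    # After clearing the stale extension above, only upload a wheel for this
+    # interpreter if its test run passes: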
+ + if run(pyver, ["test"]) == 0: + run(pyver, ["bdist_wheel", "upload"], target) \ No newline at end of file From 05d048762f2e0c81a9e425425269dcdde4bec251 Mon Sep 17 00:00:00 2001 From: Merlijn van Deen Date: Fri, 27 Jun 2014 15:21:45 -0700 Subject: [PATCH 016/102] Improve .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 4068716..8790182 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *.pyc +*.pyd *.so *.dll *.egg @@ -8,3 +9,4 @@ __pycache__ build dist docs/_build +tools/*.log From 581ca9a2213d6329a45d3b927873febe9e5ad479 Mon Sep 17 00:00:00 2001 From: Merlijn van Deen Date: Sat, 28 Jun 2014 00:35:50 +0200 Subject: [PATCH 017/102] Update README.rst for the new Windows wheels --- README.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 5b4cfe1..d0f67c9 100644 --- a/README.rst +++ b/README.rst @@ -17,7 +17,10 @@ Installation The easiest way to install the parser is through the `Python Package Index`_, so you can install the latest release with ``pip install mwparserfromhell`` -(`get pip`_). Alternatively, get the latest development version:: +(`get pip`_). On Windows, make sure you have the latest version of pip +installed by running `pip install --upgrade pip`. + +Alternatively, get the latest development version:: git clone https://github.com/earwig/mwparserfromhell.git cd mwparserfromhell From 5e9930b8a060d2ad80713809e312eae913ce7a4f Mon Sep 17 00:00:00 2001 From: Merlijn van Deen Date: Sat, 28 Jun 2014 00:37:31 +0200 Subject: [PATCH 018/102] Fix tabs in update_mwpfh.py --- tools/build_mwpfh.py | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/tools/build_mwpfh.py b/tools/build_mwpfh.py index a090b4d..4a86241 100644 --- a/tools/build_mwpfh.py +++ b/tools/build_mwpfh.py @@ -6,7 +6,7 @@ import os path = os.path.split(__file__)[0] if path: - os.chdir(path) + os.chdir(path) environments = ['26', '27', '32', '33', '34'] @@ -15,29 +15,29 @@ target = "pypi" if "--push" in sys.argv else "test" returnvalues = {} def run(pyver, cmds, target=None): - cmd = [r"C:\Python%s\Python.exe" % pyver, "setup.py"] + cmds - if target: - cmd += ["-r", target] - - print(" ".join(cmd), end=" ") - retval = subprocess.call(cmd, stdout=open("%s%s.log" % (cmds[0], pyver), 'w'), stderr=subprocess.STDOUT, cwd="..") - if not retval: - print("[OK]") - else: - print("[FAILED (%i)]" % retval) - return retval +cmd = [r"C:\Python%s\Python.exe" % pyver, "setup.py"] + cmds +if target: + cmd += ["-r", target] + +print(" ".join(cmd), end=" ") +retval = subprocess.call(cmd, stdout=open("%s%s.log" % (cmds[0], pyver), 'w'), stderr=subprocess.STDOUT, cwd="..") +if not retval: + print("[OK]") +else: + print("[FAILED (%i)]" % retval) +return retval run("27", ["register"], target) if 'failed' in open('register27.log').read(): - raise Exception + raise Exception for pyver in environments: - print() - try: - os.unlink('mwparserfromhell/parser/_tokenizer.pyd') - except WindowsError: - pass - - if run(pyver, ["test"]) == 0: - run(pyver, ["bdist_wheel", "upload"], target) \ No newline at end of file + print() + try: + os.unlink('mwparserfromhell/parser/_tokenizer.pyd') + except WindowsError: + pass + + if run(pyver, ["test"]) == 0: + run(pyver, ["bdist_wheel", "upload"], target) From fb16781659080a7a38888e4579430192e66347cb Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 27 Jun 2014 18:49:44 -0400 Subject: [PATCH 019/102] 3.4 should work on Travis now. 
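Travis CI now provides Python 3.4 workers, so 3.4 joins 2.6, 2.7, 3.2, and 3.3 in the build matrix below.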
--- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 31090f2..5fe3760 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,5 +4,6 @@ python: - "2.7" - "3.2" - "3.3" + - "3.4" install: python setup.py build script: python setup.py test -q From efcd59e0972e2957d70c23ac3d071d82bfa4d88d Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 27 Jun 2014 18:51:22 -0400 Subject: [PATCH 020/102] Put -q earlier so Travis generates fewer messages. --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 5fe3760..de041fa 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,4 +6,4 @@ python: - "3.3" - "3.4" install: python setup.py build -script: python setup.py test -q +script: python setup.py -q test From 9b207dc7e232f4599f4d30a66fea2e4510d0f825 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 27 Jun 2014 18:52:24 -0400 Subject: [PATCH 021/102] Use the newer SVG. --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 1b25c42..755bcbd 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ mwparserfromhell ================ -.. image:: https://travis-ci.org/earwig/mwparserfromhell.png?branch=develop +.. image:: https://api.travis-ci.org/earwig/mwparserfromhell.svg?branch=develop :alt: Build Status :target: http://travis-ci.org/earwig/mwparserfromhell From d8adb62454f464f39b59c179f52ddb17621a2e18 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 27 Jun 2014 19:56:18 -0400 Subject: [PATCH 022/102] Some tweaks, but no change in functionality. --- .gitignore | 2 +- README.rst | 11 +++-------- scripts/win_build.py | 36 ++++++++++++++++++++++++++++++++++++ setup.py | 4 ++-- tests/_test_tokenizer.py | 5 ++--- tools/build_mwpfh.py | 43 ------------------------------------------- 6 files changed, 44 insertions(+), 57 deletions(-) create mode 100644 scripts/win_build.py delete mode 100644 tools/build_mwpfh.py diff --git a/.gitignore b/.gitignore index 8790182..f7f7bd9 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,4 @@ __pycache__ build dist docs/_build -tools/*.log +scripts/*.log diff --git a/README.rst b/README.rst index d0f67c9..c112afd 100644 --- a/README.rst +++ b/README.rst @@ -15,10 +15,10 @@ Full documentation is available on ReadTheDocs_. Development occurs on GitHub_. Installation ------------ -The easiest way to install the parser is through the `Python Package Index`_, -so you can install the latest release with ``pip install mwparserfromhell`` +The easiest way to install the parser is through the `Python Package Index`_; +you can install the latest release with ``pip install mwparserfromhell`` (`get pip`_). On Windows, make sure you have the latest version of pip -installed by running `pip install --upgrade pip`. +installed by running ``pip install --upgrade pip``. Alternatively, get the latest development version:: @@ -26,11 +26,6 @@ Alternatively, get the latest development version:: cd mwparserfromhell python setup.py install -If you get ``error: Unable to find vcvarsall.bat`` while installing, this is -because Windows can't find the compiler for C extensions. Consult this -`StackOverflow question`_ for help. You can also set ``ext_modules`` in -``setup.py`` to an empty list to prevent the extension from building. - You can run the comprehensive unit testing suite with ``python setup.py test -q``. 
diff --git a/scripts/win_build.py b/scripts/win_build.py new file mode 100644 index 0000000..c70dedc --- /dev/null +++ b/scripts/win_build.py @@ -0,0 +1,36 @@ +from __future__ import print_function +import os +from subprocess import call, STDOUT + +ENVIRONMENTS = ["26", "27", "32", "33", "34"] + +def run(pyver, cmds): + cmd = [r"C:\Python%s\Python.exe" % pyver, "setup.py"] + cmds + print(" ".join(cmd), end=" ") + + with open("%s%s.log" % (cmds[0], pyver), "w") as logfile: + retval = call(cmd, stdout=logfile, stderr=STDOUT, cwd="..") + if not retval: + print("[OK]") + else: + print("[FAILED (%i)]" % retval) + return retval + +def main(): + path = os.path.split(__file__)[0] + if path: + os.chdir(path) + + print("Building Windows wheels for Python %s:" % ", ".join(ENVIRONMENTS)) + for pyver in ENVIRONMENTS: + print() + try: + os.unlink("mwparserfromhell/parser/_tokenizer.pyd") + except OSError: + pass + + if run(pyver, ["test"]) == 0: + run(pyver, ["bdist_wheel", "upload"]) + +if __name__ == "__main__": + main() diff --git a/setup.py b/setup.py index 5a45902..07fb330 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ import sys if (sys.version_info[0] == 2 and sys.version_info[1] < 6) or \ (sys.version_info[1] == 3 and sys.version_info[1] < 2): - raise Exception('mwparserfromhell needs Python 2.6+ or 3.2+') + raise Exception("mwparserfromhell needs Python 2.6+ or 3.2+") from setuptools import setup, find_packages, Extension @@ -36,7 +36,7 @@ with open("README.rst") as fp: long_docs = fp.read() tokenizer = Extension("mwparserfromhell.parser._tokenizer", - sources = ["mwparserfromhell/parser/tokenizer.c"]) + sources=["mwparserfromhell/parser/tokenizer.c"]) setup( name = "mwparserfromhell", diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 313b959..bfd4857 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -21,12 +21,11 @@ # SOFTWARE. 
from __future__ import print_function, unicode_literals +import codecs from os import listdir, path import sys from mwparserfromhell.compat import py3k -if not py3k: - from codecs import open from mwparserfromhell.parser import tokens class _TestParseError(Exception): @@ -111,7 +110,7 @@ class TokenizerTestCase(object): def build(cls): """Load and install all tests from the 'tokenizer' directory.""" def load_file(filename): - with open(filename, "rU", encoding='utf8') as fp: + with codecs.open(filename, "rU", encoding="utf8") as fp: text = fp.read() name = path.split(filename)[1][:0-len(extension)] cls._load_tests(filename, name, text) diff --git a/tools/build_mwpfh.py b/tools/build_mwpfh.py deleted file mode 100644 index 4a86241..0000000 --- a/tools/build_mwpfh.py +++ /dev/null @@ -1,43 +0,0 @@ -from __future__ import print_function - -import subprocess -import sys -import os - -path = os.path.split(__file__)[0] -if path: - os.chdir(path) - -environments = ['26', '27', '32', '33', '34'] - -target = "pypi" if "--push" in sys.argv else "test" - -returnvalues = {} - -def run(pyver, cmds, target=None): -cmd = [r"C:\Python%s\Python.exe" % pyver, "setup.py"] + cmds -if target: - cmd += ["-r", target] - -print(" ".join(cmd), end=" ") -retval = subprocess.call(cmd, stdout=open("%s%s.log" % (cmds[0], pyver), 'w'), stderr=subprocess.STDOUT, cwd="..") -if not retval: - print("[OK]") -else: - print("[FAILED (%i)]" % retval) -return retval - -run("27", ["register"], target) - -if 'failed' in open('register27.log').read(): - raise Exception - -for pyver in environments: - print() - try: - os.unlink('mwparserfromhell/parser/_tokenizer.pyd') - except WindowsError: - pass - - if run(pyver, ["test"]) == 0: - run(pyver, ["bdist_wheel", "upload"], target) From 38e423b1407c97aec3b1495902a6ab9ef517e17b Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 27 Jun 2014 20:40:36 -0400 Subject: [PATCH 023/102] Update release script. --- scripts/release.sh | 14 ++++++++++++-- scripts/win_build.py | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/scripts/release.sh b/scripts/release.sh index c10871d..4becf1a 100755 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -66,13 +66,21 @@ do_git_stuff() { echo " done." } -build_sdist() { - echo -n "Uploading to PyPI..." +upload_to_pypi() { + # TODO: check whether these commands give output + echo -n "PyPI: uploading source tarball and docs..." python setup.py register sdist upload -s python setup.py upload_docs echo " done." } +windows_build() { + echo "PyPI: building/uploading Windows binaries..." + echo "*** Run in Windows: ./scripts/win_build.py" + echo "*** Press enter when done." + read +} + post_release() { echo echo "*** Release completed." 
@@ -148,6 +156,8 @@ update_version update_changelog update_docs_changelog do_git_stuff +upload_to_pypi +windows_build post_release test_release diff --git a/scripts/win_build.py b/scripts/win_build.py index c70dedc..143f060 100644 --- a/scripts/win_build.py +++ b/scripts/win_build.py @@ -30,7 +30,7 @@ def main(): pass if run(pyver, ["test"]) == 0: - run(pyver, ["bdist_wheel", "upload"]) + run(pyver, ["bdist_wheel", "upload"]) # TODO: add "-s" to GPG sign if __name__ == "__main__": main() From ad03f60140e011dc1f47d3813693e6f4cea604d8 Mon Sep 17 00:00:00 2001 From: Merlijn van Deen Date: Sat, 28 Jun 2014 11:11:11 +0200 Subject: [PATCH 024/102] Add build requirements --- scripts/win_build.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/scripts/win_build.py b/scripts/win_build.py index 143f060..2d51909 100644 --- a/scripts/win_build.py +++ b/scripts/win_build.py @@ -1,3 +1,25 @@ +# Build requirements: +# +# Python 2.6-3.2: Visual C++ Express Edition 2008: +# http://go.microsoft.com/?linkid=7729279 +# +# Python 3.3+: Visual C++ Express Edition 2010: +# http://go.microsoft.com/?linkid=9709949 +# +# x64 builds: Microsoft Windows SDK for Windows 7 and .NET Framework 3.5 SP1: +# http://www.microsoft.com/en-us/download/details.aspx?id=3138 +# +# Python interpreter, 2.6, 2.7, 3.2-3.4: +# https://www.python.org/downloads/ +# +# Pip, setuptools, wheel: +# https://bootstrap.pypa.io/get-pip.py +# and run *for each* Python version: +# c:\pythonXX\python get-pip.py +# c:\pythonXX\scripts\pip install wheel +# +# Afterwards, run this script with any of the python interpreters (2.7 suggested) + from __future__ import print_function import os from subprocess import call, STDOUT From 25e7e7da700d926fdbfc55e4e44dafa29f330dc6 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 3 Jul 2014 16:59:49 -0400 Subject: [PATCH 025/102] Test coveralls. --- .travis.yml | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index de041fa..c8dbb88 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,14 @@ language: python python: - - "2.6" - - "2.7" - - "3.2" - - "3.3" - - "3.4" -install: python setup.py build -script: python setup.py -q test + - 2.6 + - 2.7 + - 3.2 + - 3.3 + - 3.4 +install: + - pip install coveralls + - python setup.py build +script: + - coverage run --source=mwparserfromhell setup.py -q test +after_success: + - coveralls From 50515f3e7729b23c0f27fcb13b2d50244b93f4bd Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 3 Jul 2014 17:21:00 -0400 Subject: [PATCH 026/102] README badge thing. --- README.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 572afba..b6bf7e7 100644 --- a/README.rst +++ b/README.rst @@ -1,10 +1,14 @@ mwparserfromhell ================ -.. image:: https://api.travis-ci.org/earwig/mwparserfromhell.svg?branch=develop +.. image:: https://img.shields.io/travis/earwig/mwparserfromhell/develop.svg :alt: Build Status :target: http://travis-ci.org/earwig/mwparserfromhell +.. image:: https://img.shields.io/coveralls/earwig/mwparserfromhell/develop.svg + :alt: Coverage Status + :target: https://coveralls.io/r/earwig/mwparserfromhell + **mwparserfromhell** (the *MediaWiki Parser from Hell*) is a Python package that provides an easy-to-use and outrageously powerful parser for MediaWiki_ wikicode. It supports Python 2 and Python 3. 
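Before the next patch reworks error handling, a minimal usage sketch of the public API these patches keep exercising (``mwparserfromhell.parse``, ``filter_templates``, and the ``Template`` accessors covered by the test suite)::

    import mwparserfromhell

    code = mwparserfromhell.parse("{{foo|bar|baz=qux}}")
    for template in code.filter_templates():
        print(template.name)              # -> foo
        print(template.get("baz").value)  # -> qux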
From 08cafc05766afe98c612aca21caa1882e5c2e5c7 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Fri, 4 Jul 2014 16:57:00 -0400
Subject: [PATCH 027/102] Raise ParserError for internal problems. Improve coverage. Cleanup.

---
 .coveragerc | 4 +++
 .gitignore | 2 ++
 CHANGELOG | 3 ++
 docs/changelog.rst | 3 ++
 mwparserfromhell/nodes/__init__.py | 4 +--
 mwparserfromhell/parser/__init__.py | 31 ++++++++++++++++++---
 mwparserfromhell/parser/builder.py | 23 +++++++---------
 mwparserfromhell/parser/tokenizer.c | 53 ++++++++++++++++++++++++++++++------
 mwparserfromhell/parser/tokenizer.h | 3 ++
 mwparserfromhell/parser/tokenizer.py | 7 +++--
 mwparserfromhell/parser/tokens.py | 2 +-
 mwparserfromhell/utils.py | 2 +-
 tests/test_builder.py | 8 +++++-
 13 files changed, 113 insertions(+), 32 deletions(-)
 create mode 100644 .coveragerc

diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 0000000..0a92f19
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,4 @@
+[report]
+exclude_lines =
+    pragma: no cover
+    raise NotImplementedError()
diff --git a/.gitignore b/.gitignore
index f7f7bd9..3da2db3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,9 +4,11 @@
 *.dll
 *.egg
 *.egg-info
+.coverage
 .DS_Store
 __pycache__
 build
 dist
 docs/_build
 scripts/*.log
+htmlcov/
diff --git a/CHANGELOG b/CHANGELOG
index 4f4f77b..d733cee 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -10,6 +10,9 @@ v0.4 (unreleased):
   option, RECURSE_OTHERS, which recurses over all children except instances
   of 'forcetype' (for example, `code.filter_templates(code.RECURSE_OTHERS)`
   returns all un-nested templates).
+- If something goes wrong while parsing, ParserError will now be raised.
+  Previously, the parser would produce an unclear BadRoute exception or allow
+  an incorrect node tree to be built.
 - Fixed a parser bug involving nested tags.
 - Updated and fixed some documentation.

 v0.3.3 (released April 22, 2014):
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 0576d29..a530733 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -18,6 +18,9 @@ Unreleased
   which recurses over all children except instances of *forcetype* (for
   example, ``code.filter_templates(code.RECURSE_OTHERS)`` returns all
   un-nested templates).
+- If something goes wrong while parsing, :py:exc:`.ParserError` will now be
+  raised. Previously, the parser would produce an unclear :py:exc:`.BadRoute`
+  exception or allow an incorrect node tree to be built.
 - Fixed a parser bug involving nested tags.
 - Updated and fixed some documentation.

 v0.3.3
diff --git a/mwparserfromhell/nodes/__init__.py b/mwparserfromhell/nodes/__init__.py
index 223cc67..d6f60bd 100644
--- a/mwparserfromhell/nodes/__init__.py
+++ b/mwparserfromhell/nodes/__init__.py
@@ -55,8 +55,8 @@ class Node(StringMixIn):
         raise NotImplementedError()

     def __children__(self):
-        return # Funny generator-that-yields-nothing syntax
-        yield
+        return
+        yield # pragma: no cover (this is a generator that yields nothing)

     def __strip__(self, normalize, collapse):
         return None
diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py
index 8bac295..467d5df 100644
--- a/mwparserfromhell/parser/__init__.py
+++ b/mwparserfromhell/parser/__init__.py
@@ -26,6 +26,19 @@ modules: the :py:mod:`~.tokenizer` and the :py:mod:`~.builder`. This module
 joins them together under one interface.
 """

+class ParserError(Exception):
+    """Exception raised when an internal error occurs while parsing.
+
+    This does not mean that the wikicode was invalid, because invalid markup
+    should still be parsed correctly.
This means that the parser caught itself + with an impossible internal state and is bailing out before other problems + can happen. Its appearance indicates a bug. + """ + def __init__(self, extra): + msg = "This is a bug and should be reported. Info: {0}.".format(extra) + super(ParserError, self).__init__(msg) + + from .builder import Builder from .tokenizer import Tokenizer try: @@ -35,15 +48,22 @@ except ImportError: CTokenizer = None use_c = False -__all__ = ["use_c", "Parser"] +__all__ = ["use_c", "Parser", "ParserError"] class Parser(object): """Represents a parser for wikicode. Actual parsing is a two-step process: first, the text is split up into a - series of tokens by the :py:class:`~.Tokenizer`, and then the tokens are - converted into trees of :py:class:`~.Wikicode` objects and - :py:class:`~.Node`\ s by the :py:class:`~.Builder`. + series of tokens by the :py:class:`.Tokenizer`, and then the tokens are + converted into trees of :py:class:`.Wikicode` objects and + :py:class:`.Node`\ s by the :py:class:`.Builder`. + + Instances of this class or its dependents (:py:class:`.Tokenizer` and + :py:class:`.Builder`) should not be shared between threads. + :py:meth:`parse` can be called multiple times as long as it is not done + concurrently. In general, there is no need to do this because parsing + should be done through :py:func:`mwparserfromhell.parse`, which creates a + new :py:class:`.Parser` object as necessary. """ def __init__(self): @@ -65,6 +85,9 @@ class Parser(object): If *skip_style_tags* is ``True``, then ``''`` and ``'''`` will not be parsed, but instead will be treated as plain text. + + If there is an internal error while parsing, :py:exc:`.ParserError` + will be raised. """ tokens = self._tokenizer.tokenize(text, context, skip_style_tags) code = self._builder.build(tokens) diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index 5f8ce45..559bd54 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -22,7 +22,7 @@ from __future__ import unicode_literals -from . import tokens +from . import tokens, ParserError from ..compat import str from ..nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Tag, Template, Text, Wikilink) @@ -33,33 +33,28 @@ from ..wikicode import Wikicode __all__ = ["Builder"] class Builder(object): - """Combines a sequence of tokens into a tree of ``Wikicode`` objects. + """Builds a tree of nodes out of a sequence of tokens. To use, pass a list of :py:class:`~.Token`\ s to the :py:meth:`build` method. The list will be exhausted as it is parsed and a - :py:class:`~.Wikicode` object will be returned. + :py:class:`.Wikicode` object containing the node tree will be returned. """ def __init__(self): self._tokens = [] self._stacks = [] - def _wrap(self, nodes): - """Properly wrap a list of nodes in a ``Wikicode`` object.""" - return Wikicode(SmartList(nodes)) - def _push(self): """Push a new node list onto the stack.""" self._stacks.append([]) - def _pop(self, wrap=True): + def _pop(self): """Pop the current node list off of the stack. - If *wrap* is ``True``, we will call :py:meth:`_wrap` on the list. + The raw node list is wrapped in a :py:class:`.SmartList` and then in a + :py:class:`.Wikicode` object. 
""" - if wrap: - return self._wrap(self._stacks.pop()) - return self._stacks.pop() + return Wikicode(SmartList(self._stacks.pop())) def _write(self, item): """Append a node to the current node list.""" @@ -84,7 +79,7 @@ class Builder(object): self._tokens.append(token) value = self._pop() if key is None: - key = self._wrap([Text(str(default))]) + key = Wikicode(SmartList([Text(str(default))])) return Parameter(key, value, showkey) else: self._write(self._handle_token(token)) @@ -270,6 +265,8 @@ class Builder(object): return self._handle_comment() elif isinstance(token, tokens.TagOpenOpen): return self._handle_tag(token) + err = "_handle_token() got unexpected {0}".format(type(token).__name__) + raise ParserError(err) def build(self, tokenlist): """Build a Wikicode object from a list tokens and return it.""" diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 41ce5ac..6ab8570 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -347,7 +347,7 @@ static PyObject* Tokenizer_pop_keeping_context(Tokenizer* self) /* Fail the current tokenization route. Discards the current - stack/context/textbuffer and raises a BadRoute exception. + stack/context/textbuffer and sets the BAD_ROUTE flag. */ static void* Tokenizer_fail_route(Tokenizer* self) { @@ -2681,7 +2681,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) */ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) { - PyObject *text, *temp; + PyObject *text, *temp, *tokens; int context = 0, skip_style_tags = 0; if (PyArg_ParseTuple(args, "U|ii", &text, &context, &skip_style_tags)) { @@ -2704,13 +2704,29 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) Py_XDECREF(temp); self->text = text; } + self->head = self->global = self->depth = self->cycles = 0; self->length = PyList_GET_SIZE(self->text); self->skip_style_tags = skip_style_tags; - return Tokenizer_parse(self, context, 1); + tokens = Tokenizer_parse(self, context, 1); + + if (!tokens && !PyErr_Occurred()) { + if (!ParserError) { + if (load_exceptions()) + return NULL; + } + if (BAD_ROUTE) { + RESET_ROUTE(); + PyErr_SetString(ParserError, "C tokenizer exited with BAD_ROUTE"); + } + else + PyErr_SetString(ParserError, "C tokenizer exited unexpectedly"); + return NULL; + } + return tokens; } -static int load_entitydefs(void) +static int load_entities(void) { PyObject *tempmod, *defmap, *deflist; unsigned numdefs, i; @@ -2814,7 +2830,7 @@ static int load_tokens(void) return 0; } -static int load_definitions(void) +static int load_defs(void) { PyObject *tempmod, *globals = PyEval_GetGlobals(), @@ -2835,6 +2851,29 @@ static int load_definitions(void) return 0; } +static int load_exceptions(void) +{ + PyObject *tempmod, *parsermod, + *globals = PyEval_GetGlobals(), + *locals = PyEval_GetLocals(), + *fromlist = PyList_New(1), + *modname = IMPORT_NAME_FUNC("parser"); + char *name = "mwparserfromhell"; + + if (!fromlist || !modname) + return -1; + PyList_SET_ITEM(fromlist, 0, modname); + tempmod = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); + Py_DECREF(fromlist); + if (!tempmod) + return -1; + parsermod = PyObject_GetAttrString(tempmod, "parser"); + Py_DECREF(tempmod); + ParserError = PyObject_GetAttrString(parsermod, "ParserError"); + Py_DECREF(parsermod); + return 0; +} + PyMODINIT_FUNC INIT_FUNC_NAME(void) { PyObject *module; @@ -2851,9 +2890,7 @@ PyMODINIT_FUNC INIT_FUNC_NAME(void) PyDict_SetItemString(TokenizerType.tp_dict, "USES_C", 
Py_True); EMPTY = PyUnicode_FromString(""); NOARGS = PyTuple_New(0); - if (!EMPTY || !NOARGS) - INIT_ERROR; - if (load_entitydefs() || load_tokens() || load_definitions()) + if (!EMPTY || !NOARGS || load_entities() || load_tokens() || load_defs()) INIT_ERROR; #ifdef IS_PY3K return module; diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index 032480d..4312e2f 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -62,6 +62,7 @@ static char** entitydefs; static PyObject* EMPTY; static PyObject* NOARGS; +static PyObject* ParserError; static PyObject* definitions; @@ -268,6 +269,8 @@ static int Tokenizer_parse_tag(Tokenizer*); static PyObject* Tokenizer_parse(Tokenizer*, int, int); static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); +static int load_exceptions(void); + /* Macros for Python 2/3 compatibility: */ diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index e69a823..9af9204 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -24,7 +24,7 @@ from __future__ import unicode_literals from math import log import re -from . import contexts, tokens +from . import contexts, tokens, ParserError from ..compat import htmlentities, range from ..definitions import (get_html_tag, is_parsable, is_single, is_single_only, is_scheme) @@ -1154,4 +1154,7 @@ class Tokenizer(object): split = self.regex.split(text) self._text = [segment for segment in split if segment] self._head = self._global = self._depth = self._cycles = 0 - return self._parse(context) + try: + return self._parse(context) + except BadRoute: # pragma: no cover (untestable/exceptional case) + raise ParserError("Python tokenizer exited with BadRoute") diff --git a/mwparserfromhell/parser/tokens.py b/mwparserfromhell/parser/tokens.py index 40e5158..c7cc3ef 100644 --- a/mwparserfromhell/parser/tokens.py +++ b/mwparserfromhell/parser/tokens.py @@ -34,7 +34,7 @@ from ..compat import py3k, str __all__ = ["Token"] -class Token (dict): +class Token(dict): """A token stores the semantic meaning of a unit of wikicode.""" def __repr__(self): diff --git a/mwparserfromhell/utils.py b/mwparserfromhell/utils.py index fd54ad0..8dc5e4e 100644 --- a/mwparserfromhell/utils.py +++ b/mwparserfromhell/utils.py @@ -66,7 +66,7 @@ def parse_anything(value, context=0, skip_style_tags=False): nodelist = SmartList() for item in value: nodelist += parse_anything(item, context, skip_style_tags).nodes + return Wikicode(nodelist) except TypeError: error = "Needs string, Node, Wikicode, int, None, or iterable of these, but got {0}: {1}" raise ValueError(error.format(type(value).__name__, value)) - return Wikicode(nodelist) diff --git a/tests/test_builder.py b/tests/test_builder.py index c8fdca3..ed306f7 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -30,7 +30,7 @@ except ImportError: from mwparserfromhell.nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Tag, Template, Text, Wikilink) from mwparserfromhell.nodes.extras import Attribute, Parameter -from mwparserfromhell.parser import tokens +from mwparserfromhell.parser import tokens, ParserError from mwparserfromhell.parser.builder import Builder from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext @@ -420,5 +420,11 @@ class TestBuilder(TreeEqualityTestCase): named=True)]))])]) self.assertWikicodeEqual(valid, self.builder.build(test)) + def test_parser_error(self): + """test whether ParserError gets 
thrown for bad input"""
+        msg = r"_handle_token\(\) got unexpected TemplateClose"
+        self.assertRaisesRegexp(
+            ParserError, msg, self.builder.build, [tokens.TemplateClose()])
+
 if __name__ == "__main__":
     unittest.main(verbosity=2)

From 8bc7ea669da21e9a17e5bc94cbb4329db1220315 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Fri, 4 Jul 2014 20:47:08 -0400
Subject: [PATCH 028/102] Improve test coverage; fix some node-related bugs.

* Parameters with non-integer keys can no longer be created with
  showkey=False, nor have the value of this attribute be set to False later.
* Calling Template.remove() with a Parameter object that is not part of the
  template now raises ValueError instead of doing nothing.
* Added tests for HTMLEntity._unichr() being called with out-of-range
  codepoints.
* Added tests for Tag.__children__() and Tag.__showtree__() involving
  attributes that have no values.
---
 .coveragerc | 4 +++
 CHANGELOG | 5 +
 docs/changelog.rst | 7 ++
 mwparserfromhell/nodes/extras/parameter.py | 13 ++-
 mwparserfromhell/nodes/html_entity.py | 22 ++--
 mwparserfromhell/nodes/template.py | 11 +-
 tests/test_builder.py | 5 +-
 tests/test_html_entity.py | 5 +
 tests/test_parameter.py | 5 +-
 tests/test_tag.py | 18 ++--
 tests/test_template.py | 159 ++++++++++++++---------------
 11 files changed, 140 insertions(+), 114 deletions(-)

diff --git a/.coveragerc b/.coveragerc
index 0a92f19..909a0e2 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -2,3 +2,7 @@
 exclude_lines =
     pragma: no cover
     raise NotImplementedError()
+partial_branches =
+    pragma: no branch
+    if py3k:
+    if not py3k:
diff --git a/CHANGELOG b/CHANGELOG
index d733cee..1200575 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -10,10 +10,15 @@ v0.4 (unreleased):
   option, RECURSE_OTHERS, which recurses over all children except instances
   of 'forcetype' (for example, `code.filter_templates(code.RECURSE_OTHERS)`
   returns all un-nested templates).
+- Calling Template.remove() with a Parameter object that is not part of the
+  template now raises ValueError instead of doing nothing.
+- Parameters with non-integer keys can no longer be created with
+  'showkey=False', nor have the value of this attribute be set to False later.
 - If something goes wrong while parsing, ParserError will now be raised.
   Previously, the parser would produce an unclear BadRoute exception or allow
   an incorrect node tree to be built.
 - Fixed a parser bug involving nested tags.
+- Test coverage has been improved, and some minor related bugs have been fixed.
 - Updated and fixed some documentation.

 v0.3.3 (released April 22, 2014):
diff --git a/docs/changelog.rst b/docs/changelog.rst
index a530733..ba26722 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -18,10 +18,17 @@ Unreleased
   which recurses over all children except instances of *forcetype* (for
   example, ``code.filter_templates(code.RECURSE_OTHERS)`` returns all
   un-nested templates).
+- Calling :py:meth:`.Template.remove` with a :py:class:`.Parameter` object that
+  is not part of the template now raises :py:exc:`ValueError` instead of doing
+  nothing.
+- :py:class:`.Parameter`\ s with non-integer keys can no longer be created with
+  *showkey=False*, nor have the value of this attribute be set to *False*
+  later.
 - If something goes wrong while parsing, :py:exc:`.ParserError` will now be
   raised. Previously, the parser would produce an unclear :py:exc:`.BadRoute`
   exception or allow an incorrect node tree to be built.
 - Fixed a parser bug involving nested tags.
+- Test coverage has been improved, and some minor related bugs have been fixed.
- Updated and fixed some documentation. v0.3.3 diff --git a/mwparserfromhell/nodes/extras/parameter.py b/mwparserfromhell/nodes/extras/parameter.py index e273af9..5a67ae0 100644 --- a/mwparserfromhell/nodes/extras/parameter.py +++ b/mwparserfromhell/nodes/extras/parameter.py @@ -21,6 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals +import re from ...compat import str from ...string_mixin import StringMixIn @@ -39,6 +40,8 @@ class Parameter(StringMixIn): def __init__(self, name, value, showkey=True): super(Parameter, self).__init__() + if not showkey and not self.can_hide_key(name): + raise ValueError("key {0!r} cannot be hidden".format(name)) self._name = name self._value = value self._showkey = showkey @@ -48,6 +51,11 @@ class Parameter(StringMixIn): return str(self.name) + "=" + str(self.value) return str(self.value) + @staticmethod + def can_hide_key(key): + """Return whether or not the given key can be hidden.""" + return re.match(r"[1-9][0-9]*$", str(key).strip()) + @property def name(self): """The name of the parameter as a :py:class:`~.Wikicode` object.""" @@ -73,4 +81,7 @@ class Parameter(StringMixIn): @showkey.setter def showkey(self, newval): - self._showkey = bool(newval) + newval = bool(newval) + if not newval and not self.can_hide_key(self.name): + raise ValueError("parameter key cannot be hidden") + self._showkey = newval diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index c75cb99..95f1492 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -77,17 +77,17 @@ class HTMLEntity(Node): # Test whether we're on the wide or narrow Python build. Check # the length of a non-BMP code point # (U+1F64A, SPEAK-NO-EVIL MONKEY): - if len("\U0001F64A") == 2: - # Ensure this is within the range we can encode: - if value > 0x10FFFF: - raise ValueError("unichr() arg not in range(0x110000)") - code = value - 0x10000 - if value < 0: # Invalid code point - raise - lead = 0xD800 + (code >> 10) - trail = 0xDC00 + (code % (1 << 10)) - return unichr(lead) + unichr(trail) - raise + if len("\U0001F64A") == 1: # pragma: no cover + raise + # Ensure this is within the range we can encode: + if value > 0x10FFFF: + raise ValueError("unichr() arg not in range(0x110000)") + code = value - 0x10000 + if value < 0: # Invalid code point + raise + lead = 0xD800 + (code >> 10) + trail = 0xDC00 + (code % (1 << 10)) + return unichr(lead) + unichr(trail) @property def value(self): diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 3b5b35c..c0fda5d 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -155,6 +155,7 @@ class Template(Node): else: self.params.pop(i) return + raise ValueError(needle) @property def name(self): @@ -254,21 +255,19 @@ class Template(Node): return existing if showkey is None: - try: + if Parameter.can_hide_key(name): int_name = int(str(name)) - except ValueError: - showkey = True - else: int_keys = set() for param in self.params: if not param.showkey: - if re.match(r"[1-9][0-9]*$", param.name.strip()): - int_keys.add(int(str(param.name))) + int_keys.add(int(str(param.name))) expected = min(set(range(1, len(int_keys) + 2)) - int_keys) if expected == int_name: showkey = False else: showkey = True + else: + showkey = True if not showkey: self._surface_escape(value, "=") diff --git a/tests/test_builder.py b/tests/test_builder.py index ed306f7..58e3d1e 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ 
-27,6 +27,7 @@ try: except ImportError: import unittest +from mwparserfromhell.compat import py3k from mwparserfromhell.nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Tag, Template, Text, Wikilink) from mwparserfromhell.nodes.extras import Attribute, Parameter @@ -422,9 +423,9 @@ class TestBuilder(TreeEqualityTestCase): def test_parser_error(self): """test whether ParserError gets thrown for bad input""" + func = self.assertRaisesRegex if py3k else self.assertRaisesRegexp msg = r"_handle_token\(\) got unexpected TemplateClose" - self.assertRaisesRegexp( - ParserError, msg, self.builder.build, [tokens.TemplateClose()]) + func(ParserError, msg, self.builder.build, [tokens.TemplateClose()]) if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index eb6f606..3df596a 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -108,6 +108,7 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertRaises(ValueError, setattr, node3, "value", -1) self.assertRaises(ValueError, setattr, node1, "value", 110000) self.assertRaises(ValueError, setattr, node1, "value", "1114112") + self.assertRaises(ValueError, setattr, node1, "value", "12FFFF") def test_named(self): """test getter/setter for the named attribute""" @@ -163,10 +164,14 @@ class TestHTMLEntity(TreeEqualityTestCase): node2 = HTMLEntity("107") node3 = HTMLEntity("e9") node4 = HTMLEntity("1f648") + node5 = HTMLEntity("-2") + node6 = HTMLEntity("110000", named=False, hexadecimal=True) self.assertEqual("\xa0", node1.normalize()) self.assertEqual("k", node2.normalize()) self.assertEqual("é", node3.normalize()) self.assertEqual("\U0001F648", node4.normalize()) + self.assertRaises(ValueError, node5.normalize) + self.assertRaises(ValueError, node6.normalize) if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_parameter.py b/tests/test_parameter.py index ee52b59..2a4bb75 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -71,9 +71,10 @@ class TestParameter(TreeEqualityTestCase): self.assertFalse(node1.showkey) self.assertTrue(node2.showkey) node1.showkey = True - node2.showkey = "" self.assertTrue(node1.showkey) - self.assertFalse(node2.showkey) + node1.showkey = "" + self.assertFalse(node1.showkey) + self.assertRaises(ValueError, setattr, node2, "showkey", False) if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_tag.py b/tests/test_tag.py index 111511a..0eae713 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -33,6 +33,7 @@ from mwparserfromhell.nodes.extras import Attribute from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext agen = lambda name, value: Attribute(wraptext(name), wraptext(value)) +agennv = lambda name: Attribute(wraptext(name)) agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), False) agenp = lambda name, v, a, b, c: Attribute(wraptext(name), v, True, a, b, c) agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, True, a, b, c) @@ -74,10 +75,10 @@ class TestTag(TreeEqualityTestCase): node1 = Tag(wraptext("ref"), wraptext("foobar")) # '''bold text''' node2 = Tag(wraptext("b"), wraptext("bold text"), wiki_markup="'''") - # + # node3 = Tag(wraptext("img"), - attrs=[Attribute(wraptext("id"), wraptext("foo")), - Attribute(wraptext("class"), wraptext("bar"))], + attrs=[agen("id", "foo"), agen("class", "bar"), + agennv("selected")], self_closing=True, padding=" ") gen1 = node1.__children__() @@ -89,6 +90,7 @@ 
class TestTag(TreeEqualityTestCase): self.assertEqual(node3.attributes[0].value, next(gen3)) self.assertEqual(node3.attributes[1].name, next(gen3)) self.assertEqual(node3.attributes[1].value, next(gen3)) + self.assertEqual(node3.attributes[2].name, next(gen3)) self.assertEqual(node1.contents, next(gen1)) self.assertEqual(node2.contents, next(gen2)) self.assertEqual(node1.closing_tag, next(gen1)) @@ -113,7 +115,8 @@ class TestTag(TreeEqualityTestCase): getter, marker = object(), object() get = lambda code: output.append((getter, code)) mark = lambda: output.append(marker) - node1 = Tag(wraptext("ref"), wraptext("text"), [agen("name", "foo")]) + node1 = Tag(wraptext("ref"), wraptext("text"), + [agen("name", "foo"), agennv("selected")]) node2 = Tag(wraptext("br"), self_closing=True, padding=" ") node3 = Tag(wraptext("br"), self_closing=True, invalid=True, implicit=True, padding=" ") @@ -122,9 +125,10 @@ class TestTag(TreeEqualityTestCase): node3.__showtree__(output.append, get, mark) valid = [ "<", (getter, node1.tag), (getter, node1.attributes[0].name), - " = ", marker, (getter, node1.attributes[0].value), ">", - (getter, node1.contents), "", - "<", (getter, node2.tag), "/>", ""] + " = ", marker, (getter, node1.attributes[0].value), + (getter, node1.attributes[1].name), ">", (getter, node1.contents), + "", "<", (getter, node2.tag), + "/>", ""] self.assertEqual(valid, output) def test_tag(self): diff --git a/tests/test_template.py b/tests/test_template.py index 584b02f..e015a6a 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -130,6 +130,8 @@ class TestTemplate(TreeEqualityTestCase): self.assertTrue(node4.has("b", False)) self.assertTrue(node3.has("b", True)) self.assertFalse(node4.has("b", True)) + self.assertFalse(node1.has_param("foobar", False)) + self.assertTrue(node2.has_param(1, False)) def test_get(self): """test Template.get()""" @@ -176,52 +178,41 @@ class TestTemplate(TreeEqualityTestCase): pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) node16 = Template(wraptext("a"), [ pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) - node17 = Template(wraptext("a"), [ - pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) - node18 = Template(wraptext("a\n"), [ - pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), - pgens("h ", " i\n")]) - node19 = Template(wraptext("a"), [ - pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) - node20 = Template(wraptext("a"), [ - pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) - node21 = Template(wraptext("a"), [pgenh("1", "b")]) - node22 = Template(wraptext("a"), [pgenh("1", "b")]) - node23 = Template(wraptext("a"), [pgenh("1", "b")]) - node24 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), + node17 = Template(wraptext("a"), [pgenh("1", "b")]) + node18 = Template(wraptext("a"), [pgenh("1", "b")]) + node19 = Template(wraptext("a"), [pgenh("1", "b")]) + node20 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), pgenh("3", "d"), pgenh("4", "e")]) - node25 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), + node21 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), pgens("4", "d"), pgens("5", "e")]) - node26 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), + node22 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), pgens("4", "d"), pgens("5", "e")]) + node23 = Template(wraptext("a"), [pgenh("1", "b")]) + node24 = Template(wraptext("a"), [pgenh("1", "b")]) + node25 = Template(wraptext("a"), [pgens("b", 
"c")]) + node26 = Template(wraptext("a"), [pgenh("1", "b")]) node27 = Template(wraptext("a"), [pgenh("1", "b")]) - node28 = Template(wraptext("a"), [pgenh("1", "b")]) - node29 = Template(wraptext("a"), [pgens("b", "c")]) - node30 = Template(wraptext("a"), [pgenh("1", "b")]) - node31 = Template(wraptext("a"), [pgenh("1", "b")]) - node32 = Template(wraptext("a"), [pgens("1", "b")]) - node33 = Template(wraptext("a"), [ + node28 = Template(wraptext("a"), [pgens("1", "b")]) + node29 = Template(wraptext("a"), [ pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) - node34 = Template(wraptext("a\n"), [ + node30 = Template(wraptext("a\n"), [ pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), pgens("h ", " i\n")]) - node35 = Template(wraptext("a"), [ + node31 = Template(wraptext("a"), [ pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) - node36 = Template(wraptext("a"), [ + node32 = Template(wraptext("a"), [ pgens("\nb ", " c "), pgens("\nd ", " e "), pgens("\nf ", " g ")]) - node37 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), - pgens("b", "f"), pgens("b", "h"), - pgens("i", "j")]) - node37 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), + node33 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), pgens("b", "f"), pgens("b", "h"), pgens("i", "j")]) - node38 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"), + node34 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"), pgens("1", "c"), pgens("2", "d")]) - node39 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"), + node35 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"), pgenh("1", "c"), pgenh("2", "d")]) - node40 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), + node36 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), pgens("f", "g")]) - node41 = Template(wraptext("a"), [pgenh("1", "")]) + node37 = Template(wraptext("a"), [pgenh("1", "")]) + node38 = Template(wraptext("abc")) node1.add("e", "f", showkey=True) node2.add(2, "g", showkey=False) @@ -241,31 +232,29 @@ class TestTemplate(TreeEqualityTestCase): node14.add("j", "k", showkey=True) node15.add("h", "i", showkey=True) node16.add("h", "i", showkey=True, preserve_spacing=False) - node17.add("h", "i", showkey=False) - node18.add("j", "k", showkey=False) - node19.add("h", "i", showkey=False) - node20.add("h", "i", showkey=False, preserve_spacing=False) - node21.add("2", "c") - node22.add("3", "c") - node23.add("c", "d") - node24.add("5", "f") - node25.add("3", "f") - node26.add("6", "f") - node27.add("c", "foo=bar") - node28.add("2", "foo=bar") - node29.add("b", "d") - node30.add("1", "foo=bar") - node31.add("1", "foo=bar", showkey=True) - node32.add("1", "foo=bar", showkey=False) - node33.add("d", "foo") - node34.add("f", "foo") - node35.add("f", "foo") - node36.add("d", "foo", preserve_spacing=False) - node37.add("b", "k") - node38.add("1", "e") - node39.add("1", "e") - node40.add("d", "h", before="b") - node41.add(1, "b") + node17.add("2", "c") + node18.add("3", "c") + node19.add("c", "d") + node20.add("5", "f") + node21.add("3", "f") + node22.add("6", "f") + node23.add("c", "foo=bar") + node24.add("2", "foo=bar") + node25.add("b", "d") + node26.add("1", "foo=bar") + node27.add("1", "foo=bar", showkey=True) + node28.add("1", "foo=bar", showkey=False) + node29.add("d", "foo") + node30.add("f", "foo") + node31.add("f", "foo") + node32.add("d", "foo", preserve_spacing=False) + node33.add("b", "k") + node34.add("1", "e") + node35.add("1", "e") + node36.add("d", "h", 
before="b") + node37.add(1, "b") + node38.add("1", "foo") + self.assertRaises(ValueError, node38.add, "z", "bar", showkey=False) self.assertEqual("{{a|b=c|d|e=f}}", node1) self.assertEqual("{{a|b=c|d|g}}", node2) @@ -285,34 +274,31 @@ class TestTemplate(TreeEqualityTestCase): self.assertEqual("{{a\n|b =c\n|d = e|f =g\n|h = i\n|j =k\n}}", node14) self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |h =i}}", node15) self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|h=i}}", node16) - self.assertEqual("{{a|\nb = c|\nd = e|\nf = g| i}}", node17) - self.assertEqual("{{a\n|b =c\n|d = e|f =g\n|h = i\n|k\n}}", node18) - self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |i}}", node19) - self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|i}}", node20) - self.assertEqual("{{a|b|c}}", node21) - self.assertEqual("{{a|b|3=c}}", node22) - self.assertEqual("{{a|b|c=d}}", node23) - self.assertEqual("{{a|b|c|d|e|f}}", node24) - self.assertEqual("{{a|b|c|4=d|5=e|f}}", node25) - self.assertEqual("{{a|b|c|4=d|5=e|6=f}}", node26) - self.assertEqual("{{a|b|c=foo=bar}}", node27) - self.assertEqual("{{a|b|foo=bar}}", node28) - self.assertIsInstance(node28.params[1].value.get(1), HTMLEntity) - self.assertEqual("{{a|b=d}}", node29) - self.assertEqual("{{a|foo=bar}}", node30) - self.assertIsInstance(node30.params[0].value.get(1), HTMLEntity) - self.assertEqual("{{a|1=foo=bar}}", node31) - self.assertEqual("{{a|foo=bar}}", node32) - self.assertIsInstance(node32.params[0].value.get(1), HTMLEntity) - self.assertEqual("{{a|\nb = c|\nd = foo|\nf = g}}", node33) - self.assertEqual("{{a\n|b =c\n|d = e|f =foo\n|h = i\n}}", node34) - self.assertEqual("{{a|b = c\n|\nd = e|\nf =foo }}", node35) - self.assertEqual("{{a|\nb = c |\nd =foo|\nf = g }}", node36) - self.assertEqual("{{a|b=k|d=e|i=j}}", node37) - self.assertEqual("{{a|1=e|x=y|2=d}}", node38) - self.assertEqual("{{a|x=y|e|d}}", node39) - self.assertEqual("{{a|b=c|d=h|f=g}}", node40) - self.assertEqual("{{a|b}}", node41) + self.assertEqual("{{a|b|c}}", node17) + self.assertEqual("{{a|b|3=c}}", node18) + self.assertEqual("{{a|b|c=d}}", node19) + self.assertEqual("{{a|b|c|d|e|f}}", node20) + self.assertEqual("{{a|b|c|4=d|5=e|f}}", node21) + self.assertEqual("{{a|b|c|4=d|5=e|6=f}}", node22) + self.assertEqual("{{a|b|c=foo=bar}}", node23) + self.assertEqual("{{a|b|foo=bar}}", node24) + self.assertIsInstance(node24.params[1].value.get(1), HTMLEntity) + self.assertEqual("{{a|b=d}}", node25) + self.assertEqual("{{a|foo=bar}}", node26) + self.assertIsInstance(node26.params[0].value.get(1), HTMLEntity) + self.assertEqual("{{a|1=foo=bar}}", node27) + self.assertEqual("{{a|foo=bar}}", node28) + self.assertIsInstance(node28.params[0].value.get(1), HTMLEntity) + self.assertEqual("{{a|\nb = c|\nd = foo|\nf = g}}", node29) + self.assertEqual("{{a\n|b =c\n|d = e|f =foo\n|h = i\n}}", node30) + self.assertEqual("{{a|b = c\n|\nd = e|\nf =foo }}", node31) + self.assertEqual("{{a|\nb = c |\nd =foo|\nf = g }}", node32) + self.assertEqual("{{a|b=k|d=e|i=j}}", node33) + self.assertEqual("{{a|1=e|x=y|2=d}}", node34) + self.assertEqual("{{a|x=y|e|d}}", node35) + self.assertEqual("{{a|b=c|d=h|f=g}}", node36) + self.assertEqual("{{a|b}}", node37) + self.assertEqual("{{abc|foo}}", node38) def test_remove(self): """test Template.remove()""" @@ -373,6 +359,8 @@ class TestTemplate(TreeEqualityTestCase): node26 = Template(wraptext("foo"), [ pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"), pgens("a", "b")]) + node27 = Template(wraptext("foo"), [pgenh("1", "bar")]) + node28 = Template(wraptext("foo"), 
[pgenh("1", "bar")]) node2.remove("1") node2.remove("abc") @@ -430,6 +418,7 @@ class TestTemplate(TreeEqualityTestCase): self.assertEqual("{{foo|a=|c=d|e=f|a=b|a=b}}", node24) self.assertEqual("{{foo|a=b|c=d|e=f|a=b}}", node25) self.assertEqual("{{foo|a=b|c=d|e=f|a=|a=b}}", node26) + self.assertRaises(ValueError, node27.remove, node28.get(1)) if __name__ == "__main__": unittest.main(verbosity=2) From a4c2fd023adfe95fdd5552cc2bab90a0bbc16a2a Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 5 Jul 2014 01:00:11 -0400 Subject: [PATCH 029/102] Remove some useless code in the tokenizers. --- mwparserfromhell/parser/tokenizer.c | 4 +--- mwparserfromhell/parser/tokenizer.py | 8 +++----- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 6ab8570..963e7d7 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -832,8 +832,6 @@ static int Tokenizer_parse_wikilink(Tokenizer* self) Py_DECREF(wikilink); if (Tokenizer_emit(self, WikilinkClose)) return -1; - if (self->topstack->context & LC_FAIL_NEXT) - self->topstack->context ^= LC_FAIL_NEXT; return 0; } @@ -1718,7 +1716,7 @@ Tokenizer_handle_tag_data(Tokenizer* self, TagData* data, Py_UNICODE chunk) return -1; } } - else if (data->context & TAG_ATTR_VALUE) { + else { // data->context & TAG_ATTR_VALUE assured escaped = (Tokenizer_READ_BACKWARDS(self, 1) == '\\' && Tokenizer_READ_BACKWARDS(self, 2) != '\\'); if (data->context & TAG_NOTE_QUOTE) { diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 9af9204..6430f0f 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -255,7 +255,7 @@ class Tokenizer(object): self._context ^= contexts.TEMPLATE_NAME elif self._context & contexts.TEMPLATE_PARAM_VALUE: self._context ^= contexts.TEMPLATE_PARAM_VALUE - elif self._context & contexts.TEMPLATE_PARAM_KEY: + else: self._emit_all(self._pop(keep_context=True)) self._context |= contexts.TEMPLATE_PARAM_KEY self._emit(tokens.TemplateParamSeparator()) @@ -296,8 +296,6 @@ class Tokenizer(object): self._head = reset self._emit_text("[[") else: - if self._context & contexts.FAIL_NEXT: - self._context ^= contexts.FAIL_NEXT self._emit(tokens.WikilinkOpen()) self._emit_all(wikilink) self._emit(tokens.WikilinkClose()) @@ -687,7 +685,7 @@ class Tokenizer(object): self._push_tag_buffer(data) data.context = data.CX_ATTR_NAME self._push(contexts.TAG_ATTR) - elif data.context & data.CX_ATTR_VALUE: + else: # data.context & data.CX_ATTR_VALUE assured escaped = self._read(-1) == "\\" and self._read(-2) != "\\" if data.context & data.CX_NOTE_QUOTE: data.context ^= data.CX_NOTE_QUOTE @@ -943,7 +941,7 @@ class Tokenizer(object): elif ticks == 3: if self._parse_bold(): return self._pop() - elif ticks == 5: + else: # ticks == 5 self._parse_italics_and_bold() self._head -= 1 From b997e4cd7131b541dbf9027dbf67ebc46ed356ea Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 5 Jul 2014 04:21:56 -0400 Subject: [PATCH 030/102] Support attributes quoted with '; add required quotes in value setter. 
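An illustrative sketch of the new behavior, mirroring the test_tag.py changes below (``parse`` and ``filter_tags`` are existing public API, not part of this patch)::

    import mwparserfromhell

    code = mwparserfromhell.parse("<ref>cite</ref>")
    tag = code.filter_tags()[0]
    tag.add("name", "value", quotes="'")  # single-quoted attributes now work
    print(code)  # -> <ref name='value'>cite</ref>
    # Values containing spaces may no longer be left unquoted:
    tag.add("data", "a b c", quotes=None)  # raises ValueError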
--- CHANGELOG | 4 +++ docs/changelog.rst | 5 +++ mwparserfromhell/nodes/extras/attribute.py | 51 +++++++++++++++++++------- mwparserfromhell/nodes/tag.py | 17 +++++---- mwparserfromhell/parser/builder.py | 6 ++-- mwparserfromhell/parser/tokenizer.c | 21 +++++++---- mwparserfromhell/parser/tokenizer.h | 1 + mwparserfromhell/parser/tokenizer.py | 12 ++++--- mwparserfromhell/parser/tokens.py | 2 +- tests/_test_tree_equality.py | 2 +- tests/test_attribute.py | 48 +++++++++++++++++-------- tests/test_builder.py | 16 ++++----- tests/test_tag.py | 29 ++++++++------- tests/tokenizer/integration.mwtest | 2 +- tests/tokenizer/tags.mwtest | 58 ++++++++++++++++++++++-------- 15 files changed, 189 insertions(+), 85 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 1200575..f7dcb8a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -10,6 +10,10 @@ v0.4 (unreleased): option, RECURSE_OTHERS, which recurses over all children except instances of 'forcetype' (for example, `code.filter_templates(code.RECURSE_OTHERS)` returns all un-nested templates). +- The parser now understands HTML tag attributes quoted with single quotes. + When setting a tag attribute's value, quotes will be added if necessary. As + part of this, Attribute's 'quoted' attribute has been changed to 'quotes', + and is now either a string or None. - Calling Template.remove() with a Parameter object that is not part of the template now raises ValueError instead of doing nothing. - Parameters with non-integer keys can no longer be created with diff --git a/docs/changelog.rst b/docs/changelog.rst index ba26722..3bc4ce7 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -18,6 +18,11 @@ Unreleased which recurses over all children except instances of *forcetype* (for example, ``code.filter_templates(code.RECURSE_OTHERS)`` returns all un-nested templates). +- The parser now understands HTML tag attributes quoted with single quotes. + When setting a tag attribute's value, quotes will be added if necessary. As + part of this, :py:class:`.Attribute`\ 's :py:attr:`~.Attribute.quoted` + attribute has been changed to :py:attr:`~.Attribute.quotes`, and is now + either a string or ``None``. - Calling :py:meth:`.Template.remove` with a :py:class:`.Parameter` object that is not part of the template now raises :py:exc:`ValueError` instead of doing nothing. diff --git a/mwparserfromhell/nodes/extras/attribute.py b/mwparserfromhell/nodes/extras/attribute.py index 4b7c668..6256138 100644 --- a/mwparserfromhell/nodes/extras/attribute.py +++ b/mwparserfromhell/nodes/extras/attribute.py @@ -36,12 +36,14 @@ class Attribute(StringMixIn): whose value is ``"foo"``. 
""" - def __init__(self, name, value=None, quoted=True, pad_first=" ", + def __init__(self, name, value=None, quotes='"', pad_first=" ", pad_before_eq="", pad_after_eq=""): super(Attribute, self).__init__() + if not quotes and self._value_needs_quotes(value): + raise ValueError("given value {0!r} requires quotes".format(value)) self._name = name self._value = value - self._quoted = quoted + self._quotes = quotes self._pad_first = pad_first self._pad_before_eq = pad_before_eq self._pad_after_eq = pad_after_eq @@ -50,11 +52,18 @@ class Attribute(StringMixIn): result = self.pad_first + str(self.name) + self.pad_before_eq if self.value is not None: result += "=" + self.pad_after_eq - if self.quoted: - return result + '"' + str(self.value) + '"' + if self.quotes: + return result + self.quotes + str(self.value) + self.quotes return result + str(self.value) return result + @staticmethod + def _value_needs_quotes(val): + """Return the preferred quotes for the given value, or None.""" + if val and any(char.isspace() for char in val): + return ('"' in val and "'" in val) or ("'" if '"' in val else '"') + return None + def _set_padding(self, attr, value): """Setter for the value of a padding attribute.""" if not value: @@ -65,6 +74,14 @@ class Attribute(StringMixIn): raise ValueError("padding must be entirely whitespace") setattr(self, attr, value) + @staticmethod + def coerce_quotes(quotes): + """Coerce a quote type into an acceptable value, or raise an error.""" + orig, quotes = quotes, str(quotes) if quotes else None + if quotes not in [None, '"', "'"]: + raise ValueError("{0!r} is not a valid quote type".format(orig)) + return quotes + @property def name(self): """The name of the attribute as a :py:class:`~.Wikicode` object.""" @@ -76,9 +93,9 @@ class Attribute(StringMixIn): return self._value @property - def quoted(self): - """Whether the attribute's value is quoted with double quotes.""" - return self._quoted + def quotes(self): + """How to enclose the attribute value. ``"``, ``'``, or ``None``.""" + return self._quotes @property def pad_first(self): @@ -101,11 +118,21 @@ class Attribute(StringMixIn): @value.setter def value(self, newval): - self._value = None if newval is None else parse_anything(newval) - - @quoted.setter - def quoted(self, value): - self._quoted = bool(value) + if newval is None: + self._value = None + else: + code = parse_anything(newval) + quotes = self._value_needs_quotes(code) + if quotes in ['"', "'"] or (quotes is True and not self.quotes): + self._quotes = quotes + self._value = code + + @quotes.setter + def quotes(self, value): + value = self.coerce_quotes(value) + if not value and self._value_needs_quotes(self.value): + raise ValueError("attribute value requires quotes") + self._quotes = value @pad_first.setter def pad_first(self, value): diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index f283d46..1b8efb8 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -236,21 +236,24 @@ class Tag(Node): return attr raise ValueError(name) - def add(self, name, value=None, quoted=True, pad_first=" ", + def add(self, name, value=None, quotes='"', pad_first=" ", pad_before_eq="", pad_after_eq=""): """Add an attribute with the given *name* and *value*. *name* and *value* can be anything parsable by :py:func:`.utils.parse_anything`; *value* can be omitted if the - attribute is valueless. *quoted* is a bool telling whether to wrap the - *value* in double quotes (this is recommended). 
*pad_first*, - *pad_before_eq*, and *pad_after_eq* are whitespace used as padding - before the name, before the equal sign (or after the name if no value), - and after the equal sign (ignored if no value), respectively. + attribute is valueless. If *quotes* is not ``None``, it should be a + string (either ``"`` or ``'``) that *value* will be wrapped in (this is + recommended). ``None`` is only legal if *value* contains no spacing. + + *pad_first*, *pad_before_eq*, and *pad_after_eq* are whitespace used as + padding before the name, before the equal sign (or after the name if no + value), and after the equal sign (ignored if no value), respectively. """ if value is not None: value = parse_anything(value) - attr = Attribute(parse_anything(name), value, quoted) + quotes = Attribute.coerce_quotes(quotes) + attr = Attribute(parse_anything(name), value, quotes) attr.pad_first = pad_first attr.pad_before_eq = pad_before_eq attr.pad_after_eq = pad_after_eq diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index 559bd54..c9a930b 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -193,7 +193,7 @@ class Builder(object): def _handle_attribute(self, start): """Handle a case where a tag attribute is at the head of the tokens.""" - name, quoted = None, False + name = quotes = None self._push() while self._tokens: token = self._tokens.pop() @@ -201,7 +201,7 @@ class Builder(object): name = self._pop() self._push() elif isinstance(token, tokens.TagAttrQuote): - quoted = True + quotes = token.char elif isinstance(token, (tokens.TagAttrStart, tokens.TagCloseOpen, tokens.TagCloseSelfclose)): self._tokens.append(token) @@ -209,7 +209,7 @@ class Builder(object): value = self._pop() else: name, value = self._pop(), None - return Attribute(name, value, quoted, start.pad_first, + return Attribute(name, value, quotes, start.pad_first, start.pad_before_eq, start.pad_after_eq) else: self._write(self._handle_token(token)) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 963e7d7..4c6414e 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -173,7 +173,7 @@ static TagData* TagData_new(void) ALLOC_BUFFER(self->pad_first) ALLOC_BUFFER(self->pad_before_eq) ALLOC_BUFFER(self->pad_after_eq) - self->reset = 0; + self->quoter = self->reset = 0; return self; } @@ -1566,10 +1566,18 @@ static int Tokenizer_parse_comment(Tokenizer* self) */ static int Tokenizer_push_tag_buffer(Tokenizer* self, TagData* data) { - PyObject *tokens, *kwargs, *pad_first, *pad_before_eq, *pad_after_eq; + PyObject *tokens, *kwargs, *tmp, *pad_first, *pad_before_eq, *pad_after_eq; if (data->context & TAG_QUOTED) { - if (Tokenizer_emit_first(self, TagAttrQuote)) + kwargs = PyDict_New(); + if (!kwargs) + return -1; + tmp = PyUnicode_FromUnicode(&data->quoter, 1); + if (!tmp) + return -1; + PyDict_SetItemString(kwargs, "char", tmp); + Py_DECREF(tmp); + if (Tokenizer_emit_first_kwargs(self, TagAttrQuote, kwargs)) return -1; tokens = Tokenizer_pop(self); if (!tokens) @@ -1721,16 +1729,17 @@ Tokenizer_handle_tag_data(Tokenizer* self, TagData* data, Py_UNICODE chunk) Tokenizer_READ_BACKWARDS(self, 2) != '\\'); if (data->context & TAG_NOTE_QUOTE) { data->context ^= TAG_NOTE_QUOTE; - if (chunk == '"' && !escaped) { + if ((chunk == '"' || chunk == '\'') && !escaped) { data->context |= TAG_QUOTED; + data->quoter = chunk; + data->reset = self->head; if (Tokenizer_push(self, self->topstack->context)) return 
-1; - data->reset = self->head; return 0; } } else if (data->context & TAG_QUOTED) { - if (chunk == '"' && !escaped) { + if (chunk == data->quoter && !escaped) { data->context |= TAG_NOTE_SPACE; return 0; } diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index 4312e2f..dde6464 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -206,6 +206,7 @@ typedef struct { struct Textbuffer* pad_first; struct Textbuffer* pad_before_eq; struct Textbuffer* pad_after_eq; + Py_UNICODE quoter; Py_ssize_t reset; } TagData; diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 6430f0f..4422b5c 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -53,6 +53,7 @@ class _TagOpenData(object): def __init__(self): self.context = self.CX_NAME self.padding_buffer = {"first": "", "before_eq": "", "after_eq": ""} + self.quoter = None self.reset = 0 @@ -66,7 +67,7 @@ class Tokenizer(object): MAX_DEPTH = 40 MAX_CYCLES = 100000 regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE) - tag_splitter = re.compile(r"([\s\"\\]+)") + tag_splitter = re.compile(r"([\s\"\'\\]+)") def __init__(self): self._text = None @@ -612,7 +613,7 @@ class Tokenizer(object): def _push_tag_buffer(self, data): """Write a pending tag attribute from *data* to the stack.""" if data.context & data.CX_QUOTED: - self._emit_first(tokens.TagAttrQuote()) + self._emit_first(tokens.TagAttrQuote(char=data.quoter)) self._emit_all(self._pop()) buf = data.padding_buffer self._emit_first(tokens.TagAttrStart(pad_first=buf["first"], @@ -689,13 +690,14 @@ class Tokenizer(object): escaped = self._read(-1) == "\\" and self._read(-2) != "\\" if data.context & data.CX_NOTE_QUOTE: data.context ^= data.CX_NOTE_QUOTE - if chunk == '"' and not escaped: + if chunk in "'\"" and not escaped: data.context |= data.CX_QUOTED - self._push(self._context) + data.quoter = chunk data.reset = self._head + self._push(self._context) continue elif data.context & data.CX_QUOTED: - if chunk == '"' and not escaped: + if chunk == data.quoter and not escaped: data.context |= data.CX_NOTE_SPACE continue self._handle_tag_text(chunk) diff --git a/mwparserfromhell/parser/tokens.py b/mwparserfromhell/parser/tokens.py index c7cc3ef..e567731 100644 --- a/mwparserfromhell/parser/tokens.py +++ b/mwparserfromhell/parser/tokens.py @@ -100,7 +100,7 @@ CommentEnd = make("CommentEnd") # --> TagOpenOpen = make("TagOpenOpen") # < TagAttrStart = make("TagAttrStart") TagAttrEquals = make("TagAttrEquals") # = -TagAttrQuote = make("TagAttrQuote") # " +TagAttrQuote = make("TagAttrQuote") # ", ' TagCloseOpen = make("TagCloseOpen") # > TagCloseSelfclose = make("TagCloseSelfclose") # /> TagOpenClose = make("TagOpenClose") # [[Source]] + # mno = '{{p}} [[q]] {{r}}'>[[Source]] ([tokens.TagOpenOpen(), tokens.Text(text="ref"), tokens.TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), @@ -308,7 +308,7 @@ class TestBuilder(TreeEqualityTestCase): tokens.TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), tokens.Text(text="foo"), tokens.TagAttrEquals(), - tokens.TagAttrQuote(), tokens.Text(text="bar "), + tokens.TagAttrQuote(char='"'), tokens.Text(text="bar "), tokens.TemplateOpen(), tokens.Text(text="baz"), tokens.TemplateClose(), tokens.TagAttrStart(pad_first=" ", pad_before_eq="", @@ -326,7 +326,7 @@ class TestBuilder(TreeEqualityTestCase): tokens.TagAttrStart(pad_first=" \n ", pad_before_eq=" ", pad_after_eq=" "), 
tokens.Text(text="mno"), tokens.TagAttrEquals(), - tokens.TagAttrQuote(), tokens.TemplateOpen(), + tokens.TagAttrQuote(char="'"), tokens.TemplateOpen(), tokens.Text(text="p"), tokens.TemplateClose(), tokens.Text(text=" "), tokens.WikilinkOpen(), tokens.Text(text="q"), tokens.WikilinkClose(), @@ -338,17 +338,17 @@ class TestBuilder(TreeEqualityTestCase): tokens.TagCloseClose()], wrap([Tag(wraptext("ref"), wrap([Wikilink(wraptext("Source"))]), [ Attribute(wraptext("name"), - wrap([Template(wraptext("abc"))]), False), + wrap([Template(wraptext("abc"))]), None), Attribute(wraptext("foo"), wrap([Text("bar "), Template(wraptext("baz"))]), pad_first=" "), Attribute(wraptext("abc"), wrap([Template(wraptext("de")), - Text("f")]), False), + Text("f")]), None), Attribute(wraptext("ghi"), wrap([Text("j"), Template(wraptext("k")), - Template(wraptext("l"))]), False), + Template(wraptext("l"))]), None), Attribute(wraptext("mno"), wrap([Template(wraptext("p")), Text(" "), Wikilink(wraptext("q")), Text(" "), - Template(wraptext("r"))]), True, " \n ", " ", + Template(wraptext("r"))]), "'", " \n ", " ", " ")])])), # "''italic text''" diff --git a/tests/test_tag.py b/tests/test_tag.py index 0eae713..7577cce 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -34,9 +34,9 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext agen = lambda name, value: Attribute(wraptext(name), wraptext(value)) agennv = lambda name: Attribute(wraptext(name)) -agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), False) -agenp = lambda name, v, a, b, c: Attribute(wraptext(name), v, True, a, b, c) -agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, True, a, b, c) +agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), None) +agenp = lambda name, v, a, b, c: Attribute(wraptext(name), v, '"', a, b, c) +agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, '"', a, b, c) class TestTag(TreeEqualityTestCase): """Test cases for the Tag node.""" @@ -276,28 +276,33 @@ class TestTag(TreeEqualityTestCase): """test Tag.add()""" node = Tag(wraptext("ref"), wraptext("cite")) node.add("name", "value") - node.add("name", "value", quoted=False) + node.add("name", "value", quotes=None) + node.add("name", "value", quotes="'") node.add("name") node.add(1, False) node.add("style", "{{foobar}}") - node.add("name", "value", True, "\n", " ", " ") + node.add("name", "value", '"', "\n", " ", " ") attr1 = ' name="value"' attr2 = " name=value" - attr3 = " name" - attr4 = ' 1="False"' - attr5 = ' style="{{foobar}}"' - attr6 = '\nname = "value"' + attr3 = " name='value'" + attr4 = " name" + attr5 = ' 1="False"' + attr6 = ' style="{{foobar}}"' + attr7 = '\nname = "value"' self.assertEqual(attr1, node.attributes[0]) self.assertEqual(attr2, node.attributes[1]) self.assertEqual(attr3, node.attributes[2]) self.assertEqual(attr4, node.attributes[3]) self.assertEqual(attr5, node.attributes[4]) self.assertEqual(attr6, node.attributes[5]) - self.assertEqual(attr6, node.get("name")) + self.assertEqual(attr7, node.attributes[6]) + self.assertEqual(attr7, node.get("name")) self.assertWikicodeEqual(wrap([Template(wraptext("foobar"))]), - node.attributes[4].value) + node.attributes[5].value) self.assertEqual("".join(("cite")), node) + attr6, attr7, ">cite")), node) + self.assertRaises(ValueError, node.add, "name", "foo", quotes="bar") + self.assertRaises(ValueError, node.add, "name", "a bc d", quotes=None) def test_remove(self): """test Tag.remove()""" diff --git 
a/tests/tokenizer/integration.mwtest b/tests/tokenizer/integration.mwtest index 5e1a409..372a367 100644 --- a/tests/tokenizer/integration.mwtest +++ b/tests/tokenizer/integration.mwtest @@ -43,7 +43,7 @@ output: [Text(text="&n"), CommentStart(), Text(text="foo"), CommentEnd(), Text(t name: rich_tags label: a HTML tag with tons of other things in it input: "{{dubious claim}}[[Source]]" -output: [TemplateOpen(), Text(text="dubious claim"), TemplateClose(), TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TemplateOpen(), Text(text="abc"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(), Text(text="bar "), TemplateOpen(), Text(text="baz"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="abc"), TagAttrEquals(), TemplateOpen(), Text(text="de"), TemplateClose(), Text(text="f"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="ghi"), TagAttrEquals(), Text(text="j"), TemplateOpen(), Text(text="k"), TemplateClose(), TemplateOpen(), Text(text="l"), TemplateClose(), TagAttrStart(pad_first=" \n ", pad_before_eq=" ", pad_after_eq=" "), Text(text="mno"), TagAttrEquals(), TagAttrQuote(), TemplateOpen(), Text(text="p"), TemplateClose(), Text(text=" "), WikilinkOpen(), Text(text="q"), WikilinkClose(), Text(text=" "), TemplateOpen(), Text(text="r"), TemplateClose(), TagCloseOpen(padding=""), WikilinkOpen(), Text(text="Source"), WikilinkClose(), TagOpenClose(), Text(text="ref"), TagCloseClose()] +output: [TemplateOpen(), Text(text="dubious claim"), TemplateClose(), TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TemplateOpen(), Text(text="abc"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="bar "), TemplateOpen(), Text(text="baz"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="abc"), TagAttrEquals(), TemplateOpen(), Text(text="de"), TemplateClose(), Text(text="f"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="ghi"), TagAttrEquals(), Text(text="j"), TemplateOpen(), Text(text="k"), TemplateClose(), TemplateOpen(), Text(text="l"), TemplateClose(), TagAttrStart(pad_first=" \n ", pad_before_eq=" ", pad_after_eq=" "), Text(text="mno"), TagAttrEquals(), TagAttrQuote(char="\""), TemplateOpen(), Text(text="p"), TemplateClose(), Text(text=" "), WikilinkOpen(), Text(text="q"), WikilinkClose(), Text(text=" "), TemplateOpen(), Text(text="r"), TemplateClose(), TagCloseOpen(padding=""), WikilinkOpen(), Text(text="Source"), WikilinkClose(), TagOpenClose(), Text(text="ref"), TagCloseClose()] --- diff --git a/tests/tokenizer/tags.mwtest b/tests/tokenizer/tags.mwtest index 26e569b..f979329 100644 --- a/tests/tokenizer/tags.mwtest +++ b/tests/tokenizer/tags.mwtest @@ -57,7 +57,14 @@ output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before name: attribute_quoted label: a tag with a single quoted attribute input: "" -output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] +output: [TagOpenOpen(), Text(text="ref"), 
TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()]
+
+---
+
+name: attribute_single_quoted
+label: a tag with a single singly-quoted attribute
+input: "<ref name='foo bar'></ref>"
+output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="'"), Text(text="foo bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()]

---

@@ -71,7 +78,7 @@ output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before
name: attribute_quoted_hyphen
label: a tag with a single quoted attribute, containing a hyphen
input: "<ref name="foo-bar"></ref>"
-output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo-bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()]
+output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo-bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()]

---

@@ -92,21 +99,21 @@ output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before
name: attribute_selfclosing_value_quoted
label: a self-closing tag with a single quoted attribute
input: "<ref name="foo"/>"
-output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo"), TagCloseSelfclose(padding="")]
+output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo"), TagCloseSelfclose(padding="")]

---

name: nested_tag
label: a tag nested within the attributes of another
input: "<ref name=<span style="color: red;">foo</span>>citation</ref>"
-output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()]
+output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()]

---

name: nested_tag_quoted
label: a tag nested within the attributes of another, quoted
input: "<ref name="<span style="color: red;">foo</span>">citation</ref>"
-output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(), Text(text="color: red;"), 
TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()]
+output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()]

---

@@ -120,7 +127,7 @@ output: [Text(text="<ref name=<ref></ref>>citation</ref>")]
name: nested_troll_tag_quoted
label: a bogus tag that appears to be nested within the attributes of another, quoted
input: "<ref name="<ref></ref>">citation</ref>"
-output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="<ref></ref>"), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()]
+output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="<ref></ref>"), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()]

---

@@ -222,6 +229,27 @@ output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_befor

---

+name: quotes_in_quotes
+label: singly-quoted text inside a doubly-quoted attribute
+input: "<span foo="bar 'baz buzz' biz">stuff</span>"
+output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="bar 'baz buzz' biz"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()]
+
+---
+
+name: quotes_in_quotes_2
+label: doubly-quoted text inside a singly-quoted attribute
+input: "<span foo='bar "baz buzz" biz'>stuff</span>"
+output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="'"), Text(text="bar \"baz buzz\" biz"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()]
+
+---
+
+name: quotes_in_quotes_3
+label: doubly-quoted text inside a singly-quoted attribute, with backslashes
+input: "<span foo='bar "baz buzz\\" biz'>stuff</span>"
+output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="'"), Text(text="bar \"baz buzz\\\" biz"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()]
+
+---
+
name: incomplete_lbracket
label: incomplete tags: just a left bracket
input: "<"
@@ -407,28 +435,28 @@ output: [Text(text="junk <>")]
name: backslash_premature_before
label: a backslash before a quote before a space
input: "<foo attribute="this is\\" quoted">blah</foo>"
-output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\" quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]
+output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), 
Text(text="this is\\\" quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] --- name: backslash_premature_after label: a backslash before a quote after a space input: "blah" -output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is \\\"quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] +output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is \\\"quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] --- name: backslash_premature_middle label: a backslash before a quote in the middle of a word input: "blah" -output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this i\\\"s quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] +output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this i\\\"s quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] --- name: backslash_adjacent label: escaped quotes next to unescaped quotes input: "blah" -output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="\\\"this is quoted\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] +output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="\\\"this is quoted\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] --- @@ -442,21 +470,21 @@ output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before name: backslash_double label: two adjacent backslashes, which do *not* affect the quote input: "blah" -output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] +output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] --- name: backslash_triple label: three adjacent backslashes, which do *not* affect the quote input: "blah" -output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), 
Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] +output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is\\\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] --- name: backslash_unaffecting label: backslashes near quotes, but not immediately adjacent, thus having no effect input: "blah" -output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="\\quote\\d"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="also"), TagAttrEquals(), Text(text="\"quote\\d\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] +output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="\\quote\\d"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="also"), TagAttrEquals(), Text(text="\"quote\\d\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] --- @@ -477,7 +505,7 @@ output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(t name: unparsable_attributed label: a tag that should not be put through the normal parser; parsed attributes input: "{{t1}}{{t2}}{{t3}}" -output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(text="nowiki"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr"), TagAttrEquals(), Text(text="val"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr2"), TagAttrEquals(), TagAttrQuote(), TemplateOpen(), Text(text="val2"), TemplateClose(), TagCloseOpen(padding=""), Text(text="{{t2}}"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), TemplateOpen(), Text(text="t3"), TemplateClose()] +output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(text="nowiki"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr"), TagAttrEquals(), Text(text="val"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr2"), TagAttrEquals(), TagAttrQuote(char="\""), TemplateOpen(), Text(text="val2"), TemplateClose(), TagCloseOpen(padding=""), Text(text="{{t2}}"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), TemplateOpen(), Text(text="t3"), TemplateClose()] --- @@ -575,7 +603,7 @@ output: [Text(text="foo"), TagOpenOpen(invalid=True), Text(text="br"), TagCloseS name: single_only_close_attribute label: a tag that can only be single; presented as a close tag with an attribute input: "
    " -output: [TagOpenOpen(invalid=True), Text(text="br"), TagAttrStart(pad_first=" ", pad_after_eq="", pad_before_eq=""), Text(text="id"), TagAttrEquals(), TagAttrQuote(), Text(text="break"), TagCloseSelfclose(padding="", implicit=True)] +output: [TagOpenOpen(invalid=True), Text(text="br"), TagAttrStart(pad_first=" ", pad_after_eq="", pad_before_eq=""), Text(text="id"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="break"), TagCloseSelfclose(padding="", implicit=True)] --- From 963cb2f780bb3b2918810b5fb260e347ae6cd1ff Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 5 Jul 2014 15:53:57 -0400 Subject: [PATCH 031/102] Store builder handlers in a dictionary. ~5% speedup. --- mwparserfromhell/parser/builder.py | 68 ++++++++++++++++++++++++-------------- tests/test_builder.py | 13 +++++++- 2 files changed, 55 insertions(+), 26 deletions(-) diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index c9a930b..e0109e6 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -32,6 +32,19 @@ from ..wikicode import Wikicode __all__ = ["Builder"] +_HANDLERS = { + tokens.Text: lambda self, token: Text(token.text) +} + +def _add_handler(token_type): + """Create a decorator that adds a handler function to the lookup table.""" + def decorator(func): + """Add a handler function to the lookup table.""" + _HANDLERS[token_type] = func + return func + return decorator + + class Builder(object): """Builds a tree of nodes out of a sequence of tokens. @@ -83,8 +96,10 @@ class Builder(object): return Parameter(key, value, showkey) else: self._write(self._handle_token(token)) + raise ParserError("_handle_parameter() missed a close token") - def _handle_template(self): + @_add_handler(tokens.TemplateOpen) + def _handle_template(self, token): """Handle a case where a template is at the head of the tokens.""" params = [] default = 1 @@ -104,8 +119,10 @@ class Builder(object): return Template(name, params) else: self._write(self._handle_token(token)) + raise ParserError("_handle_template() missed a close token") - def _handle_argument(self): + @_add_handler(tokens.ArgumentOpen) + def _handle_argument(self, token): """Handle a case where an argument is at the head of the tokens.""" name = None self._push() @@ -120,8 +137,10 @@ class Builder(object): return Argument(self._pop()) else: self._write(self._handle_token(token)) + raise ParserError("_handle_argument() missed a close token") - def _handle_wikilink(self): + @_add_handler(tokens.WikilinkOpen) + def _handle_wikilink(self, token): """Handle a case where a wikilink is at the head of the tokens.""" title = None self._push() @@ -136,7 +155,9 @@ class Builder(object): return Wikilink(self._pop()) else: self._write(self._handle_token(token)) + raise ParserError("_handle_wikilink() missed a close token") + @_add_handler(tokens.ExternalLinkOpen) def _handle_external_link(self, token): """Handle when an external link is at the head of the tokens.""" brackets, url = token.brackets, None @@ -152,8 +173,10 @@ class Builder(object): return ExternalLink(self._pop(), brackets=brackets) else: self._write(self._handle_token(token)) + raise ParserError("_handle_external_link() missed a close token") - def _handle_entity(self): + @_add_handler(tokens.HTMLEntityStart) + def _handle_entity(self, token): """Handle a case where an HTML entity is at the head of the tokens.""" token = self._tokens.pop() if isinstance(token, tokens.HTMLEntityNumeric): @@ -168,6 +191,7 @@ class Builder(object): 
self._tokens.pop() # Remove HTMLEntityEnd return HTMLEntity(token.text, named=True, hexadecimal=False) + @_add_handler(tokens.HeadingStart) def _handle_heading(self, token): """Handle a case where a heading is at the head of the tokens.""" level = token.level @@ -179,8 +203,10 @@ class Builder(object): return Heading(title, level) else: self._write(self._handle_token(token)) + raise ParserError("_handle_heading() missed a close token") - def _handle_comment(self): + @_add_handler(tokens.CommentStart) + def _handle_comment(self, token): """Handle a case where an HTML comment is at the head of the tokens.""" self._push() while self._tokens: @@ -190,6 +216,7 @@ class Builder(object): return Comment(contents) else: self._write(self._handle_token(token)) + raise ParserError("_handle_comment() missed a close token") def _handle_attribute(self, start): """Handle a case where a tag attribute is at the head of the tokens.""" @@ -213,7 +240,9 @@ class Builder(object): start.pad_before_eq, start.pad_after_eq) else: self._write(self._handle_token(token)) + raise ParserError("_handle_attribute() missed a close token") + @_add_handler(tokens.TagOpenOpen) def _handle_tag(self, token): """Handle a case where a tag is at the head of the tokens.""" close_tokens = (tokens.TagCloseSelfclose, tokens.TagCloseClose) @@ -244,29 +273,15 @@ class Builder(object): invalid, implicit, padding, closing_tag) else: self._write(self._handle_token(token)) + raise ParserError("_handle_tag() missed a close token") def _handle_token(self, token): """Handle a single token.""" - if isinstance(token, tokens.Text): - return Text(token.text) - elif isinstance(token, tokens.TemplateOpen): - return self._handle_template() - elif isinstance(token, tokens.ArgumentOpen): - return self._handle_argument() - elif isinstance(token, tokens.WikilinkOpen): - return self._handle_wikilink() - elif isinstance(token, tokens.ExternalLinkOpen): - return self._handle_external_link(token) - elif isinstance(token, tokens.HTMLEntityStart): - return self._handle_entity() - elif isinstance(token, tokens.HeadingStart): - return self._handle_heading(token) - elif isinstance(token, tokens.CommentStart): - return self._handle_comment() - elif isinstance(token, tokens.TagOpenOpen): - return self._handle_tag(token) - err = "_handle_token() got unexpected {0}".format(type(token).__name__) - raise ParserError(err) + try: + return _HANDLERS[type(token)](self, token) + except KeyError: + err = "_handle_token() got unexpected {0}" + raise ParserError(err.format(type(token).__name__)) def build(self, tokenlist): """Build a Wikicode object from a list tokens and return it.""" @@ -277,3 +292,6 @@ class Builder(object): node = self._handle_token(self._tokens.pop()) self._write(node) return self._pop() + + +del _add_handler diff --git a/tests/test_builder.py b/tests/test_builder.py index 8f71ede..d4e6f73 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -421,11 +421,22 @@ class TestBuilder(TreeEqualityTestCase): named=True)]))])]) self.assertWikicodeEqual(valid, self.builder.build(test)) - def test_parser_error(self): + def test_parser_errors(self): """test whether ParserError gets thrown for bad input""" + missing_closes = [ + [tokens.TemplateOpen(), tokens.TemplateParamSeparator()], + [tokens.TemplateOpen()], [tokens.ArgumentOpen()], + [tokens.WikilinkOpen()], [tokens.ExternalLinkOpen()], + [tokens.HeadingStart()], [tokens.CommentStart()], + [tokens.TagOpenOpen(), tokens.TagAttrStart()], + [tokens.TagOpenOpen()] + ] + func = self.assertRaisesRegex if 
py3k else self.assertRaisesRegexp msg = r"_handle_token\(\) got unexpected TemplateClose" func(ParserError, msg, self.builder.build, [tokens.TemplateClose()]) + for test in missing_closes: + self.assertRaises(ParserError, self.builder.build, test) if __name__ == "__main__": unittest.main(verbosity=2) From ded89fb14ef4ebcf5c493e61f794ad8ac0288ec5 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 5 Jul 2014 19:27:26 -0400 Subject: [PATCH 032/102] Add a few unit tests for untested code; remove a useless conditional. --- mwparserfromhell/parser/tokenizer.c | 5 +---- mwparserfromhell/parser/tokenizer.py | 5 +---- tests/tokenizer/comments.mwtest | 7 +++++++ tests/tokenizer/external_links.mwtest | 4 ++-- tests/tokenizer/integration.mwtest | 7 +++++++ tests/tokenizer/tags_wikimarkup.mwtest | 7 +++++++ 6 files changed, 25 insertions(+), 10 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 4c6414e..d62b965 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -2508,10 +2508,7 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) } else if (context & LC_FAIL_ON_RBRACE) { if (data == '}') { - if (context & LC_TEMPLATE) - self->topstack->context |= LC_FAIL_ON_EQUALS; - else - self->topstack->context |= LC_FAIL_NEXT; + self->topstack->context |= LC_FAIL_NEXT; return 0; } self->topstack->context ^= LC_FAIL_ON_RBRACE; diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 4422b5c..09eb799 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1040,10 +1040,7 @@ class Tokenizer(object): self._context ^= contexts.FAIL_ON_LBRACE elif context & contexts.FAIL_ON_RBRACE: if this == "}": - if context & contexts.TEMPLATE: - self._context |= contexts.FAIL_ON_EQUALS - else: - self._context |= contexts.FAIL_NEXT + self._context |= contexts.FAIL_NEXT return True self._context ^= contexts.FAIL_ON_RBRACE elif this == "{": diff --git a/tests/tokenizer/comments.mwtest b/tests/tokenizer/comments.mwtest index ea2e89f..4bf82a9 100644 --- a/tests/tokenizer/comments.mwtest +++ b/tests/tokenizer/comments.mwtest @@ -37,3 +37,10 @@ name: incomplete_partial_close label: a comment that doesn't close, with a partial close input: "bingo" +output: [ExternalLinkOpen(brackets=False), Text(text="http://example.com/foo."), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz.&biz;"), CommentStart(), Text(text="hello"), CommentEnd(), Text(text="bingo"), ExternalLinkClose()] diff --git a/tests/tokenizer/tags_wikimarkup.mwtest b/tests/tokenizer/tags_wikimarkup.mwtest index feff9c5..04f617a 100644 --- a/tests/tokenizer/tags_wikimarkup.mwtest +++ b/tests/tokenizer/tags_wikimarkup.mwtest @@ -244,6 +244,13 @@ output: [Text(text="''"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagClos --- +name: unending_bold_and_italics +label: five ticks (bold and italics) that don't end +input: "'''''testing" +output: [Text(text="'''''testing")] + +--- + name: complex_ul label: ul with a lot in it input: "* this is a test of an [[Unordered list|ul]] with {{plenty|of|stuff}}" From 871d48c688bf8133f886e1d84de7de536e252ae6 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 10 Jul 2014 19:20:48 -0400 Subject: [PATCH 033/102] Solve a couple more coverage issues; tighten. 
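
The main tightening is the trailing-punctuation scan for free external
links, which previously walked over negative indices and needed an extra
bounds check that was awkward to cover. It is now a plain for/else. A
minimal sketch of the new loop shape (the helper name is invented for
illustration; the real code is inline in the tokenizer):

    def split_trailing_punct(this, punct=",;.:!?"):
        """Split a link chunk into (kept text, stripped punctuation)."""
        if not this.endswith(tuple(punct)):
            return this, ""
        # Walk backwards from the end; stop at the first character that
        # is not trailing punctuation. If the loop exhausts without
        # breaking, the whole chunk is punctuation and i falls to 0.
        for i in range(len(this) - 1, 0, -1):
            if this[i - 1] not in punct:
                break
        else:
            i = 0
        return this[:i], this[i:]

    >>> split_trailing_punct("example.com/foo.,")
    ('example.com/foo', '.,')

The unreachable fall-through in _handle_single_tag_end() likewise
becomes an explicit ParserError marked "# pragma: no cover".
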
--- .coveragerc | 1 + mwparserfromhell/parser/tokenizer.py | 8 ++++++-- tests/tokenizer/templates.mwtest | 14 ++++++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/.coveragerc b/.coveragerc index 909a0e2..48a64ce 100644 --- a/.coveragerc +++ b/.coveragerc @@ -6,3 +6,4 @@ partial_branches = pragma: no branch if py3k: if not py3k: + if py26: diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 09eb799..d867234 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -369,9 +369,11 @@ class Tokenizer(object): if "(" in this and ")" in punct: punct = punct[:-1] # ')' is not longer valid punctuation if this.endswith(punct): - for i in reversed(range(-len(this), 0)): - if i == -len(this) or this[i - 1] not in punct: + for i in range(len(this) - 1, 0, -1): + if this[i - 1] not in punct: break + else: + i = 0 stripped = this[:i] if stripped and tail: self._emit_text(tail) @@ -762,6 +764,8 @@ class Tokenizer(object): depth -= 1 if depth == 0: break + else: # pragma: no cover (untestable/exceptional case) + raise ParserError("_handle_single_tag_end() missed a TagCloseOpen") padding = stack[index].padding stack[index] = tokens.TagCloseSelfclose(padding=padding, implicit=True) return self._pop() diff --git a/tests/tokenizer/templates.mwtest b/tests/tokenizer/templates.mwtest index 78d7883..ff8a308 100644 --- a/tests/tokenizer/templates.mwtest +++ b/tests/tokenizer/templates.mwtest @@ -376,6 +376,20 @@ output: [Text(text="{{\nfoo\n|\n{{\nb\nar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}} --- +name: newlines_spaces +label: newlines in the middle of a template name, followed by spaces +input: "{{foo\n }}" +output: [TemplateOpen(), Text(text="foo\n "), TemplateClose()] + +--- + +name: newlines_spaces_param +label: newlines in the middle of a template name, followed by spaces +input: "{{foo\n }}" +output: [TemplateOpen(), Text(text="foo\n "), TemplateClose()] + +--- + name: invalid_name_left_brace_middle label: invalid characters in template name: left brace in middle input: "{{foo{bar}}" From fc529bdb57282d04cf6a660671a06489dcfe23a6 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 10 Jul 2014 19:48:12 -0400 Subject: [PATCH 034/102] Add unit tests for #59. 
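
Issue #59 covers template names that contain a newline followed by an
HTML comment: the tokenizer currently rejects them and falls back to
plain text. These integration tests pin down the expected behavior ahead
of the fix in the next commit; roughly, once it lands:

    >>> import mwparserfromhell
    >>> text = "{{foobar\n<!-- comment -->|key=value}}"
    >>> mwparserfromhell.parse(text).filter_templates()
    ['{{foobar\n<!-- comment -->|key=value}}']

When actual text (not just a comment and whitespace) follows the
newline, as in "{{foobar\n<!-- comment -->invalid|key=value}}", the
input must still be treated as plain text.
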
---
 tests/tokenizer/integration.mwtest | 35 +++++++++++++++++++++++++++++++++++
 tests/tokenizer/templates.mwtest | 6 +++---
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/tests/tokenizer/integration.mwtest b/tests/tokenizer/integration.mwtest
index c16fe53..ef6d5c5 100644
--- a/tests/tokenizer/integration.mwtest
+++ b/tests/tokenizer/integration.mwtest
@@ -192,3 +192,38 @@
name: nodes_inside_external_link_after_punct
label: various complex nodes inside an external link following punctuation
input: "http://example.com/foo.{{bar}}baz.&biz;<!--hello-->bingo"
output: [ExternalLinkOpen(brackets=False), Text(text="http://example.com/foo."), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz.&biz;"), CommentStart(), Text(text="hello"), CommentEnd(), Text(text="bingo"), ExternalLinkClose()]
+
+---
+
+name: newline_and_comment_in_template_name
+label: a template name containing a newline followed by a comment
+input: "{{foobar\n<!-- comment -->}}"
+output: [TemplateOpen(), Text(text="foobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), TemplateClose()]
+
+---
+
+name: newline_and_comment_in_template_name_2
+label: a template name containing a newline followed by a comment
+input: "{{foobar\n<!-- comment -->|key=value}}"
+output: [TemplateOpen(), Text(text="foobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), TemplateParamSeparator(), Text(text="key"), TemplateParamEquals(), Text(text="value"), TemplateClose()]
+
+---
+
+name: newline_and_comment_in_template_name_3
+label: a template name containing a newline followed by a comment
+input: "{{foobar\n<!-- comment -->\n|key=value}}"
+output: [TemplateOpen(), Text(text="foobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), Text(text="\n"), TemplateParamSeparator(), Text(text="key"), TemplateParamEquals(), Text(text="value"), TemplateClose()]
+
+---
+
+name: newline_and_comment_in_template_name_4
+label: a template name containing a newline followed by a comment
+input: "{{foobar\n<!-- comment -->invalid|key=value}}"
+output: [Text(text="{{foobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), Text(text="invalid|key=value}}")]
+
+---
+
+name: newline_and_comment_in_template_name_5
+label: a template name containing a newline followed by a comment
+input: "{{foobar\n<!-- comment -->\ninvalid|key=value}}"
+output: [Text(text="{{foobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), Text(text="\ninvalid|key=value}}")]
diff --git a/tests/tokenizer/templates.mwtest b/tests/tokenizer/templates.mwtest
index ff8a308..25e178a 100644
--- a/tests/tokenizer/templates.mwtest
+++ b/tests/tokenizer/templates.mwtest
@@ -384,9 +384,9 @@

---

name: newlines_spaces_param
-label: newlines in the middle of a template name, followed by spaces
-input: "{{foo\n }}"
-output: [TemplateOpen(), Text(text="foo\n "), TemplateClose()]
+label: newlines in the middle of a template name, followed by spaces, with a parameter
+input: "{{foo\n |bar=baz}}"
+output: [TemplateOpen(), Text(text="foo\n "), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose()]

---

From 6954480263b537c775c960f9b64e3a9cd4706481 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@gmail.com>
Date: Thu, 10 Jul 2014 20:17:45 -0400
Subject: [PATCH 035/102] Fix template parsing when comments are inside the name (fixes #59).
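
The fix is a small handshake between _verify_safe() and the comment
parser. While verifying a template name that already has text, further
text after a newline normally kills the route; but if that text starts
with "<!", it may be a legal comment, so the tokenizer now sets
FAIL_NEXT only tentatively and lets the comment parser clear it once a
full comment is consumed. In sketch form, from the Python tokenizer
below:

    # in _verify_safe(), inside the FAIL_ON_TEXT branch:
    if this == "<" and self._read(1) == "!":
        self._context |= contexts.FAIL_NEXT  # possibly a comment
        return True

    # in _parse_comment(), after "-->" closes the comment:
    if self._context & contexts.FAIL_NEXT:
        self._context ^= contexts.FAIL_NEXT  # it really was a comment

If the comment never closes, FAIL_NEXT stays set and the name is
rejected as before (newline_and_comment_in_template_name_6 covers this).
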
--- mwparserfromhell/parser/tokenizer.c | 17 ++++++++++++++--- mwparserfromhell/parser/tokenizer.py | 8 ++++++++ tests/tokenizer/integration.mwtest | 14 ++++++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index d62b965..814ad50 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1553,6 +1553,12 @@ static int Tokenizer_parse_comment(Tokenizer* self) return -1; Py_DECREF(comment); self->head += 2; + if (self->topstack->context & LC_FAIL_NEXT) { + /* _verify_safe() sets this flag while parsing a template name + when it encounters what might be a comment -- we must unset + it to let _verify_safe() know it was correct: */ + self->topstack->context ^= LC_FAIL_NEXT; + } return 0; } if (Tokenizer_emit_char(self, this)) @@ -2478,8 +2484,13 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) return 0; if (context & LC_HAS_TEXT) { if (context & LC_FAIL_ON_TEXT) { - if (!Py_UNICODE_ISSPACE(data)) + if (!Py_UNICODE_ISSPACE(data)) { + if (data == '<' && Tokenizer_READ(self, 1) == '!') { + self->topstack->context |= LC_FAIL_NEXT; + return 0; + } return -1; + } } else { if (data == '\n') @@ -2496,8 +2507,8 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) } } else if (context & LC_FAIL_ON_LBRACE) { - if (data == '{' || (Tokenizer_READ(self, -1) == '{' && - Tokenizer_READ(self, -2) == '{')) { + if (data == '{' || (Tokenizer_READ_BACKWARDS(self, 1) == '{' && + Tokenizer_READ_BACKWARDS(self, 2) == '{')) { if (context & LC_TEMPLATE) self->topstack->context |= LC_FAIL_ON_EQUALS; else diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index d867234..44f0d60 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -608,6 +608,11 @@ class Tokenizer(object): self._emit(tokens.CommentEnd()) self._emit_all(self._pop()) self._head += 2 + if self._context & contexts.FAIL_NEXT: + # _verify_safe() sets this flag while parsing a template + # name when it encounters what might be a comment -- we + # must unset it to let _verify_safe() know it was correct: + self._context ^= contexts.FAIL_NEXT return self._emit_text(this) self._head += 1 @@ -1021,6 +1026,9 @@ class Tokenizer(object): if context & contexts.HAS_TEXT: if context & contexts.FAIL_ON_TEXT: if this is self.END or not this.isspace(): + if this == "<" and self._read(1) == "!": + self._context |= contexts.FAIL_NEXT + return True return False else: if this == "\n": diff --git a/tests/tokenizer/integration.mwtest b/tests/tokenizer/integration.mwtest index ef6d5c5..1019175 100644 --- a/tests/tokenizer/integration.mwtest +++ b/tests/tokenizer/integration.mwtest @@ -227,3 +227,17 @@ name: newline_and_comment_in_template_name_5 label: a template name containing a newline followed by a comment input: "{{foobar\n\ninvalid|key=value}}" output: [Text(text="{{foobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), Text(text="\ninvalid|key=value}}")] + +--- + +name: newline_and_comment_in_template_name_6 +label: a template name containing a newline followed by a comment +input: "{{foobar\n``) and - :py:class:`Wikilinks <.Wikilink>` (``[[foo]]``). -- Added corresponding :py:meth:`.ifilter_links` and :py:meth:`.filter_links` - methods to :py:class:`.Wikicode`. +- Added support for :class:`Comments <.Comment>` (````) and + :class:`Wikilinks <.Wikilink>` (``[[foo]]``). 
+- Added corresponding :meth:`.ifilter_links` and :meth:`.filter_links` methods + to :class:`.Wikicode`. - Fixed a bug when parsing incomplete templates. -- Fixed :py:meth:`.strip_code` to affect the contents of headings. +- Fixed :meth:`.strip_code` to affect the contents of headings. - Various copyedits in documentation and comments. v0.1 diff --git a/docs/index.rst b/docs/index.rst index a6d2df3..988f5e7 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,9 +1,9 @@ MWParserFromHell v\ |version| Documentation =========================================== -:py:mod:`mwparserfromhell` (the *MediaWiki Parser from Hell*) is a Python -package that provides an easy-to-use and outrageously powerful parser for -MediaWiki_ wikicode. It supports Python 2 and Python 3. +:mod:`mwparserfromhell` (the *MediaWiki Parser from Hell*) is a Python package +that provides an easy-to-use and outrageously powerful parser for MediaWiki_ +wikicode. It supports Python 2 and Python 3. Developed by Earwig_ with contributions from `Σ`_, Legoktm_, and others. Development occurs on GitHub_. diff --git a/docs/integration.rst b/docs/integration.rst index a09334d..102b3b9 100644 --- a/docs/integration.rst +++ b/docs/integration.rst @@ -1,11 +1,11 @@ Integration =========== -:py:mod:`mwparserfromhell` is used by and originally developed for EarwigBot_; -:py:class:`~earwigbot.wiki.page.Page` objects have a -:py:meth:`~earwigbot.wiki.page.Page.parse` method that essentially calls -:py:func:`mwparserfromhell.parse() ` on -:py:meth:`~earwigbot.wiki.page.Page.get`. +:mod:`mwparserfromhell` is used by and originally developed for EarwigBot_; +:class:`~earwigbot.wiki.page.Page` objects have a +:meth:`~earwigbot.wiki.page.Page.parse` method that essentially calls +:func:`mwparserfromhell.parse() ` on +:meth:`~earwigbot.wiki.page.Page.get`. If you're using Pywikipedia_, your code might look like this:: diff --git a/docs/usage.rst b/docs/usage.rst index 974c670..c471397 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -6,9 +6,9 @@ Normal usage is rather straightforward (where ``text`` is page text):: >>> import mwparserfromhell >>> wikicode = mwparserfromhell.parse(text) -``wikicode`` is a :py:class:`mwparserfromhell.Wikicode <.Wikicode>` object, -which acts like an ordinary ``unicode`` object (or ``str`` in Python 3) with -some extra methods. For example:: +``wikicode`` is a :class:`mwparserfromhell.Wikicode <.Wikicode>` object, which +acts like an ordinary ``unicode`` object (or ``str`` in Python 3) with some +extra methods. For example:: >>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" >>> wikicode = mwparserfromhell.parse(text) @@ -33,9 +33,9 @@ Since nodes can contain other nodes, getting nested templates is trivial:: >>> mwparserfromhell.parse(text).filter_templates() ['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}'] -You can also pass *recursive=False* to :py:meth:`~.filter_templates` and -explore templates manually. This is possible because nodes can contain -additional :py:class:`~.Wikicode` objects:: +You can also pass *recursive=False* to :meth:`.filter_templates` and explore +templates manually. This is possible because nodes can contain additional +:class:`.Wikicode` objects:: >>> code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") >>> print code.filter_templates(recursive=False) @@ -49,11 +49,11 @@ additional :py:class:`~.Wikicode` objects:: template Templates can be easily modified to add, remove, or alter params. 
-:py:class:`~.Wikicode` objects can be treated like lists, with -:py:meth:`~.Wikicode.append`, :py:meth:`~.Wikicode.insert`, -:py:meth:`~.Wikicode.remove`, :py:meth:`~.Wikicode.replace`, and more. They -also have a :py:meth:`~.Wikicode.matches` method for comparing page or template -names, which takes care of capitalization and whitespace:: +:class:`.Wikicode` objects can be treated like lists, with +:meth:`~.Wikicode.append`, :meth:`~.Wikicode.insert`, +:meth:`~.Wikicode.remove`, :meth:`~.Wikicode.replace`, and more. They also have +a :meth:`~.Wikicode.matches` method for comparing page or template names, which +takes care of capitalization and whitespace:: >>> text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}" >>> code = mwparserfromhell.parse(text) @@ -69,8 +69,8 @@ names, which takes care of capitalization and whitespace:: >>> print code.filter_templates() ['{{cleanup|date=July 2012}}', '{{bar-stub}}'] -You can then convert ``code`` back into a regular :py:class:`unicode` object -(for saving the page!) by calling :py:func:`unicode` on it:: +You can then convert ``code`` back into a regular :class:`unicode` object (for +saving the page!) by calling :func:`unicode` on it:: >>> text = unicode(code) >>> print text @@ -78,7 +78,7 @@ You can then convert ``code`` back into a regular :py:class:`unicode` object >>> text == code True -(Likewise, use :py:func:`str(code) ` in Python 3.) +(Likewise, use :func:`str(code) ` in Python 3.) -For more tips, check out :py:class:`Wikicode's full method list <.Wikicode>` -and the :py:mod:`list of Nodes <.nodes>`. +For more tips, check out :class:`Wikicode's full method list <.Wikicode>` and +the :mod:`list of Nodes <.nodes>`. From 87e0079512f3d85813541dc97a240713fc0b33c9 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 11 Jul 2014 00:30:47 -0400 Subject: [PATCH 039/102] Take proper advantage of Sphinx's default domains. --- mwparserfromhell/compat.py | 8 +- mwparserfromhell/nodes/__init__.py | 26 ++-- mwparserfromhell/nodes/external_link.py | 4 +- mwparserfromhell/nodes/extras/__init__.py | 5 +- mwparserfromhell/nodes/extras/attribute.py | 6 +- mwparserfromhell/nodes/extras/parameter.py | 4 +- mwparserfromhell/nodes/heading.py | 2 +- mwparserfromhell/nodes/tag.py | 26 ++-- mwparserfromhell/nodes/template.py | 32 ++--- mwparserfromhell/nodes/wikilink.py | 4 +- mwparserfromhell/parser/__init__.py | 36 +++--- mwparserfromhell/parser/builder.py | 10 +- mwparserfromhell/parser/contexts.py | 88 +++++++------- mwparserfromhell/parser/tokenizer.py | 12 +- mwparserfromhell/parser/tokens.py | 4 +- mwparserfromhell/smart_list.py | 22 ++-- mwparserfromhell/string_mixin.py | 11 +- mwparserfromhell/utils.py | 20 ++-- mwparserfromhell/wikicode.py | 184 ++++++++++++++--------------- 19 files changed, 248 insertions(+), 256 deletions(-) diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index 4384ace..590a271 100644 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -2,10 +2,10 @@ """ Implements support for both Python 2 and Python 3 by defining common types in -terms of their Python 2/3 variants. For example, :py:class:`str` is set to -:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, -:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These -types are meant to be imported directly from within the parser's modules. +terms of their Python 2/3 variants. 
For example, :class:`str` is set to +:class:`unicode` on Python 2 but :class:`str` on Python 3; likewise, +:class:`bytes` is :class:`str` on 2 but :class:`bytes` on 3. These types are +meant to be imported directly from within the parser's modules. """ import sys diff --git a/mwparserfromhell/nodes/__init__.py b/mwparserfromhell/nodes/__init__.py index d6f60bd..8e71c8b 100644 --- a/mwparserfromhell/nodes/__init__.py +++ b/mwparserfromhell/nodes/__init__.py @@ -21,12 +21,12 @@ # SOFTWARE. """ -This package contains :py:class:`~.Wikicode` "nodes", which represent a single -unit of wikitext, such as a Template, an HTML tag, a Heading, or plain text. -The node "tree" is far from flat, as most types can contain additional -:py:class:`~.Wikicode` types within them - and with that, more nodes. For -example, the name of a :py:class:`~.Template` is a :py:class:`~.Wikicode` -object that can contain text or more templates. +This package contains :class:`.Wikicode` "nodes", which represent a single unit +of wikitext, such as a Template, an HTML tag, a Heading, or plain text. The +node "tree" is far from flat, as most types can contain additional +:class:`.Wikicode` types within them - and with that, more nodes. For example, +the name of a :class:`.Template` is a :class:`.Wikicode` object that can +contain text or more templates. """ from __future__ import unicode_literals @@ -40,16 +40,16 @@ __all__ = ["Node", "Text", "Argument", "Heading", "HTMLEntity", "Tag", class Node(StringMixIn): """Represents the base Node type, demonstrating the methods to override. - :py:meth:`__unicode__` must be overridden. It should return a ``unicode`` - or (``str`` in py3k) representation of the node. If the node contains - :py:class:`~.Wikicode` objects inside of it, :py:meth:`__children__` - should be a generator that iterates over them. If the node is printable - (shown when the page is rendered), :py:meth:`__strip__` should return its + :meth:`__unicode__` must be overridden. It should return a ``unicode`` or + (``str`` in py3k) representation of the node. If the node contains + :class:`.Wikicode` objects inside of it, :meth:`__children__` should be a + generator that iterates over them. If the node is printable + (shown when the page is rendered), :meth:`__strip__` should return its printable version, stripping out any formatting marks. It does not have to return a string, but something that can be converted to a string with - ``str()``. Finally, :py:meth:`__showtree__` can be overridden to build a + ``str()``. Finally, :meth:`__showtree__` can be overridden to build a nice tree representation of the node, if desired, for - :py:meth:`~.Wikicode.get_tree`. + :meth:`~.Wikicode.get_tree`. 
""" def __unicode__(self): raise NotImplementedError() diff --git a/mwparserfromhell/nodes/external_link.py b/mwparserfromhell/nodes/external_link.py index d13376e..f98a1e5 100644 --- a/mwparserfromhell/nodes/external_link.py +++ b/mwparserfromhell/nodes/external_link.py @@ -67,12 +67,12 @@ class ExternalLink(Node): @property def url(self): - """The URL of the link target, as a :py:class:`~.Wikicode` object.""" + """The URL of the link target, as a :class:`.Wikicode` object.""" return self._url @property def title(self): - """The link title (if given), as a :py:class:`~.Wikicode` object.""" + """The link title (if given), as a :class:`.Wikicode` object.""" return self._title @property diff --git a/mwparserfromhell/nodes/extras/__init__.py b/mwparserfromhell/nodes/extras/__init__.py index a131269..7c0262b 100644 --- a/mwparserfromhell/nodes/extras/__init__.py +++ b/mwparserfromhell/nodes/extras/__init__.py @@ -21,9 +21,8 @@ # SOFTWARE. """ -This package contains objects used by -:py:class:`~.Node`\ s, but are not nodes themselves. This includes the -parameters of Templates or the attributes of HTML tags. +This package contains objects used by :class:`.Node`\ s, but that are not nodes +themselves. This includes template parameters and HTML tag attributes. """ from .attribute import Attribute diff --git a/mwparserfromhell/nodes/extras/attribute.py b/mwparserfromhell/nodes/extras/attribute.py index 6256138..cb50194 100644 --- a/mwparserfromhell/nodes/extras/attribute.py +++ b/mwparserfromhell/nodes/extras/attribute.py @@ -31,7 +31,7 @@ __all__ = ["Attribute"] class Attribute(StringMixIn): """Represents an attribute of an HTML tag. - This is used by :py:class:`~.Tag` objects. For example, the tag + This is used by :class:`.Tag` objects. For example, the tag ```` contains an Attribute whose name is ``"name"`` and whose value is ``"foo"``. 
""" @@ -84,12 +84,12 @@ class Attribute(StringMixIn): @property def name(self): - """The name of the attribute as a :py:class:`~.Wikicode` object.""" + """The name of the attribute as a :class:`.Wikicode` object.""" return self._name @property def value(self): - """The value of the attribute as a :py:class:`~.Wikicode` object.""" + """The value of the attribute as a :class:`.Wikicode` object.""" return self._value @property diff --git a/mwparserfromhell/nodes/extras/parameter.py b/mwparserfromhell/nodes/extras/parameter.py index 5a67ae0..50c9ac0 100644 --- a/mwparserfromhell/nodes/extras/parameter.py +++ b/mwparserfromhell/nodes/extras/parameter.py @@ -58,12 +58,12 @@ class Parameter(StringMixIn): @property def name(self): - """The name of the parameter as a :py:class:`~.Wikicode` object.""" + """The name of the parameter as a :class:`.Wikicode` object.""" return self._name @property def value(self): - """The value of the parameter as a :py:class:`~.Wikicode` object.""" + """The value of the parameter as a :class:`.Wikicode` object.""" return self._value @property diff --git a/mwparserfromhell/nodes/heading.py b/mwparserfromhell/nodes/heading.py index 47c23a8..696b5ee 100644 --- a/mwparserfromhell/nodes/heading.py +++ b/mwparserfromhell/nodes/heading.py @@ -52,7 +52,7 @@ class Heading(Node): @property def title(self): - """The title of the heading, as a :py:class:`~.Wikicode` object.""" + """The title of the heading, as a :class:`.Wikicode` object.""" return self._title @property diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index 1b8efb8..7cbe78d 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -108,19 +108,19 @@ class Tag(Node): @property def tag(self): - """The tag itself, as a :py:class:`~.Wikicode` object.""" + """The tag itself, as a :class:`.Wikicode` object.""" return self._tag @property def contents(self): - """The contents of the tag, as a :py:class:`~.Wikicode` object.""" + """The contents of the tag, as a :class:`.Wikicode` object.""" return self._contents @property def attributes(self): """The list of attributes affecting the tag. - Each attribute is an instance of :py:class:`~.Attribute`. + Each attribute is an instance of :class:`.Attribute`. """ return self._attrs @@ -146,7 +146,7 @@ class Tag(Node): This makes the tag look like a lone close tag. It is technically invalid and is only parsable Wikicode when the tag itself is single-only, like ``
    `` and ````. See - :py:func:`.definitions.is_single_only`. + :func:`.definitions.is_single_only`. """ return self._invalid @@ -155,8 +155,8 @@ class Tag(Node): """Whether the tag is implicitly self-closing, with no ending slash. This is only possible for specific "single" tags like ``
    `` and - ``
  • ``. See :py:func:`.definitions.is_single`. This field only has an - effect if :py:attr:`self_closing` is also ``True``. + ``
  • ``. See :func:`.definitions.is_single`. This field only has an + effect if :attr:`self_closing` is also ``True``. """ return self._implicit @@ -167,9 +167,9 @@ class Tag(Node): @property def closing_tag(self): - """The closing tag, as a :py:class:`~.Wikicode` object. + """The closing tag, as a :class:`.Wikicode` object. - This will usually equal :py:attr:`tag`, unless there is additional + This will usually equal :attr:`tag`, unless there is additional spacing, comments, or the like. """ return self._closing_tag @@ -226,8 +226,8 @@ class Tag(Node): def get(self, name): """Get the attribute with the given *name*. - The returned object is a :py:class:`~.Attribute` instance. Raises - :py:exc:`ValueError` if no attribute has this name. Since multiple + The returned object is a :class:`.Attribute` instance. Raises + :exc:`ValueError` if no attribute has this name. Since multiple attributes can have the same name, we'll return the last match, since all but the last are ignored by the MediaWiki parser. """ @@ -241,9 +241,9 @@ class Tag(Node): """Add an attribute with the given *name* and *value*. *name* and *value* can be anything parsable by - :py:func:`.utils.parse_anything`; *value* can be omitted if the - attribute is valueless. If *quotes* is not ``None``, it should be a - string (either ``"`` or ``'``) that *value* will be wrapped in (this is + :func:`.utils.parse_anything`; *value* can be omitted if the attribute + is valueless. If *quotes* is not ``None``, it should be a string + (either ``"`` or ``'``) that *value* will be wrapped in (this is recommended). ``None`` is only legal if *value* contains no spacing. *pad_first*, *pad_before_eq*, and *pad_after_eq* are whitespace used as diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index c0fda5d..a9b14aa 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -110,8 +110,8 @@ class Template(Node): """Try to determine the whitespace conventions for parameters. This will examine the existing parameters and use - :py:meth:`_select_theory` to determine if there are any preferred - styles for how much whitespace to put before or after the value. + :meth:`_select_theory` to determine if there are any preferred styles + for how much whitespace to put before or after the value. """ before_theories = defaultdict(lambda: 0) after_theories = defaultdict(lambda: 0) @@ -159,7 +159,7 @@ class Template(Node): @property def name(self): - """The name of the template, as a :py:class:`~.Wikicode` object.""" + """The name of the template, as a :class:`.Wikicode` object.""" return self._name @property @@ -189,13 +189,13 @@ class Template(Node): has_param = lambda self, name, ignore_empty=False: \ self.has(name, ignore_empty) - has_param.__doc__ = "Alias for :py:meth:`has`." + has_param.__doc__ = "Alias for :meth:`has`." def get(self, name): """Get the parameter whose name is *name*. - The returned object is a :py:class:`~.Parameter` instance. Raises - :py:exc:`ValueError` if no parameter has this name. Since multiple + The returned object is a :class:`.Parameter` instance. Raises + :exc:`ValueError` if no parameter has this name. Since multiple parameters can have the same name, we'll return the last match, since the last parameter is the only one read by the MediaWiki parser. """ @@ -210,8 +210,8 @@ class Template(Node): """Add a parameter to the template with a given *name* and *value*. 
*name* and *value* can be anything parsable by - :py:func:`.utils.parse_anything`; pipes and equal signs are - automatically escaped from *value* when appropriate. + :func:`.utils.parse_anything`; pipes and equal signs are automatically + escaped from *value* when appropriate. If *showkey* is given, this will determine whether or not to show the parameter's name (e.g., ``{{foo|bar}}``'s parameter has a name of @@ -221,13 +221,13 @@ class Template(Node): If *name* is already a parameter in the template, we'll replace its value while keeping the same whitespace around it. We will also try to guess the dominant spacing convention when adding a new parameter using - :py:meth:`_get_spacing_conventions`. + :meth:`_get_spacing_conventions`. - If *before* is given (either a :py:class:`~.Parameter` object or a - name), then we will place the parameter immediately before this one. + If *before* is given (either a :class:`.Parameter` object or a name), + then we will place the parameter immediately before this one. Otherwise, it will be added at the end. If *before* is a name and exists multiple times in the template, we will place it before the last - occurrence. If *before* is not in the template, :py:exc:`ValueError` is + occurrence. If *before* is not in the template, :exc:`ValueError` is raised. The argument is ignored if the new parameter already exists. If *preserve_spacing* is ``False``, we will avoid preserving spacing @@ -289,9 +289,9 @@ class Template(Node): def remove(self, param, keep_field=False): """Remove a parameter from the template, identified by *param*. - If *param* is a :py:class:`.Parameter` object, it will be matched - exactly, otherwise it will be treated like the *name* argument to - :py:meth:`has` and :py:meth:`get`. + If *param* is a :class:`.Parameter` object, it will be matched exactly, + otherwise it will be treated like the *name* argument to :meth:`has` + and :meth:`get`. If *keep_field* is ``True``, we will keep the parameter's name, but blank its value. Otherwise, we will remove the parameter completely @@ -300,7 +300,7 @@ class Template(Node): we expected, so ``{{foo||baz}}`` will be produced instead). If the parameter shows up multiple times in the template and *param* is - not a :py:class:`.Parameter` object, we will remove all instances of it + not a :class:`.Parameter` object, we will remove all instances of it (and keep only one if *keep_field* is ``True`` - the first instance if none have dependents, otherwise the one with dependents will be kept). """ diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index 4640f34..f9c221c 100644 --- a/mwparserfromhell/nodes/wikilink.py +++ b/mwparserfromhell/nodes/wikilink.py @@ -62,12 +62,12 @@ class Wikilink(Node): @property def title(self): - """The title of the linked page, as a :py:class:`~.Wikicode` object.""" + """The title of the linked page, as a :class:`.Wikicode` object.""" return self._title @property def text(self): - """The text to display (if any), as a :py:class:`~.Wikicode` object.""" + """The text to display (if any), as a :class:`.Wikicode` object.""" return self._text @title.setter diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index 467d5df..36cb511 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -22,8 +22,8 @@ """ This package contains the actual wikicode parser, split up into two main -modules: the :py:mod:`~.tokenizer` and the :py:mod:`~.builder`. 
This module -joins them together under one interface. +modules: the :mod:`.tokenizer` and the :mod:`.builder`. This module joins them +together into one interface. """ class ParserError(Exception): @@ -54,16 +54,16 @@ class Parser(object): """Represents a parser for wikicode. Actual parsing is a two-step process: first, the text is split up into a - series of tokens by the :py:class:`.Tokenizer`, and then the tokens are - converted into trees of :py:class:`.Wikicode` objects and - :py:class:`.Node`\ s by the :py:class:`.Builder`. + series of tokens by the :class:`.Tokenizer`, and then the tokens are + converted into trees of :class:`.Wikicode` objects and :class:`.Node`\ s by + the :class:`.Builder`. - Instances of this class or its dependents (:py:class:`.Tokenizer` and - :py:class:`.Builder`) should not be shared between threads. - :py:meth:`parse` can be called multiple times as long as it is not done - concurrently. In general, there is no need to do this because parsing - should be done through :py:func:`mwparserfromhell.parse`, which creates a - new :py:class:`.Parser` object as necessary. + Instances of this class or its dependents (:class:`.Tokenizer` and + :class:`.Builder`) should not be shared between threads. :meth:`parse` can + be called multiple times as long as it is not done concurrently. In + general, there is no need to do this because parsing should be done through + :func:`mwparserfromhell.parse`, which creates a new :class:`.Parser` object + as necessary. """ def __init__(self): @@ -74,20 +74,20 @@ class Parser(object): self._builder = Builder() def parse(self, text, context=0, skip_style_tags=False): - """Parse *text*, returning a :py:class:`~.Wikicode` object tree. + """Parse *text*, returning a :class:`.Wikicode` object tree. If given, *context* will be passed as a starting context to the parser. This is helpful when this function is used inside node attribute - setters. For example, :py:class:`~.ExternalLink`\ 's - :py:attr:`~.ExternalLink.url` setter sets *context* to - :py:mod:`contexts.EXT_LINK_URI <.contexts>` to prevent the URL itself - from becoming an :py:class:`~.ExternalLink`. + setters. For example, :class:`.ExternalLink`\ 's + :attr:`~.ExternalLink.url` setter sets *context* to + :mod:`contexts.EXT_LINK_URI <.contexts>` to prevent the URL itself + from becoming an :class:`.ExternalLink`. If *skip_style_tags* is ``True``, then ``''`` and ``'''`` will not be parsed, but instead will be treated as plain text. - If there is an internal error while parsing, :py:exc:`.ParserError` - will be raised. + If there is an internal error while parsing, :exc:`.ParserError` will + be raised. """ tokens = self._tokenizer.tokenize(text, context, skip_style_tags) code = self._builder.build(tokens) diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index e0109e6..2d68036 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -48,9 +48,9 @@ def _add_handler(token_type): class Builder(object): """Builds a tree of nodes out of a sequence of tokens. - To use, pass a list of :py:class:`~.Token`\ s to the :py:meth:`build` - method. The list will be exhausted as it is parsed and a - :py:class:`.Wikicode` object containing the node tree will be returned. + To use, pass a list of :class:`.Token`\ s to the :meth:`build` method. The + list will be exhausted as it is parsed and a :class:`.Wikicode` object + containing the node tree will be returned. 
""" def __init__(self): @@ -64,8 +64,8 @@ class Builder(object): def _pop(self): """Pop the current node list off of the stack. - The raw node list is wrapped in a :py:class:`.SmartList` and then in a - :py:class:`.Wikicode` object. + The raw node list is wrapped in a :class:`.SmartList` and then in a + :class:`.Wikicode` object. """ return Wikicode(SmartList(self._stacks.pop())) diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index 28023b5..f568fac 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -35,72 +35,72 @@ will cover ``BAR == 0b10`` and ``BAZ == 0b01``). Local (stack-specific) contexts: -* :py:const:`TEMPLATE` +* :const:`TEMPLATE` - * :py:const:`TEMPLATE_NAME` - * :py:const:`TEMPLATE_PARAM_KEY` - * :py:const:`TEMPLATE_PARAM_VALUE` + * :const:`TEMPLATE_NAME` + * :const:`TEMPLATE_PARAM_KEY` + * :const:`TEMPLATE_PARAM_VALUE` -* :py:const:`ARGUMENT` +* :const:`ARGUMENT` - * :py:const:`ARGUMENT_NAME` - * :py:const:`ARGUMENT_DEFAULT` + * :const:`ARGUMENT_NAME` + * :const:`ARGUMENT_DEFAULT` -* :py:const:`WIKILINK` +* :const:`WIKILINK` - * :py:const:`WIKILINK_TITLE` - * :py:const:`WIKILINK_TEXT` + * :const:`WIKILINK_TITLE` + * :const:`WIKILINK_TEXT` -* :py:const:`EXT_LINK` +* :const:`EXT_LINK` - * :py:const:`EXT_LINK_URI` - * :py:const:`EXT_LINK_TITLE` + * :const:`EXT_LINK_URI` + * :const:`EXT_LINK_TITLE` -* :py:const:`HEADING` +* :const:`HEADING` - * :py:const:`HEADING_LEVEL_1` - * :py:const:`HEADING_LEVEL_2` - * :py:const:`HEADING_LEVEL_3` - * :py:const:`HEADING_LEVEL_4` - * :py:const:`HEADING_LEVEL_5` - * :py:const:`HEADING_LEVEL_6` + * :const:`HEADING_LEVEL_1` + * :const:`HEADING_LEVEL_2` + * :const:`HEADING_LEVEL_3` + * :const:`HEADING_LEVEL_4` + * :const:`HEADING_LEVEL_5` + * :const:`HEADING_LEVEL_6` -* :py:const:`TAG` +* :const:`TAG` - * :py:const:`TAG_OPEN` - * :py:const:`TAG_ATTR` - * :py:const:`TAG_BODY` - * :py:const:`TAG_CLOSE` + * :const:`TAG_OPEN` + * :const:`TAG_ATTR` + * :const:`TAG_BODY` + * :const:`TAG_CLOSE` -* :py:const:`STYLE` +* :const:`STYLE` - * :py:const:`STYLE_ITALICS` - * :py:const:`STYLE_BOLD` - * :py:const:`STYLE_PASS_AGAIN` - * :py:const:`STYLE_SECOND_PASS` + * :const:`STYLE_ITALICS` + * :const:`STYLE_BOLD` + * :const:`STYLE_PASS_AGAIN` + * :const:`STYLE_SECOND_PASS` -* :py:const:`DL_TERM` +* :const:`DL_TERM` -* :py:const:`SAFETY_CHECK` +* :const:`SAFETY_CHECK` - * :py:const:`HAS_TEXT` - * :py:const:`FAIL_ON_TEXT` - * :py:const:`FAIL_NEXT` - * :py:const:`FAIL_ON_LBRACE` - * :py:const:`FAIL_ON_RBRACE` - * :py:const:`FAIL_ON_EQUALS` + * :const:`HAS_TEXT` + * :const:`FAIL_ON_TEXT` + * :const:`FAIL_NEXT` + * :const:`FAIL_ON_LBRACE` + * :const:`FAIL_ON_RBRACE` + * :const:`FAIL_ON_EQUALS` Global contexts: -* :py:const:`GL_HEADING` +* :const:`GL_HEADING` Aggregate contexts: -* :py:const:`FAIL` -* :py:const:`UNSAFE` -* :py:const:`DOUBLE` -* :py:const:`NO_WIKILINKS` -* :py:const:`NO_EXT_LINKS` +* :const:`FAIL` +* :const:`UNSAFE` +* :const:`DOUBLE` +* :const:`NO_WIKILINKS` +* :const:`NO_EXT_LINKS` """ diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 44f0d60..073e64c 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -135,7 +135,7 @@ class Tokenizer(object): """Fail the current tokenization route. Discards the current stack/context/textbuffer and raises - :py:exc:`~.BadRoute`. + :exc:`.BadRoute`. 
""" context = self._context self._pop() @@ -173,14 +173,14 @@ class Tokenizer(object): def _read(self, delta=0, wrap=False, strict=False): """Read the value at a relative point in the wikicode. - The value is read from :py:attr:`self._head <_head>` plus the value of + The value is read from :attr:`self._head <_head>` plus the value of *delta* (which can be negative). If *wrap* is ``False``, we will not allow attempts to read from the end of the string if ``self._head + delta`` is negative. If *strict* is ``True``, the route will be failed - (with :py:meth:`_fail_route`) if we try to read from past the end of - the string; otherwise, :py:attr:`self.END ` is returned. If we try - to read from before the start of the string, :py:attr:`self.START - ` is returned. + (with :meth:`_fail_route`) if we try to read from past the end of the + string; otherwise, :attr:`self.END ` is returned. If we try to + read from before the start of the string, :attr:`self.START ` is + returned. """ index = self._head + delta if index < 0 and (not wrap or abs(index) > len(self._text)): diff --git a/mwparserfromhell/parser/tokens.py b/mwparserfromhell/parser/tokens.py index e567731..2e38a1c 100644 --- a/mwparserfromhell/parser/tokens.py +++ b/mwparserfromhell/parser/tokens.py @@ -24,8 +24,8 @@ This module contains the token definitions that are used as an intermediate parsing data type - they are stored in a flat list, with each token being identified by its type and optional attributes. The token list is generated in -a syntactically valid form by the :py:class:`~.Tokenizer`, and then converted -into the :py:class`~.Wikicode` tree by the :py:class:`~.Builder`. +a syntactically valid form by the :class:`.Tokenizer`, and then converted into +the :class`.Wikicode` tree by the :class:`.Builder`. """ from __future__ import unicode_literals diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index cedfb5c..b4cfd1b 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -21,8 +21,8 @@ # SOFTWARE. """ -This module contains the :py:class:`~.SmartList` type, as well as its -:py:class:`~._ListProxy` child, which together implement a list whose sublists +This module contains the :class:`.SmartList` type, as well as its +:class:`._ListProxy` child, which together implement a list whose sublists reflect changes made to the main list, and vice-versa. """ @@ -35,7 +35,7 @@ __all__ = ["SmartList"] def inheritdoc(method): """Set __doc__ of *method* to __doc__ of *method* in its parent class. - Since this is used on :py:class:`~.SmartList`, the "parent class" used is + Since this is used on :class:`.SmartList`, the "parent class" used is ``list``. This function can be used as a decorator. """ method.__doc__ = getattr(list, method.__name__).__doc__ @@ -65,9 +65,9 @@ class SmartList(_SliceNormalizerMixIn, list): list (such as the addition, removal, or replacement of elements) will be reflected in the sublist, or vice-versa, to the greatest degree possible. This is implemented by having sublists - instances of the - :py:class:`~._ListProxy` type - dynamically determine their elements by - storing their slice info and retrieving that slice from the parent. Methods - that change the size of the list also change the slice info. For example:: + :class:`._ListProxy` type - dynamically determine their elements by storing + their slice info and retrieving that slice from the parent. Methods that + change the size of the list also change the slice info. 
For example:: >>> parent = SmartList([0, 1, 2, 3]) >>> parent @@ -84,8 +84,8 @@ class SmartList(_SliceNormalizerMixIn, list): The parent needs to keep a list of its children in order to update them, which prevents them from being garbage-collected. If you are keeping the parent around for a while but creating many children, it is advisable to - call :py:meth:`~._ListProxy.detach` when you're finished with them. Certain - parent methods, like :py:meth:`reverse` and :py:meth:`sort`, will do this + call :meth:`._ListProxy.detach` when you're finished with them. Certain + parent methods, like :meth:`reverse` and :meth:`sort`, will do this automatically. """ @@ -217,9 +217,9 @@ class SmartList(_SliceNormalizerMixIn, list): class _ListProxy(_SliceNormalizerMixIn, list): """Implement the ``list`` interface by getting elements from a parent. - This is created by a :py:class:`~.SmartList` object when slicing. It does - not actually store the list at any time; instead, whenever the list is - needed, it builds it dynamically using the :py:meth:`_render` method. + This is created by a :class:`.SmartList` object when slicing. It does not + actually store the list at any time; instead, whenever the list is needed, + it builds it dynamically using the :meth:`_render` method. """ def __init__(self, parent, sliceinfo): diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index fe41d6d..8da8692 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -21,7 +21,7 @@ # SOFTWARE. """ -This module contains the :py:class:`~.StringMixIn` type, which implements the +This module contains the :class:`.StringMixIn` type, which implements the interface for the ``unicode`` type (``str`` on py3k) in a dynamic manner. """ @@ -35,7 +35,7 @@ __all__ = ["StringMixIn"] def inheritdoc(method): """Set __doc__ of *method* to __doc__ of *method* in its parent class. - Since this is used on :py:class:`~.StringMixIn`, the "parent class" used is + Since this is used on :class:`.StringMixIn`, the "parent class" used is ``str``. This function can be used as a decorator. """ method.__doc__ = getattr(str, method.__name__).__doc__ @@ -44,11 +44,10 @@ def inheritdoc(method): class StringMixIn(object): """Implement the interface for ``unicode``/``str`` in a dynamic manner. - To use this class, inherit from it and override the :py:meth:`__unicode__` + To use this class, inherit from it and override the :meth:`__unicode__` method (same on py3k) to return the string representation of the object. - The various string methods will operate on the value of - :py:meth:`__unicode__` instead of the immutable ``self`` like the regular - ``str`` type. + The various string methods will operate on the value of :meth:`__unicode__` + instead of the immutable ``self`` like the regular ``str`` type. """ if py3k: diff --git a/mwparserfromhell/utils.py b/mwparserfromhell/utils.py index 8dc5e4e..8f518a6 100644 --- a/mwparserfromhell/utils.py +++ b/mwparserfromhell/utils.py @@ -34,18 +34,18 @@ from .smart_list import SmartList __all__ = ["parse_anything"] def parse_anything(value, context=0, skip_style_tags=False): - """Return a :py:class:`~.Wikicode` for *value*, allowing multiple types. + """Return a :class:`.Wikicode` for *value*, allowing multiple types. - This differs from :py:meth:`.Parser.parse` in that we accept more than just - a string to be parsed. 
Unicode objects (strings in py3k), strings (bytes in - py3k), integers (converted to strings), ``None``, existing - :py:class:`~.Node` or :py:class:`~.Wikicode` objects, as well as an - iterable of these types, are supported. This is used to parse input - on-the-fly by various methods of :py:class:`~.Wikicode` and others like - :py:class:`~.Template`, such as :py:meth:`wikicode.insert() - <.Wikicode.insert>` or setting :py:meth:`template.name <.Template.name>`. + This differs from :meth:`.Parser.parse` in that we accept more than just a + string to be parsed. Unicode objects (strings in py3k), strings (bytes in + py3k), integers (converted to strings), ``None``, existing :class:`.Node` + or :class:`.Wikicode` objects, as well as an iterable of these types, are + supported. This is used to parse input on-the-fly by various methods of + :class:`.Wikicode` and others like :class:`.Template`, such as + :meth:`wikicode.insert() <.Wikicode.insert>` or setting + :meth:`template.name <.Template.name>`. - Additional arguments are passed directly to :py:meth:`.Parser.parse`. + Additional arguments are passed directly to :meth:`.Parser.parse`. """ from .parser import Parser from .wikicode import Wikicode diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index ffa6790..c24bc5f 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -39,8 +39,8 @@ class Wikicode(StringMixIn): Additionally, it contains methods that can be used to extract data from or modify the nodes, implemented in an interface similar to a list. For - example, :py:meth:`index` can get the index of a node in the list, and - :py:meth:`insert` can add a new node at that index. The :py:meth:`filter() + example, :meth:`index` can get the index of a node in the list, and + :meth:`insert` can add a new node at that index. The :meth:`filter() ` series of functions is very useful for extracting and iterating over, for example, all of the templates in the object. """ @@ -55,7 +55,7 @@ class Wikicode(StringMixIn): @staticmethod def _get_children(node, contexts=False, restrict=None, parent=None): - """Iterate over all child :py:class:`.Node`\ s of a given *node*.""" + """Iterate over all child :class:`.Node`\ s of a given *node*.""" yield (parent, node) if contexts else node if restrict and isinstance(node, restrict): return @@ -74,7 +74,7 @@ class Wikicode(StringMixIn): @staticmethod def _build_matcher(matches, flags): - """Helper for :py:meth:`_indexed_ifilter` and others. + """Helper for :meth:`_indexed_ifilter` and others. If *matches* is a function, return it. If it's a regex, return a wrapper around it that can be called with a node to do a search. If @@ -90,7 +90,7 @@ class Wikicode(StringMixIn): forcetype=None): """Iterate over nodes and their corresponding indices in the node list. - The arguments are interpreted as for :py:meth:`ifilter`. For each tuple + The arguments are interpreted as for :meth:`ifilter`. For each tuple ``(i, node)`` yielded by this method, ``self.index(node) == i``. Note that if *recursive* is ``True``, ``self.nodes[i]`` might not be the node itself, but will still contain it. @@ -111,14 +111,14 @@ class Wikicode(StringMixIn): def _do_strong_search(self, obj, recursive=True): """Search for the specific element *obj* within the node list. - *obj* can be either a :py:class:`.Node` or a :py:class:`.Wikicode` - object. 
If found, we return a tuple (*context*, *index*) where - *context* is the :py:class:`.Wikicode` that contains *obj* and *index* - is its index there, as a :py:class:`slice`. Note that if *recursive* is - ``False``, *context* will always be ``self`` (since we only look for - *obj* among immediate descendants), but if *recursive* is ``True``, - then it could be any :py:class:`.Wikicode` contained by a node within - ``self``. If *obj* is not found, :py:exc:`ValueError` is raised. + *obj* can be either a :class:`.Node` or a :class:`.Wikicode` object. If + found, we return a tuple (*context*, *index*) where *context* is the + :class:`.Wikicode` that contains *obj* and *index* is its index there, + as a :class:`slice`. Note that if *recursive* is ``False``, *context* + will always be ``self`` (since we only look for *obj* among immediate + descendants), but if *recursive* is ``True``, then it could be any + :class:`.Wikicode` contained by a node within ``self``. If *obj* is not + found, :exc:`ValueError` is raised. """ if isinstance(obj, Node): mkslice = lambda i: slice(i, i + 1) @@ -141,14 +141,14 @@ class Wikicode(StringMixIn): def _do_weak_search(self, obj, recursive): """Search for an element that looks like *obj* within the node list. - This follows the same rules as :py:meth:`_do_strong_search` with some + This follows the same rules as :meth:`_do_strong_search` with some differences. *obj* is treated as a string that might represent any - :py:class:`.Node`, :py:class:`.Wikicode`, or combination of the two - present in the node list. Thus, matching is weak (using string - comparisons) rather than strong (using ``is``). Because multiple nodes - can match *obj*, the result is a list of tuples instead of just one - (however, :py:exc:`ValueError` is still raised if nothing is found). - Individual matches will never overlap. + :class:`.Node`, :class:`.Wikicode`, or combination of the two present + in the node list. Thus, matching is weak (using string comparisons) + rather than strong (using ``is``). Because multiple nodes can match + *obj*, the result is a list of tuples instead of just one (however, + :exc:`ValueError` is still raised if nothing is found). Individual + matches will never overlap. The tuples contain a new first element, *exact*, which is ``True`` if we were able to match *obj* exactly to one or more adjacent nodes, or @@ -212,19 +212,19 @@ class Wikicode(StringMixIn): def _build_filter_methods(cls, **meths): """Given Node types, build the corresponding i?filter shortcuts. - The should be given as keys storing the method's base name paired - with values storing the corresponding :py:class:`~.Node` type. For - example, the dict may contain the pair ``("templates", Template)``, - which will produce the methods :py:meth:`ifilter_templates` and - :py:meth:`filter_templates`, which are shortcuts for - :py:meth:`ifilter(forcetype=Template) ` and - :py:meth:`filter(forcetype=Template) `, respectively. These + The should be given as keys storing the method's base name paired with + values storing the corresponding :class:`.Node` type. For example, the + dict may contain the pair ``("templates", Template)``, which will + produce the methods :meth:`ifilter_templates` and + :meth:`filter_templates`, which are shortcuts for + :meth:`ifilter(forcetype=Template) ` and + :meth:`filter(forcetype=Template) `, respectively. These shortcuts are added to the class itself, with an appropriate docstring. """ doc = """Iterate over {0}. 
- This is equivalent to :py:meth:`{1}` with *forcetype* set to - :py:class:`~{2.__module__}.{2.__name__}`. + This is equivalent to :meth:`{1}` with *forcetype* set to + :class:`~{2.__module__}.{2.__name__}`. """ make_ifilter = lambda ftype: (lambda self, *a, **kw: self.ifilter(forcetype=ftype, *a, **kw)) @@ -240,10 +240,10 @@ class Wikicode(StringMixIn): @property def nodes(self): - """A list of :py:class:`~.Node` objects. + """A list of :class:`.Node` objects. - This is the internal data actually stored within a - :py:class:`~.Wikicode` object. + This is the internal data actually stored within a :class:`.Wikicode` + object. """ return self._nodes @@ -260,11 +260,10 @@ class Wikicode(StringMixIn): def set(self, index, value): """Set the ``Node`` at *index* to *value*. - Raises :py:exc:`IndexError` if *index* is out of range, or - :py:exc:`ValueError` if *value* cannot be coerced into one - :py:class:`~.Node`. To insert multiple nodes at an index, use - :py:meth:`get` with either :py:meth:`remove` and :py:meth:`insert` or - :py:meth:`replace`. + Raises :exc:`IndexError` if *index* is out of range, or + :exc:`ValueError` if *value* cannot be coerced into one :class:`.Node`. + To insert multiple nodes at an index, use :meth:`get` with either + :meth:`remove` and :meth:`insert` or :meth:`replace`. """ nodes = parse_anything(value).nodes if len(nodes) > 1: @@ -279,7 +278,7 @@ class Wikicode(StringMixIn): def index(self, obj, recursive=False): """Return the index of *obj* in the list of nodes. - Raises :py:exc:`ValueError` if *obj* is not found. If *recursive* is + Raises :exc:`ValueError` if *obj* is not found. If *recursive* is ``True``, we will look in all nodes of ours and their descendants, and return the index of our direct descendant node within *our* list of nodes. Otherwise, the lookup is done only on direct descendants. @@ -298,9 +297,8 @@ class Wikicode(StringMixIn): def insert(self, index, value): """Insert *value* at *index* in the list of nodes. - *value* can be anything parsable by :py:func:`.parse_anything`, which - includes strings or other :py:class:`~.Wikicode` or :py:class:`~.Node` - objects. + *value* can be anything parsable by :func:`.parse_anything`, which + includes strings or other :class:`.Wikicode` or :class:`.Node` objects. """ nodes = parse_anything(value).nodes for node in reversed(nodes): @@ -309,15 +307,14 @@ class Wikicode(StringMixIn): def insert_before(self, obj, value, recursive=True): """Insert *value* immediately before *obj*. - *obj* can be either a string, a :py:class:`~.Node`, or another - :py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, - for example). If *obj* is a string, we will operate on all instances - of that string within the code, otherwise only on the specific instance - given. *value* can be anything parsable by :py:func:`.parse_anything`. - If *recursive* is ``True``, we will try to find *obj* within our child - nodes even if it is not a direct descendant of this - :py:class:`~.Wikicode` object. If *obj* is not found, - :py:exc:`ValueError` is raised. + *obj* can be either a string, a :class:`.Node`, or another + :class:`.Wikicode` object (as created by :meth:`get_sections`, for + example). If *obj* is a string, we will operate on all instances of + that string within the code, otherwise only on the specific instance + given. *value* can be anything parsable by :func:`.parse_anything`. 
If + *recursive* is ``True``, we will try to find *obj* within our child + nodes even if it is not a direct descendant of this :class:`.Wikicode` + object. If *obj* is not found, :exc:`ValueError` is raised. """ if isinstance(obj, (Node, Wikicode)): context, index = self._do_strong_search(obj, recursive) @@ -333,15 +330,14 @@ class Wikicode(StringMixIn): def insert_after(self, obj, value, recursive=True): """Insert *value* immediately after *obj*. - *obj* can be either a string, a :py:class:`~.Node`, or another - :py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, - for example). If *obj* is a string, we will operate on all instances - of that string within the code, otherwise only on the specific instance - given. *value* can be anything parsable by :py:func:`.parse_anything`. - If *recursive* is ``True``, we will try to find *obj* within our child - nodes even if it is not a direct descendant of this - :py:class:`~.Wikicode` object. If *obj* is not found, - :py:exc:`ValueError` is raised. + *obj* can be either a string, a :class:`.Node`, or another + :class:`.Wikicode` object (as created by :meth:`get_sections`, for + example). If *obj* is a string, we will operate on all instances of + that string within the code, otherwise only on the specific instance + given. *value* can be anything parsable by :func:`.parse_anything`. If + *recursive* is ``True``, we will try to find *obj* within our child + nodes even if it is not a direct descendant of this :class:`.Wikicode` + object. If *obj* is not found, :exc:`ValueError` is raised. """ if isinstance(obj, (Node, Wikicode)): context, index = self._do_strong_search(obj, recursive) @@ -357,15 +353,14 @@ class Wikicode(StringMixIn): def replace(self, obj, value, recursive=True): """Replace *obj* with *value*. - *obj* can be either a string, a :py:class:`~.Node`, or another - :py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, - for example). If *obj* is a string, we will operate on all instances - of that string within the code, otherwise only on the specific instance - given. *value* can be anything parsable by :py:func:`.parse_anything`. + *obj* can be either a string, a :class:`.Node`, or another + :class:`.Wikicode` object (as created by :meth:`get_sections`, for + example). If *obj* is a string, we will operate on all instances of + that string within the code, otherwise only on the specific instance + given. *value* can be anything parsable by :func:`.parse_anything`. If *recursive* is ``True``, we will try to find *obj* within our child - nodes even if it is not a direct descendant of this - :py:class:`~.Wikicode` object. If *obj* is not found, - :py:exc:`ValueError` is raised. + nodes even if it is not a direct descendant of this :class:`.Wikicode` + object. If *obj* is not found, :exc:`ValueError` is raised. """ if isinstance(obj, (Node, Wikicode)): context, index = self._do_strong_search(obj, recursive) @@ -384,7 +379,7 @@ class Wikicode(StringMixIn): def append(self, value): """Insert *value* at the end of the list of nodes. - *value* can be anything parsable by :py:func:`.parse_anything`. + *value* can be anything parsable by :func:`.parse_anything`. """ nodes = parse_anything(value).nodes for node in nodes: @@ -393,14 +388,14 @@ class Wikicode(StringMixIn): def remove(self, obj, recursive=True): """Remove *obj* from the list of nodes. - *obj* can be either a string, a :py:class:`~.Node`, or another - :py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, - for example). 
If *obj* is a string, we will operate on all instances - of that string within the code, otherwise only on the specific instance + *obj* can be either a string, a :class:`.Node`, or another + :class:`.Wikicode` object (as created by :meth:`get_sections`, for + example). If *obj* is a string, we will operate on all instances of + that string within the code, otherwise only on the specific instance given. If *recursive* is ``True``, we will try to find *obj* within our child nodes even if it is not a direct descendant of this - :py:class:`~.Wikicode` object. If *obj* is not found, - :py:exc:`ValueError` is raised. + :class:`.Wikicode` object. If *obj* is not found, :exc:`ValueError` is + raised. """ if isinstance(obj, (Node, Wikicode)): context, index = self._do_strong_search(obj, recursive) @@ -417,10 +412,10 @@ class Wikicode(StringMixIn): def matches(self, other): """Do a loose equivalency test suitable for comparing page names. - *other* can be any string-like object, including - :py:class:`~.Wikicode`, or a tuple of these. This operation is - symmetric; both sides are adjusted. Specifically, whitespace and markup - is stripped and the first letter's case is normalized. Typical usage is + *other* can be any string-like object, including :class:`.Wikicode`, or + a tuple of these. This operation is symmetric; both sides are adjusted. + Specifically, whitespace and markup is stripped and the first letter's + case is normalized. Typical usage is ``if template.name.matches("stub"): ...``. """ cmp = lambda a, b: (a[0].upper() + a[1:] == b[0].upper() + b[1:] @@ -453,12 +448,12 @@ class Wikicode(StringMixIn): ["{{foo}}", "{{foo|{{bar}}}}"] *matches* can be used to further restrict the nodes, either as a - function (taking a single :py:class:`.Node` and returning a boolean) or - a regular expression (matched against the node's string representation - with :py:func:`re.search`). If *matches* is a regex, the flags passed - to :py:func:`re.search` are :py:const:`re.IGNORECASE`, - :py:const:`re.DOTALL`, and :py:const:`re.UNICODE`, but custom flags can - be specified by passing *flags*. + function (taking a single :class:`.Node` and returning a boolean) or a + regular expression (matched against the node's string representation + with :func:`re.search`). If *matches* is a regex, the flags passed to + :func:`re.search` are :const:`re.IGNORECASE`, :const:`re.DOTALL`, and + :const:`re.UNICODE`, but custom flags can be specified by passing + *flags*. """ gen = self._indexed_ifilter(recursive, matches, flags, forcetype) return (node for i, node in gen) @@ -466,7 +461,7 @@ class Wikicode(StringMixIn): def filter(self, *args, **kwargs): """Return a list of nodes within our list matching certain conditions. - This is equivalent to calling :py:func:`list` on :py:meth:`ifilter`. + This is equivalent to calling :func:`list` on :meth:`ifilter`. """ return list(self.ifilter(*args, **kwargs)) @@ -474,9 +469,9 @@ class Wikicode(StringMixIn): include_lead=None, include_headings=True): """Return a list of sections within the page. - Sections are returned as :py:class:`~.Wikicode` objects with a shared - node list (implemented using :py:class:`~.SmartList`) so that changes - to sections are reflected in the parent Wikicode object. + Sections are returned as :class:`.Wikicode` objects with a shared node + list (implemented using :class:`.SmartList`) so that changes to + sections are reflected in the parent Wikicode object. Each section contains all of its subsections, unless *flat* is ``True``. 
If *levels* is given, it should be a iterable of integers; @@ -484,14 +479,13 @@ class Wikicode(StringMixIn): *matches* is given, it should be either a function or a regex; only sections whose headings match it (without the surrounding equal signs) will be included. *flags* can be used to override the default regex - flags (see :py:meth:`ifilter`) if a regex *matches* is used. + flags (see :meth:`ifilter`) if a regex *matches* is used. If *include_lead* is ``True``, the first, lead section (without a heading) will be included in the list; ``False`` will not include it; the default will include it only if no specific *levels* were given. If *include_headings* is ``True``, the section's beginning - :py:class:`~.Heading` object will be included; otherwise, this is - skipped. + :class:`.Heading` object will be included; otherwise, this is skipped. """ title_matcher = self._build_matcher(matches, flags) matcher = lambda heading: (title_matcher(heading.title) and @@ -540,7 +534,7 @@ class Wikicode(StringMixIn): """Return a rendered string without unprintable code such as templates. The way a node is stripped is handled by the - :py:meth:`~.Node.__strip__` method of :py:class:`~.Node` objects, which + :meth:`~.Node.__strip__` method of :class:`.Node` objects, which generally return a subset of their nodes or ``None``. For example, templates and tags are removed completely, links are stripped to just their display part, headings are stripped to just their title. If @@ -568,9 +562,9 @@ class Wikicode(StringMixIn): """Return a hierarchical tree representation of the object. The representation is a string makes the most sense printed. It is - built by calling :py:meth:`_get_tree` on the - :py:class:`~.Wikicode` object and its children recursively. The end - result may look something like the following:: + built by calling :meth:`_get_tree` on the :class:`.Wikicode` object and + its children recursively. The end result may look something like the + following:: >>> text = "Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}" >>> print mwparserfromhell.parse(text).get_tree() From a8d2983161e422e27e0de8c1261b196e7a79363b Mon Sep 17 00:00:00 2001 From: David Winegar Date: Mon, 14 Jul 2014 10:37:36 -0700 Subject: [PATCH 040/102] Started table parsing in PyTokenizer Started parsing table support and added the start of table support. This is a big commit (ugh) and it should probably be split up into multiple smaller ones if possible, but that seems unworkable as of right now because of all the dependencies. Also breaks tests of CTokenizer (double ugh) because I haven't started table support there. May want to pick line by line on this commit later but I need to save my work for now. 
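
For reference, here is a minimal sketch (illustrative only, not code from
this commit) of how the new table contexts combine as bit flags. The
constant values mirror the ones added to contexts.py below; the _FLAGS
table and describe() helper are hypothetical:

    TABLE_OPEN = 1 << 30
    TABLE_CELL_LINE = 1 << 31
    TABLE_HEADER_LINE = 1 << 32
    TABLE_CELL_OPEN = 1 << 33
    TABLE_CELL_STYLE_POSSIBLE = 1 << 34

    _FLAGS = [
        ("TABLE_OPEN", TABLE_OPEN),
        ("TABLE_CELL_LINE", TABLE_CELL_LINE),
        ("TABLE_HEADER_LINE", TABLE_HEADER_LINE),
        ("TABLE_CELL_OPEN", TABLE_CELL_OPEN),
        ("TABLE_CELL_STYLE_POSSIBLE", TABLE_CELL_STYLE_POSSIBLE),
    ]

    def describe(context):
        """Hypothetical helper: name the table flags set in *context*."""
        return [name for name, flag in _FLAGS if context & flag]

    # A cell line inside an open table carries both flags at once:
    context = TABLE_OPEN | TABLE_CELL_LINE
    assert describe(context) == ["TABLE_OPEN", "TABLE_CELL_LINE"]

    # On a newline the tokenizer clears the line-level flags but stays
    # inside the table:
    context &= ~TABLE_CELL_LINE & ~TABLE_HEADER_LINE & ~TABLE_CELL_STYLE_POSSIBLE
    assert describe(context) == ["TABLE_OPEN"]
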
--- mwparserfromhell/definitions.py | 2 +- mwparserfromhell/parser/contexts.py | 8 +++- mwparserfromhell/parser/tokenizer.py | 76 +++++++++++++++++++++++++++++++++--- tests/tokenizer/tables.mwtest | 32 +++++++++++++++ 4 files changed, 111 insertions(+), 7 deletions(-) create mode 100644 tests/tokenizer/tables.mwtest diff --git a/mwparserfromhell/definitions.py b/mwparserfromhell/definitions.py index 6020ad1..af41f49 100644 --- a/mwparserfromhell/definitions.py +++ b/mwparserfromhell/definitions.py @@ -52,7 +52,7 @@ INVISIBLE_TAGS = [ # [mediawiki/core.git]/includes/Sanitizer.php @ 87a0aef762 SINGLE_ONLY = ["br", "hr", "meta", "link", "img"] -SINGLE = SINGLE_ONLY + ["li", "dt", "dd"] +SINGLE = SINGLE_ONLY + ["li", "dt", "dd", "th", "td", "tr"] MARKUP_TO_HTML = { "#": "li", diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index f568fac..678a392 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -155,13 +155,19 @@ FAIL_ON_EQUALS = 1 << 29 SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE + FAIL_ON_RBRACE + FAIL_ON_EQUALS) +TABLE_OPEN = 1 << 30 +TABLE_CELL_LINE = 1 << 31 +TABLE_HEADER_LINE = 1 << 32 +TABLE_CELL_OPEN = 1 << 33 +TABLE_CELL_STYLE_POSSIBLE = 1 << 34 + # Global contexts: GL_HEADING = 1 << 0 # Aggregate contexts: -FAIL = TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK_TITLE + HEADING + TAG + STYLE +FAIL = TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK_TITLE + HEADING + TAG + STYLE + TABLE_OPEN UNSAFE = (TEMPLATE_NAME + WIKILINK_TITLE + EXT_LINK_TITLE + TEMPLATE_PARAM_KEY + ARGUMENT_NAME + TAG_CLOSE) DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 073e64c..70e2d5d 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1002,6 +1002,39 @@ class Tokenizer(object): self._fail_route() return self._pop() + def _handle_table_start(self): + """Handle the start of a table.""" + # TODO - fail all other contexts on start? 
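+        # Skip past the two-character "{|" marker, but remember a reset
+        # point: if the table route fails with BadRoute, the head rewinds
+        # and "{|" is re-emitted as plain text.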
+ self._head += 2 + reset = self._head - 1 + try: + table = self._parse(contexts.TABLE_OPEN) + except BadRoute: + self._head = reset + self._emit_text("{|") + else: + self._emit_style_tag("table", "{|", table) + + def _handle_table_end(self): + self._head += 2 + return self._pop() + + def _handle_table_row(self): + self._head += 2 + self._emit(tokens.TagOpenOpen(wiki_markup="{-")) + self._emit_text("tr") + self._emit(tokens.TagCloseSelfclose()) + self._context &= ~contexts.TABLE_CELL_OPEN + + def _handle_table_cell(self): + pass + + def _handle_header_cell(self): + pass + + def _handle_cell_style(self): + pass + def _verify_safe(self, this): """Make sure we are not trying to write an invalid character.""" context = self._context @@ -1144,15 +1177,48 @@ class Tokenizer(object): result = self._parse_style() if result is not None: return result - elif self._read(-1) in ("\n", self.START): - if this in ("#", "*", ";", ":"): + elif self._read(-1) in ("\n", self.START) and this in ("#", "*", ";", ":"): self._handle_list() - elif this == next == self._read(2) == self._read(3) == "-": + elif self._read(-1) in ("\n", self.START) and this == next == self._read(2) == self._read(3) == "-": self._handle_hr() - else: - self._emit_text(this) elif this in ("\n", ":") and self._context & contexts.DL_TERM: self._handle_dl_term() + + elif (this == "{" and next == "|" and (self._read(-1) in ("\n", self.START)) or + (self._read(-2) in ("\n", self.START) and self._read(-1).strip() == "")): + if self._can_recurse(): + self._handle_table_start() + else: + self._emit_text("{|") + elif self._context & contexts.TABLE_OPEN: + if this == "|" and next == "}": + return self._handle_table_end() + elif this == "|" and next == "|" and self._context & contexts.TABLE_CELL_LINE: + self._handle_table_cell() + elif this == "|" and next == "|" and self._context & contexts.TABLE_HEADER_LINE: + self._handle_header_cell() + elif this == "!" and next == "!" and self._context & contexts.TABLE_HEADER_LINE: + self._handle_header_cell() + elif this == "|" and self._context & contexts.TABLE_CELL_STYLE_POSSIBLE: + self._handle_cell_style() + # on newline, clear out cell line contexts + elif this == "\n" and self._context & (contexts.TABLE_CELL_LINE | contexts.TABLE_HEADER_LINE | contexts.TABLE_CELL_STYLE_POSSIBLE): + self._context &= (~contexts.TABLE_CELL_LINE & ~contexts.TABLE_HEADER_LINE & ~contexts.TABLE_CELL_STYLE_POSSIBLE) + self._emit_text(this) + # newline or whitespace/newline + elif (self._read(-1) in ("\n", self.START) or + (self._read(-2) in ("\n", self.START) and self._read(-1).strip() == "")): + if this == "|" and next == "-": + self._handle_table_row() + elif this == "|" and self._can_recurse(): + self._handle_table_cell() + elif this == "!" and self._can_recurse(): + self._handle_header_cell() + else: + self._emit_text(this) + else: + self._emit_text(this) + else: self._emit_text(this) self._head += 1 diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest new file mode 100644 index 0000000..399f7fd --- /dev/null +++ b/tests/tokenizer/tables.mwtest @@ -0,0 +1,32 @@ +name: empty_table +label: Parsing an empty table. +input: "{|\n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: inline_table +label: Correctly handle tables with close on the same line. 
+input: "{||}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: no_table_close_simple +label: Handle case when there is no table close. +input: "{| " +output: [Text(text="{| ")] + +--- + +name: leading_whitespace_table +label: Handle leading whitespace for a table. +input: "foo \n \t {|\n|}" +output: [Text(text="foo \n \t "), TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: leading_characters_table +label: Don't parse as a table when leading characters are not newline or whitespace. +input: "foo \n foo \t {|\n|}" +output: [Text(text="foo \n foo \t {|\n|}")] From b7e40d7b5aea817c23de68326627c263652cc36c Mon Sep 17 00:00:00 2001 From: David Winegar Date: Mon, 14 Jul 2014 16:03:09 -0700 Subject: [PATCH 041/102] Table cells now recurse Added another stack layer for tokenizing table cells because of styling/correctness of implementation. Added many tests cases. --- mwparserfromhell/parser/tokenizer.py | 68 ++++++++++++++++++++++++++---------- tests/tokenizer/tables.mwtest | 56 +++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 18 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 70e2d5d..80cb501 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1020,17 +1020,34 @@ class Tokenizer(object): return self._pop() def _handle_table_row(self): - self._head += 2 - self._emit(tokens.TagOpenOpen(wiki_markup="{-")) + self._head += 1 + self._emit(tokens.TagOpenOpen(wiki_markup="|-")) self._emit_text("tr") self._emit(tokens.TagCloseSelfclose()) - self._context &= ~contexts.TABLE_CELL_OPEN - def _handle_table_cell(self): - pass + def _handle_table_cell(self, markup, tag, line_context): + """Parse as normal syntax unless we hit a style marker, then parse as HTML attributes""" + if not self._can_recurse(): + self._emit_text(markup) + self._head += len(markup) - 1 + return - def _handle_header_cell(self): - pass + reset = self._head + self._head += len(markup) + try: + cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | contexts.TABLE_CELL_STYLE_POSSIBLE | line_context) + except BadRoute: + self._head = reset + raise + else: + self._emit(tokens.TagOpenOpen(wiki_markup=markup)) + self._emit_text(tag) + self._emit(tokens.TagCloseSelfclose()) + self._emit_all(cell) + self._head -= 1 + + def _handle_table_cell_end(self): + return self._pop() def _handle_cell_style(self): pass @@ -1184,36 +1201,51 @@ class Tokenizer(object): elif this in ("\n", ":") and self._context & contexts.DL_TERM: self._handle_dl_term() - elif (this == "{" and next == "|" and (self._read(-1) in ("\n", self.START)) or - (self._read(-2) in ("\n", self.START) and self._read(-1).strip() == "")): + elif this == "{" and next == "|" and (self._read(-1) in ("\n", self.START) or + (self._read(-2) in ("\n", self.START) and self._read(-1).isspace())): if self._can_recurse(): self._handle_table_start() else: self._emit_text("{|") elif self._context & contexts.TABLE_OPEN: if this == "|" and next == "}": + if self._context & contexts.TABLE_CELL_OPEN: + return self._handle_table_cell_end() return self._handle_table_end() elif this == "|" and next == "|" and self._context & contexts.TABLE_CELL_LINE: - self._handle_table_cell() + if self._context & contexts.TABLE_CELL_OPEN: + return self._handle_table_cell_end() + 
self._handle_table_cell("||", "td", contexts.TABLE_CELL_LINE) elif this == "|" and next == "|" and self._context & contexts.TABLE_HEADER_LINE: - self._handle_header_cell() + if self._context & contexts.TABLE_CELL_OPEN: + return self._handle_table_cell_end() + self._handle_table_cell("||", "th", contexts.TABLE_HEADER_LINE) elif this == "!" and next == "!" and self._context & contexts.TABLE_HEADER_LINE: - self._handle_header_cell() + if self._context & contexts.TABLE_CELL_OPEN: + return self._handle_table_cell_end() + self._handle_table_cell("!!", "th", contexts.TABLE_HEADER_LINE) elif this == "|" and self._context & contexts.TABLE_CELL_STYLE_POSSIBLE: self._handle_cell_style() # on newline, clear out cell line contexts elif this == "\n" and self._context & (contexts.TABLE_CELL_LINE | contexts.TABLE_HEADER_LINE | contexts.TABLE_CELL_STYLE_POSSIBLE): + # TODO might not be handled due to DL_TERM code above + # TODO does this even work? self._context &= (~contexts.TABLE_CELL_LINE & ~contexts.TABLE_HEADER_LINE & ~contexts.TABLE_CELL_STYLE_POSSIBLE) self._emit_text(this) - # newline or whitespace/newline elif (self._read(-1) in ("\n", self.START) or - (self._read(-2) in ("\n", self.START) and self._read(-1).strip() == "")): + (self._read(-2) in ("\n", self.START) and self._read(-1).isspace())): if this == "|" and next == "-": + if self._context & contexts.TABLE_CELL_OPEN: + return self._handle_table_cell_end() self._handle_table_row() - elif this == "|" and self._can_recurse(): - self._handle_table_cell() - elif this == "!" and self._can_recurse(): - self._handle_header_cell() + elif this == "|": + if self._context & contexts.TABLE_CELL_OPEN: + return self._handle_table_cell_end() + self._handle_table_cell("|", "td", contexts.TABLE_CELL_LINE) + elif this == "!": + if self._context & contexts.TABLE_CELL_OPEN: + return self._handle_table_cell_end() + self._handle_table_cell("!", "th", contexts.TABLE_HEADER_LINE) else: self._emit_text(this) else: diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index 399f7fd..f818f65 100644 --- a/tests/tokenizer/tables.mwtest +++ b/tests/tokenizer/tables.mwtest @@ -19,6 +19,13 @@ output: [Text(text="{| ")] --- +name: no_table_close_inside_cell +label: Handle case when there is no table close while inside of a cell. +input: "{| | " +output: [Text(text="{| | ")] + +--- + name: leading_whitespace_table label: Handle leading whitespace for a table. input: "foo \n \t {|\n|}" @@ -30,3 +37,52 @@ name: leading_characters_table label: Don't parse as a table when leading characters are not newline or whitespace. input: "foo \n foo \t {|\n|}" output: [Text(text="foo \n foo \t {|\n|}")] + +--- + +name: table_row_simple +label: Simple table row. +input: "{|\n |- \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseSelfclose(), Text(text=" \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: table_cell_simple +label: Simple table cell. +input: "{|\n | foo \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: nowiki_inside_table +label: Nowiki handles pipe characters in tables. +input: "{|\n | foo | |- {| |} || ! !! 
bar \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="| |- {| |} || ! !!"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), Text(text=" bar \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: table_text_outside_cell +label: Parse text inside table but outside of a cell. +input: "{|\n bar \n | foo \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n bar \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: no_table_cell_with_leading_characters +label: Fail to create a table cell when there are leading non-whitespace characters. +input: "{|\n bar | foo \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n bar | foo \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: no_table_row_with_leading_characters +label: Fail to create a table row when there are leading non-whitespace characters. +input: "{|\n bar |- foo \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n bar |- foo \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: template_inside_table_cell +label: Template within table cell. +input: "{|\n |{{foo\n|bar=baz}} \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), Text(text=" \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] From a13bc948fae32485087feae30b115728885a7abf Mon Sep 17 00:00:00 2001 From: David Winegar Date: Tue, 15 Jul 2014 10:17:23 -0700 Subject: [PATCH 042/102] Started table cell attribute support Started support for parsing table style attributes. I suspect some of this is incorrect, need to add more tests to see. 
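
As a rough illustration of the separator rule the new style tests
exercise, here is a toy stand-in (split_cell() is hypothetical, not the
patch's _parse_as_table_style()). Everything between the cell marker and
the first following pipe is treated as attribute text, and per the
quote_with_pipe test even a pipe inside a quoted value still acts as the
separator:

    def split_cell(cell_text):
        """Toy stand-in: split a cell body at the first pipe, which
        acts as the style separator even inside a quoted value."""
        head, sep, rest = cell_text.partition("|")
        return (head, rest) if sep else (None, cell_text)

    assert split_cell(' name="foo bar"| test ') == (' name="foo bar"', ' test ')
    assert split_cell(' name="foo|bar"| test ') == (' name="foo', 'bar"| test ')
    assert split_cell(" no style separator ") == (None, " no style separator ")
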
--- mwparserfromhell/parser/tokenizer.py | 66 +++++++++++++++++++++++++++++++----- tests/tokenizer/tables.mwtest | 35 +++++++++++++++++++ 2 files changed, 92 insertions(+), 9 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 80cb501..f09adc8 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1027,30 +1027,78 @@ class Tokenizer(object): def _handle_table_cell(self, markup, tag, line_context): """Parse as normal syntax unless we hit a style marker, then parse as HTML attributes""" + table_context = contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context if not self._can_recurse(): self._emit_text(markup) + # TODO check if this works self._head += len(markup) - 1 return reset = self._head self._head += len(markup) + style = None try: - cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | contexts.TABLE_CELL_STYLE_POSSIBLE | line_context) + (cell_context, cell) = self._parse(table_context | contexts.TABLE_CELL_STYLE_POSSIBLE) except BadRoute: self._head = reset raise - else: - self._emit(tokens.TagOpenOpen(wiki_markup=markup)) - self._emit_text(tag) - self._emit(tokens.TagCloseSelfclose()) - self._emit_all(cell) - self._head -= 1 + # except for handling cell style + except StopIteration: + self._head = reset + len(markup) + try: + style = self._parse_as_table_style("|") + (cell_context, cell) = self._parse(table_context) + except BadRoute: + assert False + self._head = reset + raise + self._emit(tokens.TagOpenOpen(wiki_markup=markup)) + self._emit_text(tag) + if style: + # this looks highly suspicious + if type(style[0] == tokens.Text): + style.pop(0) + self._emit_all(style) + self._emit(tokens.TagCloseSelfclose()) + self._emit_all(cell) + # keep header/cell line contexts + self._context |= cell_context & (contexts.TABLE_HEADER_LINE | contexts.TABLE_CELL_LINE) + # offset displacement done by _parse() + self._head -= 1 + + def _parse_as_table_style(self, end_token): + data = _TagOpenData() + data.context = _TagOpenData.CX_ATTR_READY + while True: + this, next = self._read(), self._read(1) + can_exit = (not data.context & (data.CX_NAME) or + data.context & data.CX_NOTE_SPACE) + if this is self.END: + if self._context & contexts.TAG_ATTR: + if data.context & data.CX_QUOTED: + # Unclosed attribute quote: reset, don't die + data.context = data.CX_ATTR_VALUE + self._pop() + self._head = data.reset + continue + self._pop() + self._fail_route() + elif this == end_token and can_exit: + if data.context & (data.CX_ATTR_NAME | data.CX_ATTR_VALUE): + self._push_tag_buffer(data) + self._head += 1 + return self._pop() + else: + self._handle_tag_data(data, this) + self._head += 1 def _handle_table_cell_end(self): - return self._pop() + """Returns the context and stack in a tuple.""" + return (self._context, self._pop()) def _handle_cell_style(self): - pass + """Pop the cell off the stack and try to parse as style""" + raise StopIteration() def _verify_safe(self, this): """Make sure we are not trying to write an invalid character.""" diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index f818f65..e7eb40c 100644 --- a/tests/tokenizer/tables.mwtest +++ b/tests/tokenizer/tables.mwtest @@ -54,6 +54,13 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text --- +name: table_cell_inline +label: Multiple inline table cells. 
+input: "{|\n | foo || bar || test \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(), Text(text=" bar "),TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + name: nowiki_inside_table label: Nowiki handles pipe characters in tables. input: "{|\n | foo | |- {| |} || ! !! bar \n|}" @@ -86,3 +93,31 @@ name: template_inside_table_cell label: Template within table cell. input: "{|\n |{{foo\n|bar=baz}} \n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), Text(text=" \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: table_cell_attributes +label: Parse table cell style attributes. +input: "{| \n | name="foo bar"| test \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: table_cell_attributes_quote_with_pipe +label: Pipe inside an attribute quote should still be used as a style separator. +input: "{| \n | name="foo|bar"| test \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo"), TagCloseSelfclose(), Text(text="bar\"| test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: table_cell_attributes_name_with_pipe +label: Pipe inside an attribute name should still be used as a style separator. +input: "{| \n | name|="foo bar"| test \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseSelfclose(), Text(text="=\"foo bar"| test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: table_cell_attributes_pipe_after_equals +label: Pipe inside an attribute should still be used as a style separator after an equals. +input: "{| \n | name=|"foo|bar"| test \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagCloseSelfclose(), Text(text="\"foo|bar\"| test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] From 0bba69d5dc32bea027a13573490263530456269d Mon Sep 17 00:00:00 2001 From: David Winegar Date: Tue, 15 Jul 2014 10:23:44 -0700 Subject: [PATCH 043/102] Added tests/support for header cells Support for header cells was mostly in already, just needed minor changes. Added two tests as well. 
--- mwparserfromhell/parser/tokenizer.py | 2 +- tests/tokenizer/tables.mwtest | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index f09adc8..b899e75 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -63,7 +63,7 @@ class Tokenizer(object): START = object() END = object() MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "'", "#", "*", ";", - ":", "/", "-", "\n", START, END] + ":", "/", "-", "!", "\n", START, END] MAX_DEPTH = 40 MAX_CYCLES = 100000 regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE) diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index e7eb40c..1087381 100644 --- a/tests/tokenizer/tables.mwtest +++ b/tests/tokenizer/tables.mwtest @@ -61,6 +61,20 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text --- +name: table_header_simple +label: Simple header cell. +input: "{|\n ! foo \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: table_header_inline +label: Multiple inline header cells. +input: "{|\n ! foo || bar !! test \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagCloseSelfclose(), Text(text=" bar "),TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + name: nowiki_inside_table label: Nowiki handles pipe characters in tables. input: "{|\n | foo | |- {| |} || ! !! bar \n|}" From 9f159ecfa2443cbacf542c174058f3cd37eeb08d Mon Sep 17 00:00:00 2001 From: David Winegar Date: Tue, 15 Jul 2014 13:32:33 -0700 Subject: [PATCH 044/102] Add table start/row start style attribute support Started styling attributes for table row and table start. Still not entirely sure about this, definitely need to make changes regarding padding. --- mwparserfromhell/parser/tokenizer.py | 49 ++++++++++++++++++++++++++++++------ tests/tokenizer/tables.mwtest | 24 +++++++++++++++++- 2 files changed, 64 insertions(+), 9 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index b899e75..c2d5240 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1007,23 +1007,53 @@ class Tokenizer(object): # TODO - fail all other contexts on start? 
self._head += 2 reset = self._head - 1 + style = None try: + self._push(contexts.TABLE_OPEN) + style = self._parse_as_table_style("\n", break_on_table_end=True) + if len(style) == 0: + self._head = reset + 1 table = self._parse(contexts.TABLE_OPEN) except BadRoute: self._head = reset self._emit_text("{|") else: - self._emit_style_tag("table", "{|", table) + self._emit(tokens.TagOpenOpen(wiki_markup="{|")) + self._emit_text("table") + if style: + self._emit_all(style) + self._emit(tokens.TagCloseOpen()) + self._emit_all(table) + self._emit(tokens.TagOpenClose()) + self._emit_text("table") + self._emit(tokens.TagCloseClose()) + # self._emit_style_tag("table", "{|", table) def _handle_table_end(self): self._head += 2 return self._pop() def _handle_table_row(self): - self._head += 1 - self._emit(tokens.TagOpenOpen(wiki_markup="|-")) - self._emit_text("tr") - self._emit(tokens.TagCloseSelfclose()) + reset = self._head + self._head += 2 + try: + self._push(contexts.TABLE_OPEN) + style = self._parse_as_table_style("\n") + if len(style) == 0: + self._head = reset + 2 + except BadRoute: + self._head = reset + raise + else: + self._emit(tokens.TagOpenOpen(wiki_markup="|-")) + self._emit_text("tr") + if style: + # this looks highly suspicious + # if type(style[0] == tokens.Text): + # style.pop(0) + self._emit_all(style) + self._emit(tokens.TagCloseSelfclose()) + self._head -= 1 def _handle_table_cell(self, markup, tag, line_context): """Parse as normal syntax unless we hit a style marker, then parse as HTML attributes""" @@ -1047,9 +1077,10 @@ class Tokenizer(object): self._head = reset + len(markup) try: style = self._parse_as_table_style("|") + # Don't parse the style separator + self._head += 1 (cell_context, cell) = self._parse(table_context) except BadRoute: - assert False self._head = reset raise self._emit(tokens.TagOpenOpen(wiki_markup=markup)) @@ -1066,7 +1097,7 @@ class Tokenizer(object): # offset displacement done by _parse() self._head -= 1 - def _parse_as_table_style(self, end_token): + def _parse_as_table_style(self, end_token, break_on_table_end=False): data = _TagOpenData() data.context = _TagOpenData.CX_ATTR_READY while True: @@ -1086,7 +1117,9 @@ class Tokenizer(object): elif this == end_token and can_exit: if data.context & (data.CX_ATTR_NAME | data.CX_ATTR_VALUE): self._push_tag_buffer(data) - self._head += 1 + # self._head += 1 + return self._pop() + elif break_on_table_end and this == "|" and next == "}": return self._pop() else: self._handle_tag_data(data, this) diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index 1087381..fa068fd 100644 --- a/tests/tokenizer/tables.mwtest +++ b/tests/tokenizer/tables.mwtest @@ -127,7 +127,7 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text name: table_cell_attributes_name_with_pipe label: Pipe inside an attribute name should still be used as a style separator. 
input: "{| \n | name|="foo bar"| test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseSelfclose(), Text(text="=\"foo bar"| test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseSelfclose(), Text(text="=\"foo bar\"| test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] --- @@ -135,3 +135,25 @@ name: table_cell_attributes_pipe_after_equals label: Pipe inside an attribute should still be used as a style separator after an equals. input: "{| \n | name=|"foo|bar"| test \n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagCloseSelfclose(), Text(text="\"foo|bar\"| test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: table_row_attributes +label: Parse table row style attributes. +input: "{| \n |- name="foo bar"\n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(), Text(text="\n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: table_row_attributes_crazy_whitespace +label: Parse table row style attributes with different whitespace. +input: "{| \t \n |- \t name="foo bar"\n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \t \n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" \t ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(), Text(text="\n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + + +--- + +name: table_attributes +label: Parse table style attributes. +input: "{| name="foo bar"\n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"),TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(), Text(text="\n"), TagOpenClose(), Text(text="table"), TagCloseClose()] From d356a570b32d849ba581a02b77f2aa5b8cdb8ba2 Mon Sep 17 00:00:00 2001 From: David Winegar Date: Tue, 15 Jul 2014 14:37:58 -0700 Subject: [PATCH 045/102] Added closing_wiki_markup support to Tag node Added support for allowing different wiki syntax for replacing the opening and closing tags. Added for table support. 
--- mwparserfromhell/nodes/tag.py | 34 +++++++++++++++++++++++++-- mwparserfromhell/parser/builder.py | 4 +++- tests/test_tag.py | 18 +++++++++++++++ tests/tokenizer/tables.mwtest | 47 ++++++++++++++++++++++---------------- 4 files changed, 80 insertions(+), 23 deletions(-) diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index 7cbe78d..0fe580f 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -35,7 +35,7 @@ class Tag(Node): def __init__(self, tag, contents=None, attrs=None, wiki_markup=None, self_closing=False, invalid=False, implicit=False, padding="", - closing_tag=None): + closing_tag=None, closing_wiki_markup=None): super(Tag, self).__init__() self._tag = tag if contents is None and not self_closing: @@ -44,6 +44,13 @@ class Tag(Node): self._contents = contents self._attrs = attrs if attrs else [] self._wiki_markup = wiki_markup + if wiki_markup and not self_closing: + if closing_wiki_markup: + self._closing_wiki_markup = closing_wiki_markup + else: + self._closing_wiki_markup = wiki_markup + else: + self._closing_wiki_markup = None self._self_closing = self_closing self._invalid = invalid self._implicit = implicit @@ -55,10 +62,11 @@ class Tag(Node): def __unicode__(self): if self.wiki_markup: + attrs = "".join([str(attr) for attr in self.attributes]) if self.attributes else "" if self.self_closing: return self.wiki_markup else: - return self.wiki_markup + str(self.contents) + self.wiki_markup + return self.wiki_markup + attrs + str(self.contents) + self.closing_wiki_markup result = ("``).""" return self._self_closing @@ -185,10 +206,19 @@ class Tag(Node): @wiki_markup.setter def wiki_markup(self, value): self._wiki_markup = str(value) if value else None + if not value or not self.closing_wiki_markup: + self.closing_wiki_markup = str(value) if value else None + + + @closing_wiki_markup.setter + def closing_wiki_markup(self, value): + self._closing_wiki_markup = str(value) if value and not self.self_closing else None @self_closing.setter def self_closing(self, value): self._self_closing = bool(value) + if not bool(value): + self.closing_wiki_markup = None @invalid.setter def invalid(self, value): diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index 2d68036..8d1852e 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -248,6 +248,7 @@ class Builder(object): close_tokens = (tokens.TagCloseSelfclose, tokens.TagCloseClose) implicit, attrs, contents, closing_tag = False, [], None, None wiki_markup, invalid = token.wiki_markup, token.invalid or False + closing_wiki_markup = None self._push() while self._tokens: token = self._tokens.pop() @@ -258,6 +259,7 @@ class Builder(object): tag = self._pop() self._push() elif isinstance(token, tokens.TagOpenClose): + closing_wiki_markup = token.wiki_markup contents = self._pop() self._push() elif isinstance(token, close_tokens): @@ -270,7 +272,7 @@ class Builder(object): self_closing = False closing_tag = self._pop() return Tag(tag, contents, attrs, wiki_markup, self_closing, - invalid, implicit, padding, closing_tag) + invalid, implicit, padding, closing_tag, closing_wiki_markup) else: self._write(self._handle_token(token)) raise ParserError("_handle_tag() missed a close token") diff --git a/tests/test_tag.py b/tests/test_tag.py index 7577cce..950233f 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -171,6 +171,24 @@ class TestTag(TreeEqualityTestCase): self.assertFalse(node.wiki_markup) 
self.assertEqual("italic text", node) + def test_closing_wiki_markup(self): + """test getter/setter behavior for closing_wiki_markup attribute""" + node = Tag(wraptext("table"), wraptext("\n")) + self.assertIs(None, node.closing_wiki_markup) + node.wiki_markup = "{|" + self.assertEqual("{|", node.closing_wiki_markup) + node.closing_wiki_markup = "|}" + self.assertEqual("|}", node.closing_wiki_markup) + self.assertEqual("{|\n|}", node) + node.wiki_markup = False + self.assertFalse(node.closing_wiki_markup) + node.self_closing = True + node.wiki_markup = "{|" + self.assertIs(None, node.closing_wiki_markup) + node.wiki_markup = False + node.self_closing = False + self.assertEqual("\n
    ", node) + def test_self_closing(self): """test getter/setter for the self_closing attribute""" node = Tag(wraptext("ref"), wraptext("foobar")) diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index fa068fd..bfdd83f 100644 --- a/tests/tokenizer/tables.mwtest +++ b/tests/tokenizer/tables.mwtest @@ -1,14 +1,14 @@ name: empty_table label: Parsing an empty table. input: "{|\n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n"), TagOpenClose(), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: inline_table label: Correctly handle tables with close on the same line. input: "{||}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), TagOpenClose(), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- @@ -29,7 +29,7 @@ output: [Text(text="{| | ")] name: leading_whitespace_table label: Handle leading whitespace for a table. input: "foo \n \t {|\n|}" -output: [Text(text="foo \n \t "), TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n"), TagOpenClose(), Text(text="table"), TagCloseClose()] +output: [Text(text="foo \n \t "), TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- @@ -43,112 +43,119 @@ output: [Text(text="foo \n foo \t {|\n|}")] name: table_row_simple label: Simple table row. input: "{|\n |- \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseSelfclose(), Text(text=" \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseSelfclose(), Text(text=" \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_simple label: Simple table cell. input: "{|\n | foo \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_inline label: Multiple inline table cells. 
input: "{|\n | foo || bar || test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(), Text(text=" bar "),TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(), Text(text=" bar "),TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_header_simple label: Simple header cell. input: "{|\n ! foo \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_header_inline label: Multiple inline header cells. input: "{|\n ! foo || bar !! test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagCloseSelfclose(), Text(text=" bar "),TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagCloseSelfclose(), Text(text=" bar "),TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: nowiki_inside_table label: Nowiki handles pipe characters in tables. input: "{|\n | foo | |- {| |} || ! !! bar \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="| |- {| |} || ! !!"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), Text(text=" bar \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="| |- {| |} || ! !!"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), Text(text=" bar \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_text_outside_cell label: Parse text inside table but outside of a cell. 
input: "{|\n bar \n | foo \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n bar \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n bar \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: no_table_cell_with_leading_characters label: Fail to create a table cell when there are leading non-whitespace characters. input: "{|\n bar | foo \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n bar | foo \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n bar | foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: no_table_row_with_leading_characters label: Fail to create a table row when there are leading non-whitespace characters. input: "{|\n bar |- foo \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n bar |- foo \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n bar |- foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: template_inside_table_cell label: Template within table cell. input: "{|\n |{{foo\n|bar=baz}} \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), Text(text=" \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), Text(text=" \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes label: Parse table cell style attributes. input: "{| \n | name="foo bar"| test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes_quote_with_pipe label: Pipe inside an attribute quote should still be used as a style separator. 
input: "{| \n | name="foo|bar"| test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo"), TagCloseSelfclose(), Text(text="bar\"| test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo"), TagCloseSelfclose(), Text(text="bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes_name_with_pipe label: Pipe inside an attribute name should still be used as a style separator. input: "{| \n | name|="foo bar"| test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseSelfclose(), Text(text="=\"foo bar\"| test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseSelfclose(), Text(text="=\"foo bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes_pipe_after_equals label: Pipe inside an attribute should still be used as a style separator after an equals. input: "{| \n | name=|"foo|bar"| test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagCloseSelfclose(), Text(text="\"foo|bar\"| test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagCloseSelfclose(), Text(text="\"foo|bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] + +--- + +name: table_cell_attributes_templates +label: Pipe inside attributes shouldn't be style separator. +input: "{| \n | {{comment|template=baz}} | test \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_after_eq="", pad_first=" ", pad_before_eq=" "), TemplateOpen(), Text(text="comment"), TemplateParamSeparator(), Text(text="template"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_row_attributes label: Parse table row style attributes. 
input: "{| \n |- name="foo bar"\n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(), Text(text="\n"), TagOpenClose(), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(), Text(text="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_row_attributes_crazy_whitespace label: Parse table row style attributes with different whitespace. input: "{| \t \n |- \t name="foo bar"\n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \t \n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" \t ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(), Text(text="\n"), TagOpenClose(), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \t \n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" \t ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(), Text(text="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- @@ -156,4 +163,4 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text name: table_attributes label: Parse table style attributes. input: "{| name="foo bar"\n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"),TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(), Text(text="\n"), TagOpenClose(), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"),TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(), Text(text="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] From 9e4bb0c7e5b0289bc110cb41619b883b57f55954 Mon Sep 17 00:00:00 2001 From: David Winegar Date: Tue, 15 Jul 2014 15:45:53 -0700 Subject: [PATCH 046/102] Clean up and style changes Added comments, tried to keep to 80 character lines. 
--- mwparserfromhell/parser/contexts.py | 24 ++++++++++--- mwparserfromhell/parser/tokenizer.py | 67 +++++++++++++++++++----------------- 2 files changed, 55 insertions(+), 36 deletions(-) diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index 678a392..564ceca 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -90,6 +90,15 @@ Local (stack-specific) contexts: * :const:`FAIL_ON_RBRACE` * :const:`FAIL_ON_EQUALS` +* :const:`TABLE` + + * :const:`TABLE_OPEN` + * :const:`TABLE_CELL_OPEN` + * :const:`TABLE_CELL_STYLE_POSSIBLE` + * :const:`TABLE_TD_LINE` + * :const:`TABLE_TH_LINE` + * :const:`TABLE_CELL_LINE_CONTEXTS` + Global contexts: * :const:`GL_HEADING` @@ -156,10 +165,14 @@ SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE + FAIL_ON_RBRACE + FAIL_ON_EQUALS) TABLE_OPEN = 1 << 30 -TABLE_CELL_LINE = 1 << 31 -TABLE_HEADER_LINE = 1 << 32 -TABLE_CELL_OPEN = 1 << 33 -TABLE_CELL_STYLE_POSSIBLE = 1 << 34 +TABLE_CELL_OPEN = 1 << 31 +TABLE_CELL_STYLE_POSSIBLE = 1 << 32 +TABLE_TD_LINE = 1 << 33 +TABLE_TH_LINE = 1 << 34 +TABLE_CELL_LINE_CONTEXTS = (TABLE_TD_LINE + TABLE_TH_LINE + + TABLE_CELL_STYLE_POSSIBLE) +TABLE = (TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE_POSSIBLE + + TABLE_TD_LINE + TABLE_TH_LINE) # Global contexts: @@ -167,7 +180,8 @@ GL_HEADING = 1 << 0 # Aggregate contexts: -FAIL = TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK_TITLE + HEADING + TAG + STYLE + TABLE_OPEN +FAIL = (TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK_TITLE + HEADING + TAG + + STYLE + TABLE_OPEN) UNSAFE = (TEMPLATE_NAME + WIKILINK_TITLE + EXT_LINK_TITLE + TEMPLATE_PARAM_KEY + ARGUMENT_NAME + TAG_CLOSE) DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index c2d5240..4a9c0f5 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1004,18 +1004,18 @@ class Tokenizer(object): def _handle_table_start(self): """Handle the start of a table.""" - # TODO - fail all other contexts on start? 
self._head += 2 - reset = self._head - 1 + reset = self._head style = None try: self._push(contexts.TABLE_OPEN) style = self._parse_as_table_style("\n", break_on_table_end=True) if len(style) == 0: - self._head = reset + 1 + self._head = reset table = self._parse(contexts.TABLE_OPEN) except BadRoute: - self._head = reset + # offset displacement done by _parse() + self._head = reset - 1 self._emit_text("{|") else: self._emit(tokens.TagOpenOpen(wiki_markup="{|")) @@ -1024,16 +1024,22 @@ class Tokenizer(object): self._emit_all(style) self._emit(tokens.TagCloseOpen()) self._emit_all(table) - self._emit(tokens.TagOpenClose()) + self._emit(tokens.TagOpenClose(wiki_markup="|}")) self._emit_text("table") self._emit(tokens.TagCloseClose()) - # self._emit_style_tag("table", "{|", table) def _handle_table_end(self): + """Return the stack in order to handle the table end.""" self._head += 2 return self._pop() def _handle_table_row(self): + """Parse as style until end of the line, then continue.""" + if not self._can_recurse(): + self._emit_text("|-") + self._head += 2 + return + reset = self._head self._head += 2 try: @@ -1048,22 +1054,20 @@ class Tokenizer(object): self._emit(tokens.TagOpenOpen(wiki_markup="|-")) self._emit_text("tr") if style: - # this looks highly suspicious - # if type(style[0] == tokens.Text): - # style.pop(0) self._emit_all(style) self._emit(tokens.TagCloseSelfclose()) + # offset displacement done by _parse() self._head -= 1 def _handle_table_cell(self, markup, tag, line_context): - """Parse as normal syntax unless we hit a style marker, then parse as HTML attributes""" - table_context = contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context + """Parse as normal syntax unless we hit a style marker, then parse style + as HTML attributes and the remainder as normal syntax.""" if not self._can_recurse(): self._emit_text(markup) - # TODO check if this works self._head += len(markup) - 1 return + table_context = contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context reset = self._head self._head += len(markup) style = None @@ -1074,8 +1078,10 @@ class Tokenizer(object): raise # except for handling cell style except StopIteration: + self._pop() self._head = reset + len(markup) try: + self._push(table_context) style = self._parse_as_table_style("|") # Don't parse the style separator self._head += 1 @@ -1083,21 +1089,20 @@ class Tokenizer(object): except BadRoute: self._head = reset raise + self._emit(tokens.TagOpenOpen(wiki_markup=markup)) self._emit_text(tag) if style: - # this looks highly suspicious - if type(style[0] == tokens.Text): - style.pop(0) self._emit_all(style) self._emit(tokens.TagCloseSelfclose()) self._emit_all(cell) # keep header/cell line contexts - self._context |= cell_context & (contexts.TABLE_HEADER_LINE | contexts.TABLE_CELL_LINE) + self._context |= cell_context & (contexts.TABLE_TH_LINE | contexts.TABLE_TD_LINE) # offset displacement done by _parse() self._head -= 1 def _parse_as_table_style(self, end_token, break_on_table_end=False): + """Parse until ``end_token`` as style attributes for a table.""" data = _TagOpenData() data.context = _TagOpenData.CX_ATTR_READY while True: @@ -1117,7 +1122,6 @@ class Tokenizer(object): elif this == end_token and can_exit: if data.context & (data.CX_ATTR_NAME | data.CX_ATTR_VALUE): self._push_tag_buffer(data) - # self._head += 1 return self._pop() elif break_on_table_end and this == "|" and next == "}": return self._pop() @@ -1130,7 +1134,7 @@ class Tokenizer(object): return (self._context, self._pop()) def 
_handle_cell_style(self): - """Pop the cell off the stack and try to parse as style""" + """Pop the cell off the stack and try to parse as style.""" raise StopIteration() def _verify_safe(self, this): @@ -1281,7 +1285,10 @@ class Tokenizer(object): self._handle_hr() elif this in ("\n", ":") and self._context & contexts.DL_TERM: self._handle_dl_term() - + if this == "\n": + # kill potential table contexts + self._context &= ~contexts.TABLE_CELL_LINE_CONTEXTS + # Start of table parsing elif this == "{" and next == "|" and (self._read(-1) in ("\n", self.START) or (self._read(-2) in ("\n", self.START) and self._read(-1).isspace())): if self._can_recurse(): @@ -1293,25 +1300,23 @@ class Tokenizer(object): if self._context & contexts.TABLE_CELL_OPEN: return self._handle_table_cell_end() return self._handle_table_end() - elif this == "|" and next == "|" and self._context & contexts.TABLE_CELL_LINE: + elif this == "|" and next == "|" and self._context & contexts.TABLE_TD_LINE: if self._context & contexts.TABLE_CELL_OPEN: return self._handle_table_cell_end() - self._handle_table_cell("||", "td", contexts.TABLE_CELL_LINE) - elif this == "|" and next == "|" and self._context & contexts.TABLE_HEADER_LINE: + self._handle_table_cell("||", "td", contexts.TABLE_TD_LINE) + elif this == "|" and next == "|" and self._context & contexts.TABLE_TH_LINE: if self._context & contexts.TABLE_CELL_OPEN: return self._handle_table_cell_end() - self._handle_table_cell("||", "th", contexts.TABLE_HEADER_LINE) - elif this == "!" and next == "!" and self._context & contexts.TABLE_HEADER_LINE: + self._handle_table_cell("||", "th", contexts.TABLE_TH_LINE) + elif this == "!" and next == "!" and self._context & contexts.TABLE_TH_LINE: if self._context & contexts.TABLE_CELL_OPEN: return self._handle_table_cell_end() - self._handle_table_cell("!!", "th", contexts.TABLE_HEADER_LINE) + self._handle_table_cell("!!", "th", contexts.TABLE_TH_LINE) elif this == "|" and self._context & contexts.TABLE_CELL_STYLE_POSSIBLE: self._handle_cell_style() # on newline, clear out cell line contexts - elif this == "\n" and self._context & (contexts.TABLE_CELL_LINE | contexts.TABLE_HEADER_LINE | contexts.TABLE_CELL_STYLE_POSSIBLE): - # TODO might not be handled due to DL_TERM code above - # TODO does this even work? - self._context &= (~contexts.TABLE_CELL_LINE & ~contexts.TABLE_HEADER_LINE & ~contexts.TABLE_CELL_STYLE_POSSIBLE) + elif this == "\n" and self._context & contexts.TABLE_CELL_LINE_CONTEXTS: + self._context &= ~contexts.TABLE_CELL_LINE_CONTEXTS self._emit_text(this) elif (self._read(-1) in ("\n", self.START) or (self._read(-2) in ("\n", self.START) and self._read(-1).isspace())): @@ -1322,11 +1327,11 @@ class Tokenizer(object): elif this == "|": if self._context & contexts.TABLE_CELL_OPEN: return self._handle_table_cell_end() - self._handle_table_cell("|", "td", contexts.TABLE_CELL_LINE) + self._handle_table_cell("|", "td", contexts.TABLE_TD_LINE) elif this == "!": if self._context & contexts.TABLE_CELL_OPEN: return self._handle_table_cell_end() - self._handle_table_cell("!", "th", contexts.TABLE_HEADER_LINE) + self._handle_table_cell("!", "th", contexts.TABLE_TH_LINE) else: self._emit_text(this) else: From ec080018716f66efdb09332ad6de8bf7b8096e99 Mon Sep 17 00:00:00 2001 From: David Winegar Date: Tue, 15 Jul 2014 18:19:48 -0700 Subject: [PATCH 047/102] Tables and rows now use newline as padding Tables and rows use newlines as padding, partly because these characters are pretty important to the integrity of the table. 
They might need to be in the preceding whitespace of inner tags instead as padding after, not sure. --- mwparserfromhell/nodes/tag.py | 39 +++++++++++----------- mwparserfromhell/parser/builder.py | 1 + mwparserfromhell/parser/tokenizer.py | 32 ++++++++++-------- tests/test_tag.py | 5 --- tests/tokenizer/tables.mwtest | 65 ++++++++++++++++++++++++------------ 5 files changed, 81 insertions(+), 61 deletions(-) diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index 0fe580f..b3ea85c 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -44,11 +44,10 @@ class Tag(Node): self._contents = contents self._attrs = attrs if attrs else [] self._wiki_markup = wiki_markup - if wiki_markup and not self_closing: - if closing_wiki_markup: - self._closing_wiki_markup = closing_wiki_markup - else: - self._closing_wiki_markup = wiki_markup + if closing_wiki_markup: + self._closing_wiki_markup = closing_wiki_markup + elif wiki_markup and not self_closing: + self._closing_wiki_markup = wiki_markup else: self._closing_wiki_markup = None self._self_closing = self_closing @@ -63,10 +62,12 @@ class Tag(Node): def __unicode__(self): if self.wiki_markup: attrs = "".join([str(attr) for attr in self.attributes]) if self.attributes else "" + close = self.closing_wiki_markup if self.closing_wiki_markup else "" + padding = self.padding if self.padding else "" if self.self_closing: - return self.wiki_markup + return self.wiki_markup + attrs + close + padding else: - return self.wiki_markup + attrs + str(self.contents) + self.closing_wiki_markup + return self.wiki_markup + attrs + padding + str(self.contents) + close result = ("\n", node) def test_self_closing(self): diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index bfdd83f..7cf826c 100644 --- a/tests/tokenizer/tables.mwtest +++ b/tests/tokenizer/tables.mwtest @@ -1,14 +1,14 @@ name: empty_table label: Parsing an empty table. input: "{|\n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: inline_table label: Correctly handle tables with close on the same line. input: "{||}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=""), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- @@ -29,7 +29,7 @@ output: [Text(text="{| | ")] name: leading_whitespace_table label: Handle leading whitespace for a table. input: "foo \n \t {|\n|}" -output: [Text(text="foo \n \t "), TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [Text(text="foo \n \t "), TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- @@ -43,119 +43,133 @@ output: [Text(text="foo \n foo \t {|\n|}")] name: table_row_simple label: Simple table row. 
input: "{|\n |- \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseSelfclose(), Text(text=" \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseSelfclose(padding=" \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_simple label: Simple table cell. input: "{|\n | foo \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_inline label: Multiple inline table cells. input: "{|\n | foo || bar || test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(), Text(text=" bar "),TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(), Text(text=" bar "),TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_header_simple label: Simple header cell. input: "{|\n ! foo \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_header_inline label: Multiple inline header cells. input: "{|\n ! foo || bar !! 
test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagCloseSelfclose(), Text(text=" bar "),TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagCloseSelfclose(), Text(text=" bar "),TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: nowiki_inside_table label: Nowiki handles pipe characters in tables. input: "{|\n | foo | |- {| |} || ! !! bar \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="| |- {| |} || ! !!"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), Text(text=" bar \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="| |- {| |} || ! !!"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), Text(text=" bar \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_text_outside_cell label: Parse text inside table but outside of a cell. input: "{|\n bar \n | foo \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n bar \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" bar \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: no_table_cell_with_leading_characters label: Fail to create a table cell when there are leading non-whitespace characters. input: "{|\n bar | foo \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n bar | foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" bar | foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: no_table_row_with_leading_characters label: Fail to create a table row when there are leading non-whitespace characters. 
input: "{|\n bar |- foo \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n bar |- foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" bar |- foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: template_inside_table_cell label: Template within table cell. input: "{|\n |{{foo\n|bar=baz}} \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), Text(text=" \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), Text(text=" \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes label: Parse table cell style attributes. input: "{| \n | name="foo bar"| test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(wiki_markup="|"), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes_quote_with_pipe label: Pipe inside an attribute quote should still be used as a style separator. input: "{| \n | name="foo|bar"| test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo"), TagCloseSelfclose(), Text(text="bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo"), TagCloseSelfclose(wiki_markup="|"), Text(text="bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes_name_with_pipe label: Pipe inside an attribute name should still be used as a style separator. 
input: "{| \n | name|="foo bar"| test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseSelfclose(), Text(text="=\"foo bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text="" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseSelfclose(wiki_markup="|"), Text(text="=\"foo bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes_pipe_after_equals label: Pipe inside an attribute should still be used as a style separator after an equals. input: "{| \n | name=|"foo|bar"| test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagCloseSelfclose(), Text(text="\"foo|bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagCloseSelfclose(wiki_markup="|"), Text(text="\"foo|bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes_templates label: Pipe inside attributes shouldn't be style separator. input: "{| \n | {{comment|template=baz}} | test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_after_eq="", pad_first=" ", pad_before_eq=" "), TemplateOpen(), Text(text="comment"), TemplateParamSeparator(), Text(text="template"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_after_eq="", pad_first=" ", pad_before_eq=" "), TemplateOpen(), Text(text="comment"), TemplateParamSeparator(), Text(text="template"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), TagCloseSelfclose(wiki_markup="|"), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] + +--- + +name: header_cell_attributes +label: Parse header cell style attributes. +input: "{| \n ! name="foo bar"| test \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(wiki_markup="|"), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] + +--- + +name: inline_cell_attributes +label: Parse cell style attributes of inline cells. +input: "{| \n ! 
name="foo bar" | test ||color="red"| markup!!foo | time \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagAttrStart(pad_after_eq="", pad_first=" ", pad_before_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(wiki_markup="|"), Text(text=" test "), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagAttrStart(pad_first="", pad_before_eq="", pad_after_eq=""), Text(text="color"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="red"), TagCloseSelfclose(wiki_markup="|"), Text(text=" markup"), TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagAttrStart(pad_first="", pad_before_eq=" ", pad_after_eq=""), Text(text="foo"), TagCloseSelfclose(wiki_markup="|"), Text(text=" time \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_row_attributes label: Parse table row style attributes. input: "{| \n |- name="foo bar"\n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(), Text(text="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_row_attributes_crazy_whitespace label: Parse table row style attributes with different whitespace. -input: "{| \t \n |- \t name="foo bar"\n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \t \n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" \t ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(), Text(text="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +input: "{| \t \n |- \t name="foo bar" \t \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(" \t \n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" \t ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(padding=" \t \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- @@ -163,4 +177,11 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text name: table_attributes label: Parse table style attributes. 
input: "{| name="foo bar"\n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"),TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(), Text(text="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] + +--- + +name: inline_table_attributes +label: Correctly handle attributes in inline tables. +input: "{| foo="tee bar" |}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"),TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="tee bar"), TagCloseOpen(padding=" "), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] From f1664a8d67d7544d6524bd8de3ab3e554247bc2e Mon Sep 17 00:00:00 2001 From: David Winegar Date: Wed, 16 Jul 2014 10:00:58 -0700 Subject: [PATCH 048/102] Updated row and table handling Changed row recursion handling to make sure the tag is emitted even when hitting recursion limits. Need to test table recursion to make sure that works. Also fixed a bug in which tables were eating the trailing token. Added several tests for rows and trailing tokens with tables. --- mwparserfromhell/parser/tokenizer.py | 33 ++++++++++++++++----------------- tests/tokenizer/tables.mwtest | 36 +++++++++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 18 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 0829e7d..787ea0a 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1027,6 +1027,8 @@ class Tokenizer(object): self._emit(tokens.TagOpenClose(wiki_markup="|}")) self._emit_text("table") self._emit(tokens.TagCloseClose()) + # offset displacement done by _parse() + self._head -= 1 def _handle_table_end(self): """Return the stack in order to handle the table end.""" @@ -1035,25 +1037,22 @@ class Tokenizer(object): def _handle_table_row(self): """Parse as style until end of the line, then continue.""" - if not self._can_recurse(): - self._emit_text("|-") - self._head += 2 - return - reset = self._head self._head += 2 - try: - self._push(contexts.TABLE_OPEN) - (style, padding) = self._parse_as_table_style("\n") - except BadRoute: - self._head = reset - raise - else: - self._emit(tokens.TagOpenOpen(wiki_markup="|-")) - self._emit_text("tr") - if style: - self._emit_all(style) - self._emit(tokens.TagCloseSelfclose(padding=padding)) + style, padding = None, "" + # If we can't recurse, still tokenize tag but parse style attrs as text + if self._can_recurse(): + try: + self._push(contexts.TABLE_OPEN) + (style, padding) = self._parse_as_table_style("\n") + except BadRoute: + self._head = reset + raise + self._emit(tokens.TagOpenOpen(wiki_markup="|-")) + self._emit_text("tr") + if style: + self._emit_all(style) + self._emit(tokens.TagCloseSelfclose(padding=padding)) def _handle_table_cell(self, markup, tag, line_context): """Parse as normal syntax unless we hit a style marker, then parse style diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index 7cf826c..2770227 100644 --- a/tests/tokenizer/tables.mwtest +++ 
b/tests/tokenizer/tables.mwtest @@ -26,6 +26,13 @@ output: [Text(text="{| | ")] --- +name: no_table_close_inside_row +label: Handle case when there is no table close while inside of a row. +input: "{| |- " +output: [Text(text="{| |- ")] + +--- + name: leading_whitespace_table label: Handle leading whitespace for a table. input: "foo \n \t {|\n|}" @@ -33,6 +40,27 @@ output: [Text(text="foo \n \t "), TagOpenOpen(wiki_markup="{|"), Text(text="t --- +name: whitespace_after_table +label: Handle whitespace after a table close. +input: "{|\n|}\n \t " +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose(), Text(text="\n \t ")] + +--- + +name: different_whitespace_after_table +label: Handle spaces after a table close. +input: "{|\n|} \n " +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose(), Text(text=" \n ")] + +--- + +name: characters_after_table +label: Handle characters after a table close. +input: "{|\n|} tsta" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose(), Text(text=" tsta")] + +--- + name: leading_characters_table label: Don't parse as a table when leading characters are not newline or whitespace. input: "foo \n foo \t {|\n|}" @@ -47,6 +75,13 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding --- +name: table_row_multiple +label: Simple table row. +input: "{|\n |- \n|- \n |-\n |}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseSelfclose(padding=" \n"), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseSelfclose(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseSelfclose(padding="\n"), Text(text=" "), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] + +--- + name: table_cell_simple label: Simple table cell. input: "{|\n | foo \n|}" @@ -171,7 +206,6 @@ label: Parse table row style attributes with different whitespace. input: "{| \t \n |- \t name="foo bar" \t \n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(" \t \n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" \t ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(padding=" \t \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] - --- name: table_attributes From 842af20c38c65188061811959eac8b6e263fd1f2 Mon Sep 17 00:00:00 2001 From: David Winegar Date: Wed, 16 Jul 2014 12:23:38 -0700 Subject: [PATCH 049/102] fixed hacky table cell style exception, added tests Removed the `StopIteration()` exception for handling table style and instead call `_handle_table_cell_end()` with a new parameter. Also added some random tests for table openings. 
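The shape of this change is worth spelling out, since the same pattern
recurs when porting to C. A minimal, self-contained sketch of the idea,
using hypothetical names rather than the real tokenizer API: the signal
"rewind and re-parse this cell as style attributes" now travels back
through an ordinary return value instead of an exception, which also
maps directly onto C-style status returns:

    def parse_cell(text):
        # Pretend parser: a "|" in the body means what we read so far
        # was really a style attribute block, so the caller must rewind.
        if "|" in text:
            return None, True          # (result, reset_for_style)
        return text.strip(), False

    def handle_cell(text):
        result, reset_for_style = parse_cell(text)
        if reset_for_style:
            # Rewind and re-parse with the style part split off. Nothing
            # unwinds the stack, so no cleanup handler is needed.
            style, _, body = text.partition("|")
            result = (style.strip(), body.strip())
        return result

    assert handle_cell(" plain cell ") == "plain cell"
    assert handle_cell("align=left| foo") == ("align=left", "foo")
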
--- mwparserfromhell/parser/tokenizer.py | 22 ++++++++-------------- tests/tokenizer/tables.mwtest | 28 ++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 787ea0a..0de2831 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1067,24 +1067,21 @@ class Tokenizer(object): self._head += len(markup) style = None try: - (cell_context, cell) = self._parse(table_context | contexts.TABLE_CELL_STYLE_POSSIBLE) + cell_context, cell, reset_for_style = self._parse(table_context | contexts.TABLE_CELL_STYLE_POSSIBLE) except BadRoute: self._head = reset raise - # except for handling cell style - except StopIteration: - self._pop() + if reset_for_style: self._head = reset + len(markup) try: self._push(table_context) (style, padding) = self._parse_as_table_style("|") # Don't parse the style separator self._head += 1 - (cell_context, cell) = self._parse(table_context) + cell_context, cell, reset_for_style = self._parse(table_context) except BadRoute: self._head = reset raise - self._emit(tokens.TagOpenOpen(wiki_markup=markup)) self._emit_text(tag) if style: @@ -1132,13 +1129,10 @@ class Tokenizer(object): self._handle_tag_data(data, this) self._head += 1 - def _handle_table_cell_end(self): - """Returns the context and stack in a tuple.""" - return (self._context, self._pop()) - - def _handle_cell_style(self): - """Pop the cell off the stack and try to parse as style.""" - raise StopIteration() + def _handle_table_cell_end(self, reset_for_style=False): + """Returns the context, stack, and whether to reset the cell for style + in a tuple.""" + return self._context, self._pop(), reset_for_style def _verify_safe(self, this): """Make sure we are not trying to write an invalid character.""" @@ -1316,7 +1310,7 @@ class Tokenizer(object): return self._handle_table_cell_end() self._handle_table_cell("!!", "th", contexts.TABLE_TH_LINE) elif this == "|" and self._context & contexts.TABLE_CELL_STYLE_POSSIBLE: - self._handle_cell_style() + return self._handle_table_cell_end(reset_for_style=True) # on newline, clear out cell line contexts elif this == "\n" and self._context & contexts.TABLE_CELL_LINE_CONTEXTS: self._context &= ~contexts.TABLE_CELL_LINE_CONTEXTS diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index 2770227..184e695 100644 --- a/tests/tokenizer/tables.mwtest +++ b/tests/tokenizer/tables.mwtest @@ -33,6 +33,34 @@ output: [Text(text="{| |- ")] --- +name: no_table_close_attributes +label: Don't parse attributes as attributes if the table doesn't exist. +input: "{| border="1"" +output: [Text(text="{| border=\"1\"")] + +--- + +name: no_table_close_row_attributes +label: Don't parse row attributes as attributes if the table doesn't exist. +input: "{| |- border="1"" +output: [Text(text="{| |- border=\"1\"")] + +--- + +name: no_table_close_cell +label: Don't parse cells if the table doesn't close. +input: "{| | border="1"| test || red | foo" +output: [Text(text="{| | border=\"1\"| test || red | foo")] + +--- + +name: crazy_no_table_close +label: Lost of opened wiki syntax without closes. +input: "{{{ {{ {| Date: Wed, 16 Jul 2014 12:28:40 -0700 Subject: [PATCH 050/102] Reorder table tokenizer methods for forward declaration Make sure py tokenizer methods only call methods that have been declared earlier. Not necessary but makes it much easier to maintain/write the C tokenizer if methods are in the same order. 
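The reorder is a behavioral no-op in Python, because names in a function
body are resolved when the function is called, not when it is defined;
it only pays off when mirroring the file in C, where every function must
be declared before its first use. A small demonstration:

    def caller():
        # helper() does not exist yet when this def statement runs, but
        # the name is looked up at call time, so definition order is
        # irrelevant in Python (unlike C without forward declarations).
        return helper()

    def helper():
        return 42

    assert caller() == 42
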
--- mwparserfromhell/parser/tokenizer.py | 68 ++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 0de2831..db4a8cf 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1002,6 +1002,40 @@ class Tokenizer(object): self._fail_route() return self._pop() + def _parse_as_table_style(self, end_token, break_on_table_end=False): + """Parse until ``end_token`` as style attributes for a table.""" + data = _TagOpenData() + data.context = _TagOpenData.CX_ATTR_READY + while True: + this, next = self._read(), self._read(1) + can_exit = (not data.context & (data.CX_NAME) or + data.context & data.CX_NOTE_SPACE) + if this is self.END: + if self._context & contexts.TAG_ATTR: + if data.context & data.CX_QUOTED: + # Unclosed attribute quote: reset, don't die + data.context = data.CX_ATTR_VALUE + self._pop() + self._head = data.reset + continue + self._pop() + self._fail_route() + elif this == end_token and can_exit: + if data.context & (data.CX_ATTR_NAME | data.CX_ATTR_VALUE): + self._push_tag_buffer(data) + if this.isspace(): + data.padding_buffer["first"] += this + return (self._pop(), data.padding_buffer["first"]) + elif break_on_table_end and this == "|" and next == "}": + if data.context & (data.CX_ATTR_NAME | data.CX_ATTR_VALUE): + self._push_tag_buffer(data) + if this.isspace(): + data.padding_buffer["first"] += this + return (self._pop(), data.padding_buffer["first"]) + else: + self._handle_tag_data(data, this) + self._head += 1 + def _handle_table_start(self): """Handle the start of a table.""" self._head += 2 @@ -1095,40 +1129,6 @@ class Tokenizer(object): # offset displacement done by _parse() self._head -= 1 - def _parse_as_table_style(self, end_token, break_on_table_end=False): - """Parse until ``end_token`` as style attributes for a table.""" - data = _TagOpenData() - data.context = _TagOpenData.CX_ATTR_READY - while True: - this, next = self._read(), self._read(1) - can_exit = (not data.context & (data.CX_NAME) or - data.context & data.CX_NOTE_SPACE) - if this is self.END: - if self._context & contexts.TAG_ATTR: - if data.context & data.CX_QUOTED: - # Unclosed attribute quote: reset, don't die - data.context = data.CX_ATTR_VALUE - self._pop() - self._head = data.reset - continue - self._pop() - self._fail_route() - elif this == end_token and can_exit: - if data.context & (data.CX_ATTR_NAME | data.CX_ATTR_VALUE): - self._push_tag_buffer(data) - if this.isspace(): - data.padding_buffer["first"] += this - return (self._pop(), data.padding_buffer["first"]) - elif break_on_table_end and this == "|" and next == "}": - if data.context & (data.CX_ATTR_NAME | data.CX_ATTR_VALUE): - self._push_tag_buffer(data) - if this.isspace(): - data.padding_buffer["first"] += this - return (self._pop(), data.padding_buffer["first"]) - else: - self._handle_tag_data(data, this) - self._head += 1 - def _handle_table_cell_end(self, reset_for_style=False): """Returns the context, stack, and whether to reset the cell for style in a tuple.""" From 457b2240457a7ed256c7bdf290d9672a4575f435 Mon Sep 17 00:00:00 2001 From: David Winegar Date: Wed, 16 Jul 2014 13:07:11 -0700 Subject: [PATCH 051/102] Add padding to table cell tags Padding now included on all wiki table cells. With wiki table cells that include attributes, `wiki_markup` is also included (unchanged). 
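The reason for threading padding through every cell token is lossless
round-tripping: the token stream must preserve every byte of the input
so the parsed tree can be rendered back to identical wikitext. A rough
sketch with a hypothetical stand-in class (not the real tokens module)
of how a padding attribute carries otherwise-lost whitespace:

    class CloseToken:
        # Illustrative only: stores the style separator ("|") and any
        # whitespace after it, so rendering reproduces the source text.
        def __init__(self, wiki_markup=None, padding=""):
            self.wiki_markup = wiki_markup
            self.padding = padding

        def render(self):
            return (self.wiki_markup or "") + self.padding

    token = CloseToken(wiki_markup="|", padding=" ")
    assert token.render() == "| "
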
---
 mwparserfromhell/parser/tokenizer.py | 12 +++++-----
 tests/tokenizer/tables.mwtest        | 44 ++++++++++++++++++++++++------------
 2 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py
index db4a8cf..c404ebb 100644
--- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py
@@ -1002,7 +1002,7 @@ class Tokenizer(object):
             self._fail_route()
         return self._pop()
 
-    def _parse_as_table_style(self, end_token, break_on_table_end=False):
+    def _parse_as_table_style(self, end_token, break_on_table_end=False):
         """Parse until ``end_token`` as style attributes for a table."""
         data = _TagOpenData()
         data.context = _TagOpenData.CX_ATTR_READY
@@ -1099,7 +1099,7 @@ class Tokenizer(object):
         table_context = contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context
         reset = self._head
         self._head += len(markup)
-        style = None
+        reset_for_style, padding = False, ""
         try:
             cell_context, cell, reset_for_style = self._parse(table_context | contexts.TABLE_CELL_STYLE_POSSIBLE)
         except BadRoute:
             self._head = reset
             raise
@@ -1112,17 +1112,17 @@ class Tokenizer(object):
             (style, padding) = self._parse_as_table_style("|")
             # Don't parse the style separator
             self._head += 1
-            cell_context, cell, reset_for_style = self._parse(table_context)
+            cell_context, cell, unused = self._parse(table_context)
         except BadRoute:
             self._head = reset
             raise
         self._emit(tokens.TagOpenOpen(wiki_markup=markup))
         self._emit_text(tag)
-        if style:
+        if reset_for_style:
             self._emit_all(style)
-            self._emit(tokens.TagCloseSelfclose(wiki_markup="|"))
+            self._emit(tokens.TagCloseSelfclose(wiki_markup="|", padding=padding))
         else:
-            self._emit(tokens.TagCloseSelfclose())
+            self._emit(tokens.TagCloseSelfclose(padding=padding))
         self._emit_all(cell)
         # keep header/cell line contexts
         self._context |= cell_context & (contexts.TABLE_TH_LINE | contexts.TABLE_TD_LINE)
diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest
index 184e695..3f3a68d 100644
--- a/tests/tokenizer/tables.mwtest
+++ b/tests/tokenizer/tables.mwtest
@@ -113,42 +113,42 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding
 
 name: table_cell_simple
 label: Simple table cell.
 input: "{|\n | foo \n|}"
-output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]
+output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]
 
 ---
 
 name: table_cell_inline
 label: Multiple inline table cells.
input: "{|\n | foo || bar || test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(), Text(text=" bar "),TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(padding=""), Text(text=" foo "), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(padding=""), Text(text=" bar "),TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_header_simple label: Simple header cell. input: "{|\n ! foo \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseSelfclose(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_header_inline label: Multiple inline header cells. input: "{|\n ! foo || bar !! test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagCloseSelfclose(), Text(text=" bar "),TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseSelfclose(padding=""), Text(text=" foo "), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagCloseSelfclose(padding=""), Text(text=" bar "),TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagCloseSelfclose(padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: nowiki_inside_table label: Nowiki handles pipe characters in tables. input: "{|\n | foo | |- {| |} || ! !! bar \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="| |- {| |} || ! !!"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), Text(text=" bar \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(padding=""), Text(text=" foo "), TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="| |- {| |} || ! 
!!"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), Text(text=" bar \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_text_outside_cell label: Parse text inside table but outside of a cell. input: "{|\n bar \n | foo \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" bar \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" bar \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- @@ -169,56 +169,70 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding name: template_inside_table_cell label: Template within table cell. input: "{|\n |{{foo\n|bar=baz}} \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), Text(text=" \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(padding=""), TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), Text(text=" \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes label: Parse table cell style attributes. input: "{| \n | name="foo bar"| test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(wiki_markup="|"), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(wiki_markup="|", padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] + +--- + +name: table_cell_empty_attributes +label: Parse table cell with style markers but no attributes. +input: "{| \n | | test \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(wiki_markup="|", padding=" "), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] + +--- + +name: table_cell_with_dash +label: Parse a situation in which a cell line looks like a row line. 
+input: "{|\n ||- \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(wiki_markup="|", padding=""), Text(text="- \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes_quote_with_pipe label: Pipe inside an attribute quote should still be used as a style separator. input: "{| \n | name="foo|bar"| test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo"), TagCloseSelfclose(wiki_markup="|"), Text(text="bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo"), TagCloseSelfclose(wiki_markup="|", padding=""), Text(text="bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes_name_with_pipe label: Pipe inside an attribute name should still be used as a style separator. -input: "{| \n | name|="foo bar"| test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text="" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseSelfclose(wiki_markup="|"), Text(text="=\"foo bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +input: "{| \n | name|="foo bar" | test \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text="" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseSelfclose(wiki_markup="|", padding=" "), Text(text="=\"foo bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes_pipe_after_equals label: Pipe inside an attribute should still be used as a style separator after an equals. input: "{| \n | name=|"foo|bar"| test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagCloseSelfclose(wiki_markup="|"), Text(text="\"foo|bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagCloseSelfclose(wiki_markup="|", padding=""), Text(text="\"foo|bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes_templates label: Pipe inside attributes shouldn't be style separator. 
input: "{| \n | {{comment|template=baz}} | test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_after_eq="", pad_first=" ", pad_before_eq=" "), TemplateOpen(), Text(text="comment"), TemplateParamSeparator(), Text(text="template"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), TagCloseSelfclose(wiki_markup="|"), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_after_eq="", pad_first=" ", pad_before_eq=" "), TemplateOpen(), Text(text="comment"), TemplateParamSeparator(), Text(text="template"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), TagCloseSelfclose(wiki_markup="|", padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: header_cell_attributes label: Parse header cell style attributes. input: "{| \n ! name="foo bar"| test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(wiki_markup="|"), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(wiki_markup="|", padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: inline_cell_attributes label: Parse cell style attributes of inline cells. input: "{| \n ! 
name="foo bar" | test ||color="red"| markup!!foo | time \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagAttrStart(pad_after_eq="", pad_first=" ", pad_before_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(wiki_markup="|"), Text(text=" test "), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagAttrStart(pad_first="", pad_before_eq="", pad_after_eq=""), Text(text="color"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="red"), TagCloseSelfclose(wiki_markup="|"), Text(text=" markup"), TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagAttrStart(pad_first="", pad_before_eq=" ", pad_after_eq=""), Text(text="foo"), TagCloseSelfclose(wiki_markup="|"), Text(text=" time \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagAttrStart(pad_after_eq="", pad_first=" ", pad_before_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(wiki_markup="|", padding=" "), Text(text=" test "), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagAttrStart(pad_first="", pad_before_eq="", pad_after_eq=""), Text(text="color"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="red"), TagCloseSelfclose(wiki_markup="|", padding=""), Text(text=" markup"), TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagAttrStart(pad_first="", pad_before_eq=" ", pad_after_eq=""), Text(text="foo"), TagCloseSelfclose(wiki_markup="|", padding=""), Text(text=" time \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- From 8b5d6f9a3b8892ee9b05e0cf0025475e14f814e0 Mon Sep 17 00:00:00 2001 From: David Winegar Date: Wed, 16 Jul 2014 14:31:40 -0700 Subject: [PATCH 052/102] Changes to table close handling Fix problem in which fake table closes were causing a problem inside cells. Changed inline table handling to fix this. 
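The rule being enforced can be sketched outside the tokenizer: "|}" only
closes a table when it begins a line, optionally after whitespace;
anywhere else, as in the new table_cell_fake_close test, the pipe is a
style separator and the brace is plain text. A simplified, hypothetical
checker (the real implementation tracks parser contexts instead of
scanning backwards):

    def is_table_close(text, i):
        # "|}" counts as a close only at the start of a line, possibly
        # preceded by spaces or tabs (a simplification of the real rule).
        if text[i:i + 2] != "|}":
            return False
        j = i - 1
        while j >= 0 and text[j] in " \t":
            j -= 1
        return j < 0 or text[j] == "\n"

    text = "{|\n | |} \n|}"
    closes = [i for i in range(len(text)) if is_table_close(text, i)]
    assert closes == [10]  # only the final "|}", at a line start, closes
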
--- mwparserfromhell/parser/tokenizer.py | 29 ++++++++++++++++------------- tests/tokenizer/tables.mwtest | 28 ++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 13 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index c404ebb..b70e932 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1029,8 +1029,6 @@ class Tokenizer(object): elif break_on_table_end and this == "|" and next == "}": if data.context & (data.CX_ATTR_NAME | data.CX_ATTR_VALUE): self._push_tag_buffer(data) - if this.isspace(): - data.padding_buffer["first"] += this return (self._pop(), data.padding_buffer["first"]) else: self._handle_tag_data(data, this) @@ -1040,13 +1038,17 @@ class Tokenizer(object): """Handle the start of a table.""" self._head += 2 reset = self._head - style = None + style, table = None, None try: self._push(contexts.TABLE_OPEN) (style, padding) = self._parse_as_table_style("\n", break_on_table_end=True) - # Have to do this in the case of inline tables - self._head += 1 if "\n" in padding else 0 - table = self._parse(contexts.TABLE_OPEN) + # continue to parse if it is NOT an inline table + if "\n" in padding: + self._head += 1 + table = self._parse(contexts.TABLE_OPEN) + else: + # close tag + self._head += 2 except BadRoute: # offset displacement done by _parse() self._head = reset - 1 @@ -1057,7 +1059,8 @@ class Tokenizer(object): if style: self._emit_all(style) self._emit(tokens.TagCloseOpen(padding=padding)) - self._emit_all(table) + if table: + self._emit_all(table) self._emit(tokens.TagOpenClose(wiki_markup="|}")) self._emit_text("table") self._emit(tokens.TagCloseClose()) @@ -1293,11 +1296,7 @@ class Tokenizer(object): else: self._emit_text("{|") elif self._context & contexts.TABLE_OPEN: - if this == "|" and next == "}": - if self._context & contexts.TABLE_CELL_OPEN: - return self._handle_table_cell_end() - return self._handle_table_end() - elif this == "|" and next == "|" and self._context & contexts.TABLE_TD_LINE: + if this == "|" and next == "|" and self._context & contexts.TABLE_TD_LINE: if self._context & contexts.TABLE_CELL_OPEN: return self._handle_table_cell_end() self._handle_table_cell("||", "td", contexts.TABLE_TD_LINE) @@ -1317,7 +1316,11 @@ class Tokenizer(object): self._emit_text(this) elif (self._read(-1) in ("\n", self.START) or (self._read(-2) in ("\n", self.START) and self._read(-1).isspace())): - if this == "|" and next == "-": + if this == "|" and next == "}": + if self._context & contexts.TABLE_CELL_OPEN: + return self._handle_table_cell_end() + return self._handle_table_end() + elif this == "|" and next == "-": if self._context & contexts.TABLE_CELL_OPEN: return self._handle_table_cell_end() self._handle_table_row() diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index 3f3a68d..e63bd11 100644 --- a/tests/tokenizer/tables.mwtest +++ b/tests/tokenizer/tables.mwtest @@ -89,6 +89,13 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding --- +name: characters_after_inline_table +label: Handle characters after an inline table close. +input: "{| |} tsta" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" "), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose(), Text(text=" tsta")] + +--- + name: leading_characters_table label: Don't parse as a table when leading characters are not newline or whitespace. 
input: "foo \n foo \t {|\n|}" @@ -124,6 +131,27 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding --- +name: table_cell_fake_close +label: Looks like a table close but is not. +input: "{|\n | |} \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(wiki_markup="|", padding=" "), Text(text="} \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] + +--- + +name: table_cell_more_fake_close +label: Looks like a table close but is not. +input: "{|\n || |} \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(wiki_markup="|", padding=""), Text(text=" |} \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] + +--- + +name: table_cell_extra_close +label: Process second close as text. +input: "{| \n |} \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose(), Text(text=" \n|}")] + +--- + name: table_header_simple label: Simple header cell. input: "{|\n ! foo \n|}" From 151a73e4371c26dea5b20169a3acd26ca3f7f711 Mon Sep 17 00:00:00 2001 From: David Winegar Date: Wed, 16 Jul 2014 15:03:26 -0700 Subject: [PATCH 053/102] Fix issue with incorrect table attributes Fix problem in which invalid table attributes were being parsed incorrectly. Added tests. --- mwparserfromhell/parser/tokenizer.py | 21 +++++++++------------ tests/tokenizer/tables.mwtest | 35 ++++++++++++++++++++++++++++++++--- 2 files changed, 41 insertions(+), 15 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index b70e932..7bfd11a 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1008,9 +1008,16 @@ class Tokenizer(object): data.context = _TagOpenData.CX_ATTR_READY while True: this, next = self._read(), self._read(1) - can_exit = (not data.context & (data.CX_NAME) or + table_end = break_on_table_end and this == "|" and next == "}" + can_exit = (not data.context & data.CX_QUOTED or data.context & data.CX_NOTE_SPACE) - if this is self.END: + if (this == end_token and can_exit) or table_end: + if data.context & (data.CX_ATTR_NAME | data.CX_ATTR_VALUE): + self._push_tag_buffer(data) + if this.isspace(): + data.padding_buffer["first"] += this + return (self._pop(), data.padding_buffer["first"]) + elif this is self.END or table_end or this == end_token: if self._context & contexts.TAG_ATTR: if data.context & data.CX_QUOTED: # Unclosed attribute quote: reset, don't die @@ -1020,16 +1027,6 @@ class Tokenizer(object): continue self._pop() self._fail_route() - elif this == end_token and can_exit: - if data.context & (data.CX_ATTR_NAME | data.CX_ATTR_VALUE): - self._push_tag_buffer(data) - if this.isspace(): - data.padding_buffer["first"] += this - return (self._pop(), data.padding_buffer["first"]) - elif break_on_table_end and this == "|" and next == "}": - if data.context & (data.CX_ATTR_NAME | data.CX_ATTR_VALUE): - self._push_tag_buffer(data) - return (self._pop(), data.padding_buffer["first"]) else: self._handle_tag_data(data, this) self._head += 1 diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index e63bd11..163579b 100644 --- a/tests/tokenizer/tables.mwtest +++ 
b/tests/tokenizer/tables.mwtest @@ -225,14 +225,14 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding name: table_cell_attributes_quote_with_pipe label: Pipe inside an attribute quote should still be used as a style separator. input: "{| \n | name="foo|bar"| test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo"), TagCloseSelfclose(wiki_markup="|", padding=""), Text(text="bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), Text(text="\"foo"), TagCloseSelfclose(wiki_markup="|", padding=""), Text(text="bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes_name_with_pipe label: Pipe inside an attribute name should still be used as a style separator. input: "{| \n | name|="foo bar" | test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text="" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseSelfclose(wiki_markup="|", padding=" "), Text(text="=\"foo bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseSelfclose(wiki_markup="|", padding=""), Text(text="=\"foo bar\" | test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- @@ -274,7 +274,7 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding name: table_row_attributes_crazy_whitespace label: Parse table row style attributes with different whitespace. input: "{| \t \n |- \t name="foo bar" \t \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(" \t \n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" \t ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(padding=" \t \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \t \n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" \t ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(padding=" \t \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- @@ -289,3 +289,32 @@ name: inline_table_attributes label: Correctly handle attributes in inline tables. 
input: "{| foo="tee bar" |}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"),TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="tee bar"), TagCloseOpen(padding=" "), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] + +--- + +name: table_incorrect_attributes +label: Parse incorrect table style attributes. +input: "{| name="foo\n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), Text(text="\"foo"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] + +--- + +name: table_cell_unclosed_style +label: Parse unclosed and closed bold and italics inside cells. +input: "{|\n | ''foo || '''bar ||''baz''||'''test'''\n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(padding=""), Text(text=" ''foo "), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(padding=""), Text(text=" '''bar "), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(padding=""), TagOpenOpen(wiki_markup="'"), Text(text="i"), TagCloseOpen(), Text(text="baz"), TagOpenClose(), Text(text="i"), TagCloseClose(), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(padding=""), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="text"), TagOpenClose(), Text(text="b"), TagCloseClose() Text(text="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] + + +--- + +name: recursion_five_hundred_opens +label: test potentially dangerous recursion: five hundred table openings, without spaces +input: "{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|" +output: [Text(text="{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|")] + +--- + +name: recursion_one_hundred_opens +label: test potentially dangerous recursion: one hundred table openings, with spaces +input: "{| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {|" +output: [Text(text="{| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| 
{| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {|")] \ No newline at end of file From e6ec5dc4de743f62889c65272448bdb1041fea29 Mon Sep 17 00:00:00 2001 From: David Winegar Date: Wed, 16 Jul 2014 18:11:12 -0700 Subject: [PATCH 054/102] Refactor methods to avoid returning tuples Various changes to avoid returning tuples - working on the C tokenizer made me realize this was a bad idea for compatability/similarity between the two. --- mwparserfromhell/parser/contexts.py | 17 ++++++++--------- mwparserfromhell/parser/tokenizer.py | 30 +++++++++++++++++++----------- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index 564ceca..3827708 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -164,15 +164,14 @@ FAIL_ON_EQUALS = 1 << 29 SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE + FAIL_ON_RBRACE + FAIL_ON_EQUALS) -TABLE_OPEN = 1 << 30 -TABLE_CELL_OPEN = 1 << 31 -TABLE_CELL_STYLE_POSSIBLE = 1 << 32 -TABLE_TD_LINE = 1 << 33 -TABLE_TH_LINE = 1 << 34 -TABLE_CELL_LINE_CONTEXTS = (TABLE_TD_LINE + TABLE_TH_LINE + - TABLE_CELL_STYLE_POSSIBLE) -TABLE = (TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE_POSSIBLE + - TABLE_TD_LINE + TABLE_TH_LINE) +TABLE_OPEN = 1 << 30 +TABLE_CELL_OPEN = 1 << 31 +TABLE_CELL_STYLE = 1 << 32 +TABLE_TD_LINE = 1 << 33 +TABLE_TH_LINE = 1 << 34 +TABLE_CELL_LINE_CONTEXTS = TABLE_TD_LINE + TABLE_TH_LINE + TABLE_CELL_STYLE +TABLE = (TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE + TABLE_TD_LINE + + TABLE_TH_LINE) # Global contexts: diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 7bfd11a..7fda2d5 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1016,7 +1016,7 @@ class Tokenizer(object): self._push_tag_buffer(data) if this.isspace(): data.padding_buffer["first"] += this - return (self._pop(), data.padding_buffer["first"]) + return data.padding_buffer["first"] elif this is self.END or table_end or this == end_token: if self._context & contexts.TAG_ATTR: if data.context & data.CX_QUOTED: @@ -1038,7 +1038,8 @@ class Tokenizer(object): style, table = None, None try: self._push(contexts.TABLE_OPEN) - (style, padding) = self._parse_as_table_style("\n", break_on_table_end=True) + padding = self._parse_as_table_style("\n", break_on_table_end=True) + style = self._pop() # continue to parse if it is NOT an inline table if "\n" in padding: self._head += 1 @@ -1078,7 +1079,8 @@ class Tokenizer(object): if self._can_recurse(): try: self._push(contexts.TABLE_OPEN) - (style, padding) = self._parse_as_table_style("\n") + padding = self._parse_as_table_style("\n") + style = self._pop() except BadRoute: self._head = reset raise @@ -1099,9 +1101,11 @@ class Tokenizer(object): table_context = contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context reset = self._head self._head += len(markup) - rest_for_style, padding = False, "" + reset_for_style, padding = False, "" try: - cell_context, cell, reset_for_style = self._parse(table_context | contexts.TABLE_CELL_STYLE_POSSIBLE) + cell_context = self._parse(table_context | contexts.TABLE_CELL_STYLE) + cell = self._pop() + reset_for_style = cell_context & contexts.TABLE_CELL_STYLE except BadRoute: self._head = reset raise @@ -1109,10 +1113,12 @@ class Tokenizer(object): self._head = reset + len(markup) try: self._push(table_context) - (style, padding) = 
self._parse_as_table_style("|") + padding = self._parse_as_table_style("|") + style = self._pop() # Don't parse the style separator self._head += 1 - cell_context, cell, unused = self._parse(table_context) + cell_context = self._parse(table_context) + cell = self._pop() except BadRoute: self._head = reset raise @@ -1130,9 +1136,11 @@ class Tokenizer(object): self._head -= 1 def _handle_table_cell_end(self, reset_for_style=False): - """Returns the context, stack, and whether to reset the cell for style - in a tuple.""" - return self._context, self._pop(), reset_for_style + """Returns the current context, with the TABLE_CELL_STYLE flag set if + it is necessary to reset and parse style attributes.""" + if reset_for_style: + return self._context | contexts.TABLE_CELL_STYLE + return self._context & ~contexts.TABLE_CELL_STYLE def _verify_safe(self, this): """Make sure we are not trying to write an invalid character.""" @@ -1305,7 +1313,7 @@ class Tokenizer(object): if self._context & contexts.TABLE_CELL_OPEN: return self._handle_table_cell_end() self._handle_table_cell("!!", "th", contexts.TABLE_TH_LINE) - elif this == "|" and self._context & contexts.TABLE_CELL_STYLE_POSSIBLE: + elif this == "|" and self._context & contexts.TABLE_CELL_STYLE: return self._handle_table_cell_end(reset_for_style=True) # on newline, clear out cell line contexts elif this == "\n" and self._context & contexts.TABLE_CELL_LINE_CONTEXTS: From 406dd3a157e72d3f37e80661cebc65cc544a321f Mon Sep 17 00:00:00 2001 From: David Winegar Date: Thu, 17 Jul 2014 16:07:43 -0700 Subject: [PATCH 055/102] All tokenizer end methods return a stack For C compatability, switch table cell end to return the stack. Now context is kept by using `keep_context` when calling `self._pop()`. --- mwparserfromhell/parser/contexts.py | 4 ++-- mwparserfromhell/parser/tokenizer.py | 20 ++++++++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index 3827708..6dd5319 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -94,7 +94,7 @@ Local (stack-specific) contexts: * :const:`TABLE_OPEN` * :const:`TABLE_CELL_OPEN` - * :const:`TABLE_CELL_STYLE_POSSIBLE` + * :const:`TABLE_CELL_STYLE` * :const:`TABLE_TD_LINE` * :const:`TABLE_TH_LINE` * :const:`TABLE_CELL_LINE_CONTEXTS` @@ -180,7 +180,7 @@ GL_HEADING = 1 << 0 # Aggregate contexts: FAIL = (TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK_TITLE + HEADING + TAG + - STYLE + TABLE_OPEN) + STYLE + TABLE) UNSAFE = (TEMPLATE_NAME + WIKILINK_TITLE + EXT_LINK_TITLE + TEMPLATE_PARAM_KEY + ARGUMENT_NAME + TAG_CLOSE) DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 7fda2d5..9e22b28 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1098,13 +1098,14 @@ class Tokenizer(object): self._head += len(markup) - 1 return - table_context = contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context + old_context = self._context reset = self._head self._head += len(markup) reset_for_style, padding = False, "" try: - cell_context = self._parse(table_context | contexts.TABLE_CELL_STYLE) - cell = self._pop() + cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context | contexts.TABLE_CELL_STYLE) + cell_context = self._context + self._context = old_context reset_for_style = cell_context & contexts.TABLE_CELL_STYLE except BadRoute: self._head = reset @@ 
-1112,13 +1113,14 @@ class Tokenizer(object): if reset_for_style: self._head = reset + len(markup) try: - self._push(table_context) + self._push(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context) padding = self._parse_as_table_style("|") style = self._pop() # Don't parse the style separator self._head += 1 - cell_context = self._parse(table_context) - cell = self._pop() + cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context) + cell_context = self._context + self._context = old_context except BadRoute: self._head = reset raise @@ -1139,8 +1141,10 @@ class Tokenizer(object): """Returns the current context, with the TABLE_CELL_STYLE flag set if it is necessary to reset and parse style attributes.""" if reset_for_style: - return self._context | contexts.TABLE_CELL_STYLE - return self._context & ~contexts.TABLE_CELL_STYLE + self._context |= contexts.TABLE_CELL_STYLE + else: + self._context &= ~contexts.TABLE_CELL_STYLE + return self._pop(keep_context=True) def _verify_safe(self, this): """Make sure we are not trying to write an invalid character.""" From 2d945b30e53d41b0a4d448ddee56d1580274b7c6 Mon Sep 17 00:00:00 2001 From: David Winegar Date: Thu, 17 Jul 2014 16:21:20 -0700 Subject: [PATCH 056/102] Use uint64_t for context For the C tokenizer, include `` and use `uint64_t` instead of `int` for context. Changes to tables mean that context can be larger than 32 bits, and it is possible for `int` to only have 16 bits anyways (though this is very unlikely). --- mwparserfromhell/parser/tokenizer.c | 29 +++++++++++++++-------------- mwparserfromhell/parser/tokenizer.h | 7 ++++--- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 814ad50..90f51b0 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -241,7 +241,7 @@ static int Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds) /* Add a new token stack, context, and textbuffer to the list. 
*/ -static int Tokenizer_push(Tokenizer* self, int context) +static int Tokenizer_push(Tokenizer* self, uint64_t context) { Stack* top = malloc(sizeof(Stack)); @@ -333,7 +333,7 @@ static PyObject* Tokenizer_pop(Tokenizer* self) static PyObject* Tokenizer_pop_keeping_context(Tokenizer* self) { PyObject* stack; - int context; + uint64_t context; if (Tokenizer_push_textbuffer(self)) return NULL; @@ -351,7 +351,7 @@ static PyObject* Tokenizer_pop_keeping_context(Tokenizer* self) */ static void* Tokenizer_fail_route(Tokenizer* self) { - int context = self->topstack->context; + uint64_t context = self->topstack->context; PyObject* stack = Tokenizer_pop(self); Py_XDECREF(stack); @@ -1034,7 +1034,7 @@ Tokenizer_is_free_link(Tokenizer* self, Py_UNICODE this, Py_UNICODE next) { // Built from Tokenizer_parse()'s end sentinels: Py_UNICODE after = Tokenizer_READ(self, 2); - int ctx = self->topstack->context; + uint64_t ctx = self->topstack->context; return (!this || this == '\n' || this == '[' || this == ']' || this == '<' || this == '>' || (this == '\'' && next == '\'') || @@ -1629,9 +1629,9 @@ static int Tokenizer_push_tag_buffer(Tokenizer* self, TagData* data) static int Tokenizer_handle_tag_space(Tokenizer* self, TagData* data, Py_UNICODE text) { - int ctx = data->context; - int end_of_value = (ctx & TAG_ATTR_VALUE && - !(ctx & (TAG_QUOTED | TAG_NOTE_QUOTE))); + uint64_t ctx = data->context; + uint64_t end_of_value = (ctx & TAG_ATTR_VALUE && + !(ctx & (TAG_QUOTED | TAG_NOTE_QUOTE))); if (end_of_value || (ctx & TAG_QUOTED && ctx & TAG_NOTE_SPACE)) { if (Tokenizer_push_tag_buffer(self, data)) @@ -2153,7 +2153,7 @@ static int Tokenizer_emit_style_tag(Tokenizer* self, const char* tag, static int Tokenizer_parse_italics(Tokenizer* self) { Py_ssize_t reset = self->head; - int context; + uint64_t context; PyObject *stack; stack = Tokenizer_parse(self, LC_STYLE_ITALICS, 1); @@ -2273,7 +2273,7 @@ static int Tokenizer_parse_italics_and_bold(Tokenizer* self) */ static PyObject* Tokenizer_parse_style(Tokenizer* self) { - int context = self->topstack->context, ticks = 2, i; + uint64_t context = self->topstack->context, ticks = 2, i; self->head += 2; while (Tokenizer_READ(self, 0) == '\'') { @@ -2428,7 +2428,7 @@ static int Tokenizer_handle_dl_term(Tokenizer* self) /* Handle the end of the stream of wikitext. */ -static PyObject* Tokenizer_handle_end(Tokenizer* self, int context) +static PyObject* Tokenizer_handle_end(Tokenizer* self, uint64_t context) { PyObject *token, *text, *trash; int single; @@ -2457,7 +2457,7 @@ static PyObject* Tokenizer_handle_end(Tokenizer* self, int context) Make sure we are not trying to write an invalid character. Return 0 if everything is safe, or -1 if the route must be failed. */ -static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) +static int Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UNICODE data) { if (context & LC_FAIL_NEXT) return -1; @@ -2536,9 +2536,9 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) Parse the wikicode string, using context for when to stop. If push is true, we will push a new context, otherwise we won't and context will be ignored. 
*/ -static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) +static PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) { - int this_context; + uint64_t this_context; Py_UNICODE this, next, next_next, last; PyObject* temp; @@ -2697,7 +2697,8 @@ static PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) { PyObject *text, *temp, *tokens; - int context = 0, skip_style_tags = 0; + uint64_t context = 0; + int skip_style_tags = 0; if (PyArg_ParseTuple(args, "U|ii", &text, &context, &skip_style_tags)) { Py_XDECREF(self->text); diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index dde6464..e9b1a92 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -29,6 +29,7 @@ SOFTWARE. #include #include #include +#include <stdint.h> #if PY_MAJOR_VERSION >= 3 #define IS_PY3K @@ -191,7 +192,7 @@ struct Textbuffer { struct Stack { PyObject* stack; - int context; + uint64_t context; struct Textbuffer* textbuffer; struct Stack* next; }; @@ -202,7 +203,7 @@ typedef struct { } HeadingData; typedef struct { - int context; + uint64_t context; struct Textbuffer* pad_first; struct Textbuffer* pad_before_eq; struct Textbuffer* pad_after_eq; @@ -267,7 +268,7 @@ static int Tokenizer_parse_entity(Tokenizer*); static int Tokenizer_parse_comment(Tokenizer*); static int Tokenizer_handle_dl_term(Tokenizer*); static int Tokenizer_parse_tag(Tokenizer*); -static PyObject* Tokenizer_parse(Tokenizer*, int, int); +static PyObject* Tokenizer_parse(Tokenizer*, uint64_t, int); static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); static int load_exceptions(void); From 0128b1f78a346dbe774800bd17b1b0f92bb9ca30 Mon Sep 17 00:00:00 2001 From: David Winegar Date: Fri, 18 Jul 2014 17:41:24 -0700 Subject: [PATCH 057/102] Implement CTokenizer for tables Table support in the CTokenizer is implemented entirely in this commit - it didn't make much sense to me to split it up. All tests pass, and the memory test shows no leaks on Linux. --- mwparserfromhell/parser/tokenizer.c | 503 ++++++++++++++++++++++++++++++++++- mwparserfromhell/parser/tokenizer.h | 108 ++++---- mwparserfromhell/parser/tokenizer.py | 2 +- 3 files changed, 551 insertions(+), 62 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 90f51b0..1d2964e 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -2454,6 +2454,399 @@ static PyObject* Tokenizer_handle_end(Tokenizer* self, uint64_t context) } /* + Parse until ``end_token`` as style attributes for a table.
+*/ +static PyObject* Tokenizer_parse_as_table_style(Tokenizer* self, char end_token, + int break_on_table_end) +{ + TagData *data = TagData_new(); + PyObject *padding, *trash; + Py_UNICODE this, next; + int can_exit, table_end; + + if (!data) + return NULL; + data->context = TAG_ATTR_READY; + + while (1) { + this = Tokenizer_READ(self, 0); + next = Tokenizer_READ(self, 1); + can_exit = (!(data->context & TAG_QUOTED) || data->context & TAG_NOTE_SPACE); + table_end = (break_on_table_end && this == '|' && next == '}'); + if ((this == end_token && can_exit) || table_end) { + if (data->context & (TAG_ATTR_NAME | TAG_ATTR_VALUE)) { + if (Tokenizer_push_tag_buffer(self, data)) { + TagData_dealloc(data); + return NULL; + } + } + if (Py_UNICODE_ISSPACE(this)) + Textbuffer_write(&(data->pad_first), this); + padding = Textbuffer_render(data->pad_first); + TagData_dealloc(data); + if (!padding) + return NULL; + return padding; + } + else if (!this || table_end || this == end_token) { + if (self->topstack->context & LC_TAG_ATTR) { + if (data->context & TAG_QUOTED) { + // Unclosed attribute quote: reset, don't die + data->context = TAG_ATTR_VALUE; + trash = Tokenizer_pop(self); + Py_XDECREF(trash); + self->head = data->reset; + continue; + } + trash = Tokenizer_pop(self); + Py_XDECREF(trash); + } + TagData_dealloc(data); + return Tokenizer_fail_route(self); + } + else { + if (Tokenizer_handle_tag_data(self, data, this) || BAD_ROUTE) { + TagData_dealloc(data); + return NULL; + } + } + self->head++; + } +} + +/* + Handle the start of a table. +*/ +static int Tokenizer_handle_table_start(Tokenizer* self) +{ + self->head += 2; + Py_ssize_t reset = self->head; + PyObject *style, *open_open_kwargs, *close_open_kwargs, *open_close_kwargs, + *padding, *newline_character, *open_wiki_markup, *close_wiki_markup; + PyObject *table = NULL; + + if(Tokenizer_push(self, LC_TABLE_OPEN)) + return -1; + padding = Tokenizer_parse_as_table_style(self, '\n', 1); + if (BAD_ROUTE) { + RESET_ROUTE(); + self->head = reset - 1; + if (Tokenizer_emit_text(self, "{|")) + return -1; + return 0; + } + if (!padding) + return -1; + style = Tokenizer_pop(self); + if (!style) { + Py_DECREF(padding); + return -1; + } + + newline_character = PyUnicode_FromString("\n"); + if (!newline_character) { + Py_DECREF(padding); + Py_DECREF(style); + return -1; + } + // continue to parse if it is NOT an inline table + if (PyUnicode_Contains(padding, newline_character)) { + Py_DECREF(newline_character); + self->head++; + table = Tokenizer_parse(self, LC_TABLE_OPEN, 1); + if (BAD_ROUTE) { + RESET_ROUTE(); + // offset displacement done by parse() + self->head = reset - 1; + if (Tokenizer_emit_text(self, "{|")) + return -1; + return 0; + } + if (!table) { + Py_DECREF(padding); + Py_DECREF(style); + return -1; + } + } else { + Py_DECREF(newline_character); + // close tag + self->head += 2; + } + + open_open_kwargs = PyDict_New(); + if (!open_open_kwargs) + goto fail_decref_all; + open_wiki_markup = PyUnicode_FromString("{|"); + if (!open_wiki_markup) { + Py_DECREF(open_open_kwargs); + goto fail_decref_all; + } + PyDict_SetItemString(open_open_kwargs, "wiki_markup", open_wiki_markup); + Py_DECREF(open_wiki_markup); + if (Tokenizer_emit_kwargs(self, TagOpenOpen, open_open_kwargs)) + goto fail_decref_all; + if (Tokenizer_emit_text(self, "table")) + goto fail_decref_all; + + if (style) { + if (Tokenizer_emit_all(self, style)) + goto fail_decref_padding_table; + Py_DECREF(style); + } + + close_open_kwargs = PyDict_New(); + if (!close_open_kwargs) + goto 
fail_decref_padding_table; + PyDict_SetItemString(close_open_kwargs, "padding", padding); + Py_DECREF(padding); + if (Tokenizer_emit_kwargs(self, TagCloseOpen, close_open_kwargs)) + goto fail_decref_table; + + if (table) { + if (Tokenizer_emit_all(self, table)) + goto fail_decref_table; + Py_DECREF(table); + } + + open_close_kwargs = PyDict_New(); + if (!open_close_kwargs) + return -1; + close_wiki_markup = PyUnicode_FromString("|}"); + if (!close_wiki_markup) { + Py_DECREF(open_close_kwargs); + return -1; + } + PyDict_SetItemString(open_close_kwargs, "wiki_markup", close_wiki_markup); + Py_DECREF(close_wiki_markup); + if (Tokenizer_emit_kwargs(self, TagOpenClose, open_close_kwargs)) + return -1; + if (Tokenizer_emit_text(self, "table")) + return -1; + if (Tokenizer_emit(self, TagCloseClose)) + return -1; + // offset displacement done by _parse() + self->head--; + return 0; + + fail_decref_all: + Py_DECREF(style); + fail_decref_padding_table: + Py_DECREF(padding); + fail_decref_table: + Py_XDECREF(table); + return -1; +} + +/* + Return the stack in order to handle the table end. +*/ +static PyObject * Tokenizer_handle_table_end(Tokenizer* self) +{ + self->head += 2; + return Tokenizer_pop(self); +} + +/* + Parse as style until end of the line, then continue. +*/ +static int Tokenizer_handle_table_row(Tokenizer* self) +{ + Py_ssize_t reset = self->head; + self->head += 2; + PyObject *padding, *open_kwargs, *close_kwargs, *wiki_markup; + PyObject *style = NULL; + + // If we can't recurse, still tokenize tag but parse style attrs as text + if (Tokenizer_CAN_RECURSE(self)) { + if(Tokenizer_push(self, LC_TABLE_OPEN)) + return -1; + padding = Tokenizer_parse_as_table_style(self, '\n', 0); + if (BAD_ROUTE) { + self->head = reset; + return 0; + } + if (!padding) + return -1; + style = Tokenizer_pop(self); + if (!style) { + Py_DECREF(padding); + return -1; + } + } else { + padding = PyUnicode_FromString(""); + if (!padding) + return -1; + } + + open_kwargs = PyDict_New(); + if (!open_kwargs) + goto fail_decref_all; + wiki_markup = PyUnicode_FromString("|-"); + if (!wiki_markup) { + Py_DECREF(open_kwargs); + goto fail_decref_all; + } + PyDict_SetItemString(open_kwargs, "wiki_markup", wiki_markup); + Py_DECREF(wiki_markup); + if (Tokenizer_emit_kwargs(self, TagOpenOpen, open_kwargs)) + goto fail_decref_all; + if (Tokenizer_emit_text(self, "tr")) + goto fail_decref_all; + + if (style) { + if (Tokenizer_emit_all(self, style)) + goto fail_decref_all; + Py_DECREF(style); + } + + close_kwargs = PyDict_New(); + if (!close_kwargs) + goto fail_decref_all; + PyDict_SetItemString(close_kwargs, "padding", padding); + Py_DECREF(padding); + if (Tokenizer_emit_kwargs(self, TagCloseSelfclose, close_kwargs)) + return -1; + return 0; + + fail_decref_all: + Py_XDECREF(style); + Py_DECREF(padding); + return -1; +} + +/* + Parse as normal syntax unless we hit a style marker, then parse style + as HTML attributes and the remainder as normal syntax. 
+*/ +static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, + const char *tag, uint64_t line_context) +{ + if (!Tokenizer_CAN_RECURSE(self)) { + if (Tokenizer_emit_text(self, markup)) + return -1; + self->head += strlen(markup) - 1; + return 0; + } + + uint64_t old_context = self->topstack->context; + uint64_t cell_context; + Py_ssize_t reset = self->head; + self->head += strlen(markup); + PyObject *padding; + PyObject *cell, *open_kwargs, *close_kwargs, *open_wiki_markup, *close_wiki_markup; + PyObject *style = NULL; + + cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | LC_TABLE_CELL_STYLE | line_context, 1); + if (BAD_ROUTE) { + self->head = reset; + return 0; + } + if (!cell) + return -1; + cell_context = self->topstack->context; + self->topstack->context = old_context; + + if (cell_context & LC_TABLE_CELL_STYLE) { + Py_DECREF(cell); + self->head = reset + strlen(markup); + if(Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | line_context)) + return -1; + padding = Tokenizer_parse_as_table_style(self, '|', 0); + if (BAD_ROUTE) { + self->head = reset; + return 0; + } + if (!padding) + return -1; + style = Tokenizer_pop(self); + if (!style) { + Py_DECREF(padding); + return -1; + } + // Don't parse the style separator + self->head++; + cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | line_context, 1); + if (BAD_ROUTE) { + self->head = reset; + return 0; + } + if (!cell) + return -1; + cell_context = self->topstack->context; + self->topstack->context = old_context; + } + else { + padding = PyUnicode_FromString(""); + if (!padding) { + Py_DECREF(cell); + return -1; + } + } + + open_kwargs = PyDict_New(); + if (!open_kwargs) + goto fail_decref_all; + close_kwargs = PyDict_New(); + if (!close_kwargs) + goto fail_decref_all; + open_wiki_markup = PyUnicode_FromString(markup); + if (!open_wiki_markup) + goto fail_decref_all; + PyDict_SetItemString(open_kwargs, "wiki_markup", open_wiki_markup); + Py_DECREF(open_wiki_markup); + if (Tokenizer_emit_kwargs(self, TagOpenOpen, open_kwargs)) + goto fail_decref_all; + if (Tokenizer_emit_text(self, tag)) + goto fail_decref_all; + + if (style) { + if (Tokenizer_emit_all(self, style)) + goto fail_decref_all; + close_wiki_markup = PyUnicode_FromString("|"); + if (!close_wiki_markup) + goto fail_decref_all; + PyDict_SetItemString(close_kwargs, "wiki_markup", close_wiki_markup); + Py_DECREF(close_wiki_markup); + Py_DECREF(style); + } + + PyDict_SetItemString(close_kwargs, "padding", padding); + Py_DECREF(padding); + if (Tokenizer_emit_kwargs(self, TagCloseSelfclose, close_kwargs)) + goto fail_decref_cell; + if (Tokenizer_emit_all(self, cell)) + goto fail_decref_cell; + Py_DECREF(cell); + // keep header/cell line contexts + self->topstack->context |= cell_context & (LC_TABLE_TH_LINE | LC_TABLE_TD_LINE); + // offset displacement done by parse() + self->head--; + return 0; + + fail_decref_all: + Py_XDECREF(style); + Py_DECREF(padding); + Py_XDECREF(open_kwargs); + Py_XDECREF(close_kwargs); + fail_decref_cell: + Py_DECREF(cell); + return -1; +} + +/* + Returns the context, stack, and whether to reset the cell for style + in a tuple. +*/ +static PyObject* Tokenizer_handle_table_cell_end(Tokenizer* self, int reset_for_style) +{ + if (reset_for_style) + self->topstack->context |= LC_TABLE_CELL_STYLE; + else + self->topstack->context &= ~LC_TABLE_CELL_STYLE; + return Tokenizer_pop_keeping_context(self); +} + +/* Make sure we are not trying to write an invalid character. 
Return 0 if everything is safe, or -1 if the route must be failed. */ @@ -2533,6 +2926,24 @@ static int Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UNICODE d } /* + Returns whether the current head has leading whitespace. + TODO: treat comments and templates as whitespace, allow fail on non-newline spaces. +*/ +static int Tokenizer_has_leading_whitespace(Tokenizer* self) +{ + int offset = 1; + Py_UNICODE current_character; + while (1) { + current_character = Tokenizer_READ_BACKWARDS(self, offset); + if (!current_character || current_character == '\n') + return 1; + else if (!Py_UNICODE_ISSPACE(current_character)) + return 0; + offset++; + } +} + +/* Parse the wikicode string, using context for when to stop. If push is true, we will push a new context, otherwise we won't and context will be ignored. */ @@ -2667,24 +3078,94 @@ static PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) if (temp != Py_None) return temp; } - else if (!last || last == '\n') { - if (this == '#' || this == '*' || this == ';' || this == ':') { - if (Tokenizer_handle_list(self)) + else if ((!last || last == '\n') && (this == '#' || this == '*' || this == ';' || this == ':')) { + if (Tokenizer_handle_list(self)) + return NULL; + } + else if ((!last || last == '\n') && (this == '-' && this == next && + this == Tokenizer_READ(self, 2) && + this == Tokenizer_READ(self, 3))) { + if (Tokenizer_handle_hr(self)) + return NULL; + } + else if ((this == '\n' || this == ':') && this_context & LC_DLTERM) { + if (Tokenizer_handle_dl_term(self)) + return NULL; + // kill potential table contexts + if (this == '\n') + self->topstack->context &= ~LC_TABLE_CELL_LINE_CONTEXTS; + } + + // Start of table parsing + else if (this == '{' && next == '|' && Tokenizer_has_leading_whitespace(self)) { + if (Tokenizer_CAN_RECURSE(self)) { + if (Tokenizer_handle_table_start(self)) + return NULL; + } + else if (Tokenizer_emit_char(self, this) || Tokenizer_emit_char(self, next)) + return NULL; + else + self->head++; + } + else if (this_context & LC_TABLE_OPEN) { + if (this == '|' && next == '|' && this_context & LC_TABLE_TD_LINE) { + if (this_context & LC_TABLE_CELL_OPEN) + return Tokenizer_handle_table_cell_end(self, 0); + else if (Tokenizer_handle_table_cell(self, "||", "td", LC_TABLE_TD_LINE)) + return NULL; + } + else if (this == '|' && next == '|' && this_context & LC_TABLE_TH_LINE) { + if (this_context & LC_TABLE_CELL_OPEN) + return Tokenizer_handle_table_cell_end(self, 0); + else if (Tokenizer_handle_table_cell(self, "||", "th", LC_TABLE_TH_LINE)) return NULL; } - else if (this == '-' && this == next && - this == Tokenizer_READ(self, 2) && - this == Tokenizer_READ(self, 3)) { - if (Tokenizer_handle_hr(self)) + else if (this == '!' && next == '!' 
&& this_context & LC_TABLE_TH_LINE) { + if (this_context & LC_TABLE_CELL_OPEN) + return Tokenizer_handle_table_cell_end(self, 0); + else if (Tokenizer_handle_table_cell(self, "!!", "th", LC_TABLE_TH_LINE)) + return NULL; + } + else if (this == '|' && this_context & LC_TABLE_CELL_STYLE) { + return Tokenizer_handle_table_cell_end(self, 1); + } + // on newline, clear out cell line contexts + else if (this == '\n' && this_context & LC_TABLE_CELL_LINE_CONTEXTS) { + self->topstack->context &= ~LC_TABLE_CELL_LINE_CONTEXTS; + if (Tokenizer_emit_char(self, this)) + return NULL; + } + else if (Tokenizer_has_leading_whitespace(self)) { + if (this == '|' && next == '}') { + if (this_context & LC_TABLE_CELL_OPEN) + return Tokenizer_handle_table_cell_end(self, 0); + else + return Tokenizer_handle_table_end(self); + } + else if (this == '|' && next == '-') { + if (this_context & LC_TABLE_CELL_OPEN) + return Tokenizer_handle_table_cell_end(self, 0); + else if (Tokenizer_handle_table_row(self)) + return NULL; + } + else if (this == '|') { + if (this_context & LC_TABLE_CELL_OPEN) + return Tokenizer_handle_table_cell_end(self, 0); + else if (Tokenizer_handle_table_cell(self, "|", "td", LC_TABLE_TD_LINE)) + return NULL; + } + else if (this == '!') { + if (this_context & LC_TABLE_CELL_OPEN) + return Tokenizer_handle_table_cell_end(self, 0); + else if (Tokenizer_handle_table_cell(self, "!", "th", LC_TABLE_TH_LINE)) + return NULL; + } + else if (Tokenizer_emit_char(self, this)) return NULL; } else if (Tokenizer_emit_char(self, this)) return NULL; } - else if ((this == '\n' || this == ':') && this_context & LC_DLTERM) { - if (Tokenizer_handle_dl_term(self)) - return NULL; - } else if (Tokenizer_emit_char(self, this)) return NULL; self->head++; diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index e9b1a92..de7b7d4 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -44,9 +44,9 @@ SOFTWARE. 
static const char MARKERS[] = { '{', '}', '[', ']', '<', '>', '|', '=', '&', '\'', '#', '*', ';', ':', '/', - '-', '\n', '\0'}; + '-', '!', '\n', '\0'}; -#define NUM_MARKERS 18 +#define NUM_MARKERS 19 #define TEXTBUFFER_BLOCKSIZE 1024 #define MAX_DEPTH 40 #define MAX_CYCLES 100000 @@ -110,60 +110,68 @@ static PyObject* TagCloseClose; /* Local contexts: */ -#define LC_TEMPLATE 0x00000007 -#define LC_TEMPLATE_NAME 0x00000001 -#define LC_TEMPLATE_PARAM_KEY 0x00000002 -#define LC_TEMPLATE_PARAM_VALUE 0x00000004 - -#define LC_ARGUMENT 0x00000018 -#define LC_ARGUMENT_NAME 0x00000008 -#define LC_ARGUMENT_DEFAULT 0x00000010 - -#define LC_WIKILINK 0x00000060 -#define LC_WIKILINK_TITLE 0x00000020 -#define LC_WIKILINK_TEXT 0x00000040 - -#define LC_EXT_LINK 0x00000180 -#define LC_EXT_LINK_URI 0x00000080 -#define LC_EXT_LINK_TITLE 0x00000100 - -#define LC_HEADING 0x00007E00 -#define LC_HEADING_LEVEL_1 0x00000200 -#define LC_HEADING_LEVEL_2 0x00000400 -#define LC_HEADING_LEVEL_3 0x00000800 -#define LC_HEADING_LEVEL_4 0x00001000 -#define LC_HEADING_LEVEL_5 0x00002000 -#define LC_HEADING_LEVEL_6 0x00004000 - -#define LC_TAG 0x00078000 -#define LC_TAG_OPEN 0x00008000 -#define LC_TAG_ATTR 0x00010000 -#define LC_TAG_BODY 0x00020000 -#define LC_TAG_CLOSE 0x00040000 - -#define LC_STYLE 0x00780000 -#define LC_STYLE_ITALICS 0x00080000 -#define LC_STYLE_BOLD 0x00100000 -#define LC_STYLE_PASS_AGAIN 0x00200000 -#define LC_STYLE_SECOND_PASS 0x00400000 - -#define LC_DLTERM 0x00800000 - -#define LC_SAFETY_CHECK 0x3F000000 -#define LC_HAS_TEXT 0x01000000 -#define LC_FAIL_ON_TEXT 0x02000000 -#define LC_FAIL_NEXT 0x04000000 -#define LC_FAIL_ON_LBRACE 0x08000000 -#define LC_FAIL_ON_RBRACE 0x10000000 -#define LC_FAIL_ON_EQUALS 0x20000000 - +#define LC_TEMPLATE 0x0000000000000007 +#define LC_TEMPLATE_NAME 0x0000000000000001 +#define LC_TEMPLATE_PARAM_KEY 0x0000000000000002 +#define LC_TEMPLATE_PARAM_VALUE 0x0000000000000004 + +#define LC_ARGUMENT 0x0000000000000018 +#define LC_ARGUMENT_NAME 0x0000000000000008 +#define LC_ARGUMENT_DEFAULT 0x0000000000000010 + +#define LC_WIKILINK 0x0000000000000060 +#define LC_WIKILINK_TITLE 0x0000000000000020 +#define LC_WIKILINK_TEXT 0x0000000000000040 + +#define LC_EXT_LINK 0x0000000000000180 +#define LC_EXT_LINK_URI 0x0000000000000080 +#define LC_EXT_LINK_TITLE 0x0000000000000100 + +#define LC_HEADING 0x0000000000007E00 +#define LC_HEADING_LEVEL_1 0x0000000000000200 +#define LC_HEADING_LEVEL_2 0x0000000000000400 +#define LC_HEADING_LEVEL_3 0x0000000000000800 +#define LC_HEADING_LEVEL_4 0x0000000000001000 +#define LC_HEADING_LEVEL_5 0x0000000000002000 +#define LC_HEADING_LEVEL_6 0x0000000000004000 + +#define LC_TAG 0x0000000000078000 +#define LC_TAG_OPEN 0x0000000000008000 +#define LC_TAG_ATTR 0x0000000000010000 +#define LC_TAG_BODY 0x0000000000020000 +#define LC_TAG_CLOSE 0x0000000000040000 + +#define LC_STYLE 0x0000000000780000 +#define LC_STYLE_ITALICS 0x0000000000080000 +#define LC_STYLE_BOLD 0x0000000000100000 +#define LC_STYLE_PASS_AGAIN 0x0000000000200000 +#define LC_STYLE_SECOND_PASS 0x0000000000400000 + +#define LC_DLTERM 0x0000000000800000 + +#define LC_SAFETY_CHECK 0x000000003F000000 +#define LC_HAS_TEXT 0x0000000001000000 +#define LC_FAIL_ON_TEXT 0x0000000002000000 +#define LC_FAIL_NEXT 0x0000000004000000 +#define LC_FAIL_ON_LBRACE 0x0000000008000000 +#define LC_FAIL_ON_RBRACE 0x0000000010000000 +#define LC_FAIL_ON_EQUALS 0x0000000020000000 + +// TODO realign all +#define LC_TABLE 0x00000007C0000000 +#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000000700000000 +#define LC_TABLE_OPEN 
0x0000000040000000 +#define LC_TABLE_CELL_OPEN 0x0000000080000000 +#define LC_TABLE_CELL_STYLE 0x0000000100000000 +#define LC_TABLE_TD_LINE 0x0000000200000000 +#define LC_TABLE_TH_LINE 0x0000000400000000 /* Global contexts: */ #define GL_HEADING 0x1 /* Aggregate contexts: */ -#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE) +#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE | LC_TABLE_OPEN) #define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) #define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE) #define AGG_NO_WIKILINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 9e22b28..e8f21c0 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1134,7 +1134,7 @@ class Tokenizer(object): self._emit_all(cell) # keep header/cell line contexts self._context |= cell_context & (contexts.TABLE_TH_LINE | contexts.TABLE_TD_LINE) - # offset displacement done by _parse() + # offset displacement done by parse() self._head -= 1 def _handle_table_cell_end(self, reset_for_style=False): From 94a9e32494fd8c3f1ce5e39a5ef1738967244ac2 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 21 Jul 2014 15:51:59 -0400 Subject: [PATCH 058/102] Add missing comma to test output. --- tests/tokenizer/tables.mwtest | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index 163579b..9572733 100644 --- a/tests/tokenizer/tables.mwtest +++ b/tests/tokenizer/tables.mwtest @@ -302,7 +302,7 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_fir name: table_cell_unclosed_style label: Parse unclosed and closed bold and italics inside cells. 
input: "{|\n | ''foo || '''bar ||''baz''||'''test'''\n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(padding=""), Text(text=" ''foo "), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(padding=""), Text(text=" '''bar "), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(padding=""), TagOpenOpen(wiki_markup="'"), Text(text="i"), TagCloseOpen(), Text(text="baz"), TagOpenClose(), Text(text="i"), TagCloseClose(), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(padding=""), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="text"), TagOpenClose(), Text(text="b"), TagCloseClose() Text(text="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(padding=""), Text(text=" ''foo "), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(padding=""), Text(text=" '''bar "), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(padding=""), TagOpenOpen(wiki_markup="'"), Text(text="i"), TagCloseOpen(), Text(text="baz"), TagOpenClose(), Text(text="i"), TagCloseClose(), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(padding=""), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="text"), TagOpenClose(), Text(text="b"), TagCloseClose(), Text(text="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- @@ -317,4 +317,4 @@ output: [Text(text="{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{| name: recursion_one_hundred_opens label: test potentially dangerous recursion: one hundred table openings, with spaces input: "{| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {|" -output: [Text(text="{| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {|")] \ No newline at end of file +output: [Text(text="{| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {|")] From 7bbeb6899a653cbca35c75f66edddfc6289b7564 Mon Sep 17 00:00:00 2001 From: David Winegar Date: Tue, 22 Jul 2014 10:41:34 -0700 Subject: [PATCH 059/102] Fix ordering of tag representation Self-closing wiki syntax tags have incorrectly ordered wiki syntax and padding, fixed the ordering. 
--- mwparserfromhell/nodes/tag.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index b3ea85c..c5f9d84 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -65,7 +65,7 @@ class Tag(Node): close = self.closing_wiki_markup if self.closing_wiki_markup else "" padding = self.padding if self.padding else "" if self.self_closing: - return self.wiki_markup + attrs + close + padding + return self.wiki_markup + attrs + padding + close else: return self.wiki_markup + attrs + padding + str(self.contents) + close From 64869fe84be7a5aa5b1c14f5f12c06232402ab9c Mon Sep 17 00:00:00 2001 From: David Winegar Date: Tue, 22 Jul 2014 12:23:44 -0700 Subject: [PATCH 060/102] Remove style test Remove style test to properly implement implicit style closes later. --- tests/tokenizer/tables.mwtest | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index 9572733..c684451 100644 --- a/tests/tokenizer/tables.mwtest +++ b/tests/tokenizer/tables.mwtest @@ -299,14 +299,6 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_fir --- -name: table_cell_unclosed_style -label: Parse unclosed and closed bold and italics inside cells. -input: "{|\n | ''foo || '''bar ||''baz''||'''test'''\n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(padding=""), Text(text=" ''foo "), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(padding=""), Text(text=" '''bar "), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(padding=""), TagOpenOpen(wiki_markup="'"), Text(text="i"), TagCloseOpen(), Text(text="baz"), TagOpenClose(), Text(text="i"), TagCloseClose(), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(padding=""), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="text"), TagOpenClose(), Text(text="b"), TagCloseClose(), Text(text="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] - - ---- - name: recursion_five_hundred_opens label: test potentially dangerous recursion: five hundred table openings, without spaces input: "{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|" From 213c105666a669349dfa607a163da245df9af466 Mon Sep 17 00:00:00 2001 From: David Winegar Date: Tue, 22 Jul 2014 14:31:37 -0700 Subject: [PATCH 061/102] Table tags are no longer self-closing Table tags no longer self-closing. Rows and cells now contain their contents. Also refactored out an `emit_table_tag` method. Note: this will require changes to the Tag node and possibly the builder, those changes will be in the next commit. 
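To illustrate the new token-stream shape, compare the tokens emitted for the bare row "{|\n |- \n|}" (taken from the table_row_simple test updated below; an abbreviated sketch, not the full output):

    # Before: the row tag was self-closing, and its contents trailed it.
    TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseSelfclose(padding=" \n")
    # After: the row tag wraps its contents and carries an explicit close.
    TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding=" \n"),
    TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose()

Rows and cells thus nest their contents the same way other Tag nodes do, which is what lets the emit_table_tag helper handle tables, rows, and cells uniformly.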
--- mwparserfromhell/parser/contexts.py | 9 +- mwparserfromhell/parser/tokenizer.c | 289 +++++++++++++++++------------------ mwparserfromhell/parser/tokenizer.h | 11 +- mwparserfromhell/parser/tokenizer.py | 83 +++++----- tests/tokenizer/tables.mwtest | 44 +++--- 5 files changed, 218 insertions(+), 218 deletions(-) diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index 6dd5319..ef44ce2 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -167,11 +167,12 @@ SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE + TABLE_OPEN = 1 << 30 TABLE_CELL_OPEN = 1 << 31 TABLE_CELL_STYLE = 1 << 32 -TABLE_TD_LINE = 1 << 33 -TABLE_TH_LINE = 1 << 34 +TABLE_ROW_OPEN = 1 << 33 +TABLE_TD_LINE = 1 << 34 +TABLE_TH_LINE = 1 << 35 TABLE_CELL_LINE_CONTEXTS = TABLE_TD_LINE + TABLE_TH_LINE + TABLE_CELL_STYLE -TABLE = (TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE + TABLE_TD_LINE + - TABLE_TH_LINE) +TABLE = (TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE + + TABLE_ROW_OPEN + + TABLE_TD_LINE + TABLE_TH_LINE) # Global contexts: diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 1d2964e..c062404 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -2454,6 +2454,88 @@ static PyObject* Tokenizer_handle_end(Tokenizer* self, uint64_t context) } /* + Emit a table tag. +*/ +static int Tokenizer_emit_table_tag(Tokenizer* self, const char* open_open_markup, + const char* tag, PyObject* style, PyObject* padding, + const char* close_open_markup, PyObject* contents, + const char* open_close_markup) +{ + PyObject *open_open_kwargs, *open_open_markup_unicode, *close_open_kwargs, *close_open_markup_unicode, + *open_close_kwargs, *open_close_markup_unicode; + + open_open_kwargs = PyDict_New(); + if (!open_open_kwargs) + goto fail_decref_all; + open_open_markup_unicode = PyUnicode_FromString(open_open_markup); + if (!open_open_markup_unicode) { + Py_DECREF(open_open_kwargs); + goto fail_decref_all; + } + PyDict_SetItemString(open_open_kwargs, "wiki_markup", open_open_markup_unicode); + Py_DECREF(open_open_markup_unicode); + if (Tokenizer_emit_kwargs(self, TagOpenOpen, open_open_kwargs)) + goto fail_decref_all; + if (Tokenizer_emit_text(self, tag)) + goto fail_decref_all; + + if (style) { + if (Tokenizer_emit_all(self, style)) + goto fail_decref_all; + Py_DECREF(style); + } + + close_open_kwargs = PyDict_New(); + if (!close_open_kwargs) + goto fail_decref_padding_contents; + if (close_open_markup && strlen(close_open_markup) != 0) { + close_open_markup_unicode = PyUnicode_FromString(close_open_markup); + if (!close_open_markup_unicode) { + Py_DECREF(close_open_kwargs); + goto fail_decref_padding_contents; + } + PyDict_SetItemString(close_open_kwargs, "wiki_markup", close_open_markup_unicode); + Py_DECREF(close_open_markup_unicode); + } + PyDict_SetItemString(close_open_kwargs, "padding", padding); + Py_DECREF(padding); + if (Tokenizer_emit_kwargs(self, TagCloseOpen, close_open_kwargs)) + goto fail_decref_contents; + + if (contents) { + if (Tokenizer_emit_all(self, contents)) + goto fail_decref_contents; + Py_DECREF(contents); + } + + open_close_kwargs = PyDict_New(); + if (!open_close_kwargs) + return -1; + open_close_markup_unicode = PyUnicode_FromString(open_close_markup); + if (!open_close_markup_unicode) { + Py_DECREF(open_close_kwargs); + return -1; + } + PyDict_SetItemString(open_close_kwargs, "wiki_markup", open_close_markup_unicode); + 
Py_DECREF(open_close_markup_unicode); + if (Tokenizer_emit_kwargs(self, TagOpenClose, open_close_kwargs)) + return -1; + if (Tokenizer_emit_text(self, tag)) + return -1; + if (Tokenizer_emit(self, TagCloseClose)) + return -1; + return 0; + + fail_decref_all: + Py_XDECREF(style); + fail_decref_padding_contents: + Py_DECREF(padding); + fail_decref_contents: + Py_DECREF(contents); + return -1; +} + +/* Parse until ``end_token`` as style attributes for a table. */ static PyObject* Tokenizer_parse_as_table_style(Tokenizer* self, char end_token, @@ -2521,8 +2603,7 @@ static int Tokenizer_handle_table_start(Tokenizer* self) { self->head += 2; Py_ssize_t reset = self->head; - PyObject *style, *open_open_kwargs, *close_open_kwargs, *open_close_kwargs, - *padding, *newline_character, *open_wiki_markup, *close_wiki_markup; + PyObject *style, *padding, *newline_character; PyObject *table = NULL; if(Tokenizer_push(self, LC_TABLE_OPEN)) @@ -2573,68 +2654,11 @@ static int Tokenizer_handle_table_start(Tokenizer* self) self->head += 2; } - open_open_kwargs = PyDict_New(); - if (!open_open_kwargs) - goto fail_decref_all; - open_wiki_markup = PyUnicode_FromString("{|"); - if (!open_wiki_markup) { - Py_DECREF(open_open_kwargs); - goto fail_decref_all; - } - PyDict_SetItemString(open_open_kwargs, "wiki_markup", open_wiki_markup); - Py_DECREF(open_wiki_markup); - if (Tokenizer_emit_kwargs(self, TagOpenOpen, open_open_kwargs)) - goto fail_decref_all; - if (Tokenizer_emit_text(self, "table")) - goto fail_decref_all; - - if (style) { - if (Tokenizer_emit_all(self, style)) - goto fail_decref_padding_table; - Py_DECREF(style); - } - - close_open_kwargs = PyDict_New(); - if (!close_open_kwargs) - goto fail_decref_padding_table; - PyDict_SetItemString(close_open_kwargs, "padding", padding); - Py_DECREF(padding); - if (Tokenizer_emit_kwargs(self, TagCloseOpen, close_open_kwargs)) - goto fail_decref_table; - - if (table) { - if (Tokenizer_emit_all(self, table)) - goto fail_decref_table; - Py_DECREF(table); - } - - open_close_kwargs = PyDict_New(); - if (!open_close_kwargs) - return -1; - close_wiki_markup = PyUnicode_FromString("|}"); - if (!close_wiki_markup) { - Py_DECREF(open_close_kwargs); - return -1; - } - PyDict_SetItemString(open_close_kwargs, "wiki_markup", close_wiki_markup); - Py_DECREF(close_wiki_markup); - if (Tokenizer_emit_kwargs(self, TagOpenClose, open_close_kwargs)) - return -1; - if (Tokenizer_emit_text(self, "table")) - return -1; - if (Tokenizer_emit(self, TagCloseClose)) + if (Tokenizer_emit_table_tag(self, "{|", "table", style, padding, NULL, table, "|}")) return -1; // offset displacement done by _parse() self->head--; return 0; - - fail_decref_all: - Py_DECREF(style); - fail_decref_padding_table: - Py_DECREF(padding); - fail_decref_table: - Py_XDECREF(table); - return -1; } /* @@ -2651,67 +2675,60 @@ static PyObject * Tokenizer_handle_table_end(Tokenizer* self) */ static int Tokenizer_handle_table_row(Tokenizer* self) { + if (!Tokenizer_CAN_RECURSE(self)) { + if (Tokenizer_emit_text(self, "|-")) + return -1; + self->head += 1; + return 0; + } + Py_ssize_t reset = self->head; self->head += 2; - PyObject *padding, *open_kwargs, *close_kwargs, *wiki_markup; - PyObject *style = NULL; + PyObject *padding, *style, *row; - // If we can't recurse, still tokenize tag but parse style attrs as text - if (Tokenizer_CAN_RECURSE(self)) { - if(Tokenizer_push(self, LC_TABLE_OPEN)) - return -1; - padding = Tokenizer_parse_as_table_style(self, '\n', 0); - if (BAD_ROUTE) { - self->head = reset; - return 0; - } - 
if (!padding) - return -1; - style = Tokenizer_pop(self); - if (!style) { - Py_DECREF(padding); - return -1; - } - } else { - padding = PyUnicode_FromString(""); - if (!padding) - return -1; + if(Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_ROW_OPEN)) + return -1; + padding = Tokenizer_parse_as_table_style(self, '\n', 0); + if (BAD_ROUTE) { + self->head = reset; + return 0; } - - open_kwargs = PyDict_New(); - if (!open_kwargs) - goto fail_decref_all; - wiki_markup = PyUnicode_FromString("|-"); - if (!wiki_markup) { - Py_DECREF(open_kwargs); - goto fail_decref_all; + if (!padding) + return -1; + style = Tokenizer_pop(self); + if (!style) { + Py_DECREF(padding); + return -1; } - PyDict_SetItemString(open_kwargs, "wiki_markup", wiki_markup); - Py_DECREF(wiki_markup); - if (Tokenizer_emit_kwargs(self, TagOpenOpen, open_kwargs)) - goto fail_decref_all; - if (Tokenizer_emit_text(self, "tr")) - goto fail_decref_all; - - if (style) { - if (Tokenizer_emit_all(self, style)) - goto fail_decref_all; + // don't parse the style separator + self->head++; + row = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_ROW_OPEN, 1); + if (BAD_ROUTE) { + Py_DECREF(padding); Py_DECREF(style); + self->head = reset; + return 0; + } + if (!row) { + Py_DECREF(padding); + Py_DECREF(style); + Py_DECREF(row); + return -1; } - close_kwargs = PyDict_New(); - if (!close_kwargs) - goto fail_decref_all; - PyDict_SetItemString(close_kwargs, "padding", padding); - Py_DECREF(padding); - if (Tokenizer_emit_kwargs(self, TagCloseSelfclose, close_kwargs)) + if (Tokenizer_emit_table_tag(self, "|-", "tr", style, padding, NULL, row, "")) return -1; + // offset displacement done by _parse() + self->head--; return 0; +} - fail_decref_all: - Py_XDECREF(style); - Py_DECREF(padding); - return -1; +/* + Return the stack in order to handle the table row end. 
+*/ +static PyObject* Tokenizer_handle_table_row_end(Tokenizer* self) +{ + return Tokenizer_pop(self); } /* @@ -2732,9 +2749,9 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, uint64_t cell_context; Py_ssize_t reset = self->head; self->head += strlen(markup); - PyObject *padding; - PyObject *cell, *open_kwargs, *close_kwargs, *open_wiki_markup, *close_wiki_markup; + PyObject *padding, *cell; PyObject *style = NULL; + const char *close_open_markup = NULL; cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | LC_TABLE_CELL_STYLE | line_context, 1); if (BAD_ROUTE) { @@ -2783,54 +2800,16 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, } } - open_kwargs = PyDict_New(); - if (!open_kwargs) - goto fail_decref_all; - close_kwargs = PyDict_New(); - if (!close_kwargs) - goto fail_decref_all; - open_wiki_markup = PyUnicode_FromString(markup); - if (!open_wiki_markup) - goto fail_decref_all; - PyDict_SetItemString(open_kwargs, "wiki_markup", open_wiki_markup); - Py_DECREF(open_wiki_markup); - if (Tokenizer_emit_kwargs(self, TagOpenOpen, open_kwargs)) - goto fail_decref_all; - if (Tokenizer_emit_text(self, tag)) - goto fail_decref_all; - if (style) { - if (Tokenizer_emit_all(self, style)) - goto fail_decref_all; - close_wiki_markup = PyUnicode_FromString("|"); - if (!close_wiki_markup) - goto fail_decref_all; - PyDict_SetItemString(close_kwargs, "wiki_markup", close_wiki_markup); - Py_DECREF(close_wiki_markup); - Py_DECREF(style); + close_open_markup = "|"; } - - PyDict_SetItemString(close_kwargs, "padding", padding); - Py_DECREF(padding); - if (Tokenizer_emit_kwargs(self, TagCloseSelfclose, close_kwargs)) - goto fail_decref_cell; - if (Tokenizer_emit_all(self, cell)) - goto fail_decref_cell; - Py_DECREF(cell); + if (Tokenizer_emit_table_tag(self, markup, tag, style, padding, close_open_markup, cell, "")) + return -1; // keep header/cell line contexts self->topstack->context |= cell_context & (LC_TABLE_TH_LINE | LC_TABLE_TD_LINE); // offset displacement done by parse() self->head--; return 0; - - fail_decref_all: - Py_XDECREF(style); - Py_DECREF(padding); - Py_XDECREF(open_kwargs); - Py_XDECREF(close_kwargs); - fail_decref_cell: - Py_DECREF(cell); - return -1; } /* @@ -3139,12 +3118,16 @@ static PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) if (this == '|' && next == '}') { if (this_context & LC_TABLE_CELL_OPEN) return Tokenizer_handle_table_cell_end(self, 0); + if (this_context & LC_TABLE_ROW_OPEN) + return Tokenizer_handle_table_row_end(self); else return Tokenizer_handle_table_end(self); } else if (this == '|' && next == '-') { if (this_context & LC_TABLE_CELL_OPEN) return Tokenizer_handle_table_cell_end(self, 0); + if (this_context & LC_TABLE_ROW_OPEN) + return Tokenizer_handle_table_row_end(self); else if (Tokenizer_handle_table_row(self)) return NULL; } diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index de7b7d4..57a0121 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -157,14 +157,15 @@ static PyObject* TagCloseClose; #define LC_FAIL_ON_RBRACE 0x0000000010000000 #define LC_FAIL_ON_EQUALS 0x0000000020000000 -// TODO realign all -#define LC_TABLE 0x00000007C0000000 -#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000000700000000 +#define LC_TABLE 0x0000000FC0000000 +#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000000D00000000 #define LC_TABLE_OPEN 0x0000000040000000 #define LC_TABLE_CELL_OPEN 0x0000000080000000 #define 
LC_TABLE_CELL_STYLE 0x0000000100000000 -#define LC_TABLE_TD_LINE 0x0000000200000000 -#define LC_TABLE_TH_LINE 0x0000000400000000 +#define LC_TABLE_ROW_OPEN 0x0000000200000000 +#define LC_TABLE_TD_LINE 0x0000000400000000 +#define LC_TABLE_TH_LINE 0x0000000800000000 + /* Global contexts: */ #define GL_HEADING 0x1 diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index e8f21c0..6ae6050 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1002,6 +1002,23 @@ class Tokenizer(object): self._fail_route() return self._pop() + def _emit_table_tag(self, open_open_markup, tag, style, padding, + close_open_markup, contents, open_close_markup): + """Emit a table tag.""" + self._emit(tokens.TagOpenOpen(wiki_markup=open_open_markup)) + self._emit_text(tag) + if style: + self._emit_all(style) + if close_open_markup: + self._emit(tokens.TagCloseOpen(wiki_markup=close_open_markup, padding=padding)) + else: + self._emit(tokens.TagCloseOpen(padding=padding)) + if contents: + self._emit_all(contents) + self._emit(tokens.TagOpenClose(wiki_markup=open_close_markup)) + self._emit_text(tag) + self._emit(tokens.TagCloseClose()) + def _parse_as_table_style(self, end_token, break_on_table_end=False): """Parse until ``end_token`` as style attributes for a table.""" data = _TagOpenData() @@ -1052,17 +1069,7 @@ class Tokenizer(object): self._head = reset - 1 self._emit_text("{|") else: - self._emit(tokens.TagOpenOpen(wiki_markup="{|")) - self._emit_text("table") - if style: - self._emit_all(style) - self._emit(tokens.TagCloseOpen(padding=padding)) - if table: - self._emit_all(table) - self._emit(tokens.TagOpenClose(wiki_markup="|}")) - self._emit_text("table") - self._emit(tokens.TagCloseClose()) - # offset displacement done by _parse() + self._emit_table_tag("{|", "table", style, padding, None, table, "|}") self._head -= 1 def _handle_table_end(self): @@ -1072,23 +1079,31 @@ class Tokenizer(object): def _handle_table_row(self): """Parse as style until end of the line, then continue.""" + if not self._can_recurse(): + self._emit_text("|-") + self._head += 1 + return + reset = self._head self._head += 2 style, padding = None, "" - # If we can't recurse, still tokenize tag but parse style attrs as text - if self._can_recurse(): - try: - self._push(contexts.TABLE_OPEN) - padding = self._parse_as_table_style("\n") - style = self._pop() - except BadRoute: - self._head = reset - raise - self._emit(tokens.TagOpenOpen(wiki_markup="|-")) - self._emit_text("tr") - if style: - self._emit_all(style) - self._emit(tokens.TagCloseSelfclose(padding=padding)) + try: + self._push(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN) + padding = self._parse_as_table_style("\n") + style = self._pop() + # don't parse the style separator + self._head += 1 + row = self._parse(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN) + except BadRoute: + self._head = reset + raise + self._emit_table_tag("|-", "tr", style, padding, None, row, "") + # offset displacement done by parse() + self._head -= 1 + + def _handle_table_row_end(self): + """Return the stack in order to handle the table row end.""" + return self._pop() def _handle_table_cell(self, markup, tag, line_context): """Parse as normal syntax unless we hit a style marker, then parse style @@ -1101,7 +1116,7 @@ class Tokenizer(object): old_context = self._context reset = self._head self._head += len(markup) - reset_for_style, padding = False, "" + reset_for_style, padding, style = False, "", None try: cell = 
self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context | contexts.TABLE_CELL_STYLE) cell_context = self._context @@ -1124,14 +1139,8 @@ except BadRoute: self._head = reset raise - self._emit(tokens.TagOpenOpen(wiki_markup=markup)) - self._emit_text(tag) - if reset_for_style: - self._emit_all(style) - self._emit(tokens.TagCloseSelfclose(wiki_markup="|", padding=padding)) - else: - self._emit(tokens.TagCloseSelfclose(padding=padding)) - self._emit_all(cell) + close_open_markup = "|" if reset_for_style else None + self._emit_table_tag(markup, tag, style, padding, close_open_markup, cell, "") # keep header/cell line contexts self._context |= cell_context & (contexts.TABLE_TH_LINE | contexts.TABLE_TD_LINE) # offset displacement done by parse() @@ -1140,6 +1149,8 @@ class Tokenizer(object): def _handle_table_cell_end(self, reset_for_style=False): """Returns the current context, with the TABLE_CELL_STYLE flag set if it is necessary to reset and parse style attributes.""" + if self._context & (contexts.FAIL & ~contexts.TABLE): + raise BadRoute if reset_for_style: self._context |= contexts.TABLE_CELL_STYLE else: @@ -1328,10 +1339,14 @@ class Tokenizer(object): if this == "|" and next == "}": if self._context & contexts.TABLE_CELL_OPEN: return self._handle_table_cell_end() + if self._context & contexts.TABLE_ROW_OPEN: + return self._handle_table_row_end() return self._handle_table_end() elif this == "|" and next == "-": if self._context & contexts.TABLE_CELL_OPEN: return self._handle_table_cell_end() + if self._context & contexts.TABLE_ROW_OPEN: + return self._handle_table_row_end() self._handle_table_row() elif this == "|": if self._context & contexts.TABLE_CELL_OPEN: diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index c684451..455da67 100644 --- a/tests/tokenizer/tables.mwtest +++ b/tests/tokenizer/tables.mwtest @@ -106,42 +106,42 @@ output: [Text(text="foo \n foo \t {|\n|}")] name: table_row_simple label: Simple table row. input: "{|\n |- \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseSelfclose(padding=" \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding=" \n"), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_row_multiple label: Simple table row.
input: "{|\n |- \n|- \n |-\n |}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseSelfclose(padding=" \n"), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseSelfclose(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseSelfclose(padding="\n"), Text(text=" "), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding=" \n"), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_simple label: Simple table cell. input: "{|\n | foo \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_inline label: Multiple inline table cells. input: "{|\n | foo || bar || test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(padding=""), Text(text=" foo "), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(padding=""), Text(text=" bar "),TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" foo "), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" bar "), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_fake_close label: Looks like a table close but is not. 
input: "{|\n | |} \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(wiki_markup="|", padding=" "), Text(text="} \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(wiki_markup="|", padding=" "), Text(text="} \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_more_fake_close label: Looks like a table close but is not. input: "{|\n || |} \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(wiki_markup="|", padding=""), Text(text=" |} \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" |} \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- @@ -155,28 +155,28 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding name: table_header_simple label: Simple header cell. input: "{|\n ! foo \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseSelfclose(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_header_inline label: Multiple inline header cells. input: "{|\n ! foo || bar !! test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseSelfclose(padding=""), Text(text=" foo "), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagCloseSelfclose(padding=""), Text(text=" bar "),TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagCloseSelfclose(padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" foo "), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" bar "), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: nowiki_inside_table label: Nowiki handles pipe characters in tables. input: "{|\n | foo | |- {| |} || ! !! 
bar \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(padding=""), Text(text=" foo "), TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="| |- {| |} || ! !!"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), Text(text=" bar \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" foo "), TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="| |- {| |} || ! !!"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), Text(text=" bar \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_text_outside_cell label: Parse text inside table but outside of a cell. input: "{|\n bar \n | foo \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" bar \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" bar \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- @@ -197,84 +197,84 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding name: template_inside_table_cell label: Template within table cell. input: "{|\n |{{foo\n|bar=baz}} \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(padding=""), TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), Text(text=" \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), Text(text=" \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes label: Parse table cell style attributes. 
input: "{| \n | name="foo bar"| test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(wiki_markup="|", padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_empty_attributes label: Parse table cell with style markers but no attributes. input: "{| \n | | test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(wiki_markup="|", padding=" "), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(wiki_markup="|", padding=" "), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_with_dash label: Parse a situation in which a cell line looks like a row line. input: "{|\n ||- \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(wiki_markup="|", padding=""), Text(text="- \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(wiki_markup="|", padding=""), Text(text="- \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes_quote_with_pipe label: Pipe inside an attribute quote should still be used as a style separator. 
input: "{| \n | name="foo|bar"| test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), Text(text="\"foo"), TagCloseSelfclose(wiki_markup="|", padding=""), Text(text="bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), Text(text="\"foo"), TagCloseOpen(wiki_markup="|", padding=""), Text(text="bar\"| test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes_name_with_pipe label: Pipe inside an attribute name should still be used as a style separator. input: "{| \n | name|="foo bar" | test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseSelfclose(wiki_markup="|", padding=""), Text(text="=\"foo bar\" | test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseOpen(wiki_markup="|", padding=""), Text(text="=\"foo bar\" | test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes_pipe_after_equals label: Pipe inside an attribute should still be used as a style separator after an equals. input: "{| \n | name=|"foo|bar"| test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagCloseSelfclose(wiki_markup="|", padding=""), Text(text="\"foo|bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagCloseOpen(wiki_markup="|", padding=""), Text(text="\"foo|bar\"| test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes_templates label: Pipe inside attributes shouldn't be style separator. 
input: "{| \n | {{comment|template=baz}} | test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_after_eq="", pad_first=" ", pad_before_eq=" "), TemplateOpen(), Text(text="comment"), TemplateParamSeparator(), Text(text="template"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), TagCloseSelfclose(wiki_markup="|", padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_after_eq="", pad_first=" ", pad_before_eq=" "), TemplateOpen(), Text(text="comment"), TemplateParamSeparator(), Text(text="template"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: header_cell_attributes label: Parse header cell style attributes. input: "{| \n ! name="foo bar"| test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(wiki_markup="|", padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: inline_cell_attributes label: Parse cell style attributes of inline cells. input: "{| \n ! 
name="foo bar" | test ||color="red"| markup!!foo | time \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagAttrStart(pad_after_eq="", pad_first=" ", pad_before_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(wiki_markup="|", padding=" "), Text(text=" test "), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagAttrStart(pad_first="", pad_before_eq="", pad_after_eq=""), Text(text="color"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="red"), TagCloseSelfclose(wiki_markup="|", padding=""), Text(text=" markup"), TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagAttrStart(pad_first="", pad_before_eq=" ", pad_after_eq=""), Text(text="foo"), TagCloseSelfclose(wiki_markup="|", padding=""), Text(text=" time \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagAttrStart(pad_after_eq="", pad_first=" ", pad_before_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(wiki_markup="|", padding=" "), Text(text=" test "), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagAttrStart(pad_first="", pad_before_eq="", pad_after_eq=""), Text(text="color"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="red"), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" markup"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagAttrStart(pad_first="", pad_before_eq=" ", pad_after_eq=""), Text(text="foo"), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" time \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_row_attributes label: Parse table row style attributes. input: "{| \n |- name="foo bar"\n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_row_attributes_crazy_whitespace label: Parse table row style attributes with different whitespace. 
input: "{| \t \n |- \t name="foo bar" \t \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \t \n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" \t ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(padding=" \t \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \t \n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" \t ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(padding=" \t \n"), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- From 1b3e3c365704bed8b0b9d8601c9ca5cbe8e7e0f6 Mon Sep 17 00:00:00 2001 From: David Winegar Date: Tue, 22 Jul 2014 15:17:51 -0700 Subject: [PATCH 062/102] Change wiki tags to use style separators For wiki syntax tables, add `wiki_style_separator` as an attribute for the Tag node. Also reorder `closing_wiki_markup` property and tests to match its place in the constructor. --- mwparserfromhell/nodes/tag.py | 78 +++++++++++++++++++++++--------------- mwparserfromhell/parser/builder.py | 6 ++- tests/test_tag.py | 40 ++++++++++++------- 3 files changed, 79 insertions(+), 45 deletions(-) diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index c5f9d84..e9531e7 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -35,7 +35,8 @@ class Tag(Node): def __init__(self, tag, contents=None, attrs=None, wiki_markup=None, self_closing=False, invalid=False, implicit=False, padding="", - closing_tag=None, closing_wiki_markup=None): + closing_tag=None, wiki_style_separator=None, + closing_wiki_markup=None): super(Tag, self).__init__() self._tag = tag if contents is None and not self_closing: @@ -44,12 +45,6 @@ class Tag(Node): self._contents = contents self._attrs = attrs if attrs else [] self._wiki_markup = wiki_markup - if closing_wiki_markup: - self._closing_wiki_markup = closing_wiki_markup - elif wiki_markup and not self_closing: - self._closing_wiki_markup = wiki_markup - else: - self._closing_wiki_markup = None self._self_closing = self_closing self._invalid = invalid self._implicit = implicit @@ -58,16 +53,28 @@ class Tag(Node): self._closing_tag = closing_tag else: self._closing_tag = tag + self._wiki_style_separator = wiki_style_separator + if closing_wiki_markup is not None: + self._closing_wiki_markup = closing_wiki_markup + elif wiki_markup and not self_closing: + self._closing_wiki_markup = wiki_markup + else: + self._closing_wiki_markup = None def __unicode__(self): if self.wiki_markup: - attrs = "".join([str(attr) for attr in self.attributes]) if self.attributes else "" - close = self.closing_wiki_markup if self.closing_wiki_markup else "" - padding = self.padding if self.padding else "" + if self.attributes: + attrs = "".join([str(attr) for attr in self.attributes]) + else: + attrs = "" + padding = self.padding or "" + separator = self.wiki_style_separator or "" + close = self.closing_wiki_markup or "" if self.self_closing: - return self.wiki_markup + attrs + padding + close + return self.wiki_markup + attrs + padding + separator else: - return self.wiki_markup + attrs + padding + str(self.contents) + close + return 
self.wiki_markup + attrs + padding + separator + \ + str(self.contents) + close result = ("``).""" return self._self_closing @@ -197,6 +190,27 @@ class Tag(Node): """ return self._closing_tag + @property + def wiki_style_separator(self): + """The separator between the padding and content in a wiki markup tag. + + Essentially the wiki equivalent of the ``TagCloseOpen`` token. + """ + return self._wiki_style_separator + + @property + def closing_wiki_markup(self): + """The wikified version of the closing tag to show instead of HTML. + + If set to a value, this will be displayed instead of the close tag + brackets. If :attr:`self_closing` is ``True``, then this is not + displayed. If :attr:`wiki_markup` is set and this has not been set, this + is set to the value of :attr:`wiki_markup`. If this has been set and + :attr:`wiki_markup` is set to a ``False`` value, this is set to + ``None``. + """ + return self._closing_wiki_markup + @tag.setter def tag(self, value): self._tag = self._closing_tag = parse_anything(value) @@ -211,10 +225,6 @@ class Tag(Node): if not value or not self.closing_wiki_markup: self.closing_wiki_markup = str(value) if value else None - @closing_wiki_markup.setter - def closing_wiki_markup(self, value): - self._closing_wiki_markup = str(value) if value else None - @self_closing.setter def self_closing(self, value): self._self_closing = bool(value) @@ -241,6 +251,14 @@ class Tag(Node): def closing_tag(self, value): self._closing_tag = parse_anything(value) + @wiki_style_separator.setter + def wiki_style_separator(self, value): + self._wiki_style_separator = str(value) if value else None + + @closing_wiki_markup.setter + def closing_wiki_markup(self, value): + self._closing_wiki_markup = str(value) if value else None + def has(self, name): """Return whether any attribute in the tag has the given *name*.
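For context, a rough sketch (not part of this patch) of how the two new attributes surface once this branch is built. It uses only calls that already exist in the library (parse(), filter_tags(), Tag.contents) plus the properties added above; the expected values follow the tables.mwtest cases elsewhere in this series:

    import mwparserfromhell

    code = mwparserfromhell.parse('{|\n| name="foo"| test \n|}')
    # Recursive filter in document order: the table node first, then the cell.
    table, cell = code.filter_tags()
    assert cell.wiki_markup == "|"            # markup that opens the cell
    assert cell.wiki_style_separator == "|"   # separator after the attributes
    assert cell.closing_wiki_markup == ""     # wiki cells have no explicit close
    assert str(cell) == '| name="foo"| test \n'  # attributes round-trip

The separator is what allows a cell's style attributes to be rendered back out between the opening markup and the contents without losing the second pipe.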
diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index 32cbb93..99a54d1 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -248,13 +248,14 @@ class Builder(object): close_tokens = (tokens.TagCloseSelfclose, tokens.TagCloseClose) implicit, attrs, contents, closing_tag = False, [], None, None wiki_markup, invalid = token.wiki_markup, token.invalid or False - closing_wiki_markup = None + wiki_style_separator, closing_wiki_markup = None, wiki_markup self._push() while self._tokens: token = self._tokens.pop() if isinstance(token, tokens.TagAttrStart): attrs.append(self._handle_attribute(token)) elif isinstance(token, tokens.TagCloseOpen): + wiki_style_separator = token.wiki_markup padding = token.padding or "" tag = self._pop() self._push() @@ -273,7 +274,8 @@ class Builder(object): self_closing = False closing_tag = self._pop() return Tag(tag, contents, attrs, wiki_markup, self_closing, - invalid, implicit, padding, closing_tag, closing_wiki_markup) + invalid, implicit, padding, closing_tag, + wiki_style_separator, closing_wiki_markup) else: self._write(self._handle_token(token)) raise ParserError("_handle_tag() missed a close token") diff --git a/tests/test_tag.py b/tests/test_tag.py index 2d67723..c2c751b 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -171,19 +171,6 @@ class TestTag(TreeEqualityTestCase): self.assertFalse(node.wiki_markup) self.assertEqual("italic text", node) - def test_closing_wiki_markup(self): - """test getter/setter behavior for closing_wiki_markup attribute""" - node = Tag(wraptext("table"), wraptext("\n")) - self.assertIs(None, node.closing_wiki_markup) - node.wiki_markup = "{|" - self.assertEqual("{|", node.closing_wiki_markup) - node.closing_wiki_markup = "|}" - self.assertEqual("|}", node.closing_wiki_markup) - self.assertEqual("{|\n|}", node) - node.wiki_markup = False - self.assertFalse(node.closing_wiki_markup) - self.assertEqual("<table>\n</table>", node)
- def test_self_closing(self): """test getter/setter for the self_closing attribute""" node = Tag(wraptext("ref"), wraptext("foobar")) @@ -239,6 +226,33 @@ class TestTag(TreeEqualityTestCase): self.assertWikicodeEqual(parsed, node.closing_tag) self.assertEqual("foobar", node) + def test_wiki_style_separator(self): + """test getter/setter for wiki_style_separator attribute""" + node = Tag(wraptext("table"), wraptext("\n")) + self.assertIs(None, node.wiki_style_separator) + node.wiki_style_separator = "|" + self.assertEqual("|", node.wiki_style_separator) + node.wiki_markup = "{" + self.assertEqual("{|\n{", node) + node2 = Tag(wraptext("table"), wraptext("\n"), wiki_style_separator="|") + self.assertEqual("|", node2.wiki_style_separator) + + def test_closing_wiki_markup(self): + """test getter/setter for closing_wiki_markup attribute""" + node = Tag(wraptext("table"), wraptext("\n")) + self.assertIs(None, node.closing_wiki_markup) + node.wiki_markup = "{|" + self.assertEqual("{|", node.closing_wiki_markup) + node.closing_wiki_markup = "|}" + self.assertEqual("|}", node.closing_wiki_markup) + self.assertEqual("{|\n|}", node) + node.wiki_markup = False + self.assertFalse(node.closing_wiki_markup) + self.assertEqual("<table>\n</table>", node)
+ node2 = Tag(wraptext("table"), wraptext("\n"), wiki_markup="{|", + closing_wiki_markup="|}") + self.assertEqual("|}", node2.closing_wiki_markup) + def test_has(self): """test Tag.has()""" node = Tag(wraptext("ref"), wraptext("cite"), [agen("name", "foo")]) From c63108039b4bb56348bd54ba0b59fe77c5f19eec Mon Sep 17 00:00:00 2001 From: David Winegar Date: Tue, 22 Jul 2014 16:01:32 -0700 Subject: [PATCH 063/102] Fix C code to make declarations before statements Python 3.4 compiles C extensions with the `-Werror=declaration-after-statement` flag that enforces C90 more strictly than previous versions. Move all statements after declarations to make sure this extension builds on 3.4. --- mwparserfromhell/parser/tokenizer.c | 34 +++++++++++++++++----------------- mwparserfromhell/parser/tokenizer.py | 26 ++++++++++++-------------- 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index c062404..c902c3d 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -2601,17 +2601,17 @@ static PyObject* Tokenizer_parse_as_table_style(Tokenizer* self, char end_token, */ static int Tokenizer_handle_table_start(Tokenizer* self) { - self->head += 2; - Py_ssize_t reset = self->head; + Py_ssize_t reset = self->head + 1; PyObject *style, *padding, *newline_character; PyObject *table = NULL; + self->head += 2; if(Tokenizer_push(self, LC_TABLE_OPEN)) return -1; padding = Tokenizer_parse_as_table_style(self, '\n', 1); if (BAD_ROUTE) { RESET_ROUTE(); - self->head = reset - 1; + self->head = reset; if (Tokenizer_emit_text(self, "{|")) return -1; return 0; @@ -2638,7 +2638,7 @@ static int Tokenizer_handle_table_start(Tokenizer* self) if (BAD_ROUTE) { RESET_ROUTE(); // offset displacement done by parse() - self->head = reset - 1; + self->head = reset; if (Tokenizer_emit_text(self, "{|")) return -1; return 0; @@ -2675,17 +2675,17 @@ static PyObject * Tokenizer_handle_table_end(Tokenizer* self) */ static int Tokenizer_handle_table_row(Tokenizer* self) { + Py_ssize_t reset = self->head; + PyObject *padding, *style, *row; + self->head += 2; + if (!Tokenizer_CAN_RECURSE(self)) { if (Tokenizer_emit_text(self, "|-")) return -1; - self->head += 1; + self->head -= 1; return 0; } - Py_ssize_t reset = self->head; - self->head += 2; - PyObject *padding, *style, *row; - if(Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_ROW_OPEN)) return -1; padding = Tokenizer_parse_as_table_style(self, '\n', 0); @@ -2738,20 +2738,20 @@ static PyObject* Tokenizer_handle_table_row_end(Tokenizer* self) static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, const char *tag, uint64_t line_context) { - if (!Tokenizer_CAN_RECURSE(self)) { - if (Tokenizer_emit_text(self, markup)) - return -1; - self->head += strlen(markup) - 1; - return 0; - } - uint64_t old_context = self->topstack->context; uint64_t cell_context; Py_ssize_t reset = self->head; - self->head += strlen(markup); PyObject *padding, *cell; PyObject *style = NULL; const char *close_open_markup = NULL; + self->head += strlen(markup); + + if (!Tokenizer_CAN_RECURSE(self)) { + if (Tokenizer_emit_text(self, markup)) + return -1; + self->head--; + return 0; + } cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | LC_TABLE_CELL_STYLE | line_context, 1); if (BAD_ROUTE) { diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 6ae6050..59f2156 100644 --- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py @@ -1050,9 +1050,9 @@ class Tokenizer(object): def _handle_table_start(self): """Handle the start of a table.""" - self._head += 2 - reset = self._head + reset = self._head + 1 style, table = None, None + self._head += 2 try: self._push(contexts.TABLE_OPEN) padding = self._parse_as_table_style("\n", break_on_table_end=True) @@ -1066,7 +1066,7 @@ class Tokenizer(object): self._head += 2 except BadRoute: # offset displacement done by _parse() - self._head = reset - 1 + self._head = reset self._emit_text("{|") else: self._emit_table_tag("{|", "table", style, padding, None, table, "|}") @@ -1079,14 +1079,14 @@ class Tokenizer(object): def _handle_table_row(self): """Parse as style until end of the line, then continue.""" + reset = self._head + style, padding = None, "" + self._head += 2 if not self._can_recurse(): self._emit_text("|-") - self._head += 1 + self._head -= 1 return - reset = self._head - self._head += 2 - style, padding = None, "" try: self._push(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN) padding = self._parse_as_table_style("\n") @@ -1108,15 +1108,15 @@ class Tokenizer(object): def _handle_table_cell(self, markup, tag, line_context): """Parse as normal syntax unless we hit a style marker, then parse style as HTML attributes and the remainder as normal syntax.""" + old_context = self._context + reset = self._head + reset_for_style, padding, style = False, "", None + self._head += len(markup) if not self._can_recurse(): self._emit_text(markup) - self._head += len(markup) - 1 + self._head -= 1 return - old_context = self._context - reset = self._head - self._head += len(markup) - reset_for_style, padding, style = False, "", None try: cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context | contexts.TABLE_CELL_STYLE) cell_context = self._context @@ -1149,8 +1149,6 @@ class Tokenizer(object): def _handle_table_cell_end(self, reset_for_style=False): """Returns the current context, with the TABLE_CELL_STYLE flag set if it is necessary to reset and parse style attributes.""" - if self._context & (contexts.FAIL & ~contexts.TABLE): - raise BadRoute if reset_for_style: self._context |= contexts.TABLE_CELL_STYLE else: From 8dc70bc20b4f4f0926db267ed4430ff175bcb37b Mon Sep 17 00:00:00 2001 From: David Winegar Date: Tue, 22 Jul 2014 16:31:56 -0700 Subject: [PATCH 064/102] Add test coverage Add some table tests to increase coverage. Also reorder some tests. --- tests/test_tag.py | 4 +++- tests/tokenizer/tables.mwtest | 51 ++++++++++++++++++++++++++++++------------- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/tests/test_tag.py b/tests/test_tag.py index c2c751b..b33b0c2 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -249,9 +249,11 @@ class TestTag(TreeEqualityTestCase): node.wiki_markup = False self.assertFalse(node.closing_wiki_markup) self.assertEqual("\n
    ", node) - node2 = Tag(wraptext("table"), wraptext("\n"), wiki_markup="{|", + node2 = Tag(wraptext("table"), wraptext("\n"), + attrs=[agen("id", "foo")], wiki_markup="{|", closing_wiki_markup="|}") self.assertEqual("|}", node2.closing_wiki_markup) + self.assertEqual('{| id="foo"\n|}', node2) def test_has(self): """test Tag.has()""" diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index 455da67..39acf0c 100644 --- a/tests/tokenizer/tables.mwtest +++ b/tests/tokenizer/tables.mwtest @@ -106,7 +106,7 @@ output: [Text(text="foo \n foo \t {|\n|}")] name: table_row_simple label: Simple table row. input: "{|\n |- \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding=" \n"), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Tag Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding=" \n"), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- @@ -131,6 +131,41 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding --- +name: table_cell_multiple +label: Multiple table cells (non-inline). +input: "{|\n| foo \n| bar \n| test \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" bar \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] + +--- + +name: table_header_simple +label: Simple header cell. +input: "{|\n ! foo \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] + +--- + +name: table_header_inline +label: Multiple inline header cells. +input: "{|\n ! foo || bar !! test \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" foo "), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" bar "), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] + +--- + +name: table_header_multiple +label: Multiple table header cells (non-inline). +input: "{|\n! foo \n! bar \n! 
test \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" bar \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] + +--- + +name: nested_cells_and_rows +label: Combination of cells and rows in a table. +input: "{|\n|- \n| foo \n|- \n| bar\n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding=" \n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding=" \n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" bar\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] + +--- + name: table_cell_fake_close label: Looks like a table close but is not. input: "{|\n | |} \n|}" @@ -152,20 +187,6 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding --- -name: table_header_simple -label: Simple header cell. -input: "{|\n ! foo \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] - ---- - -name: table_header_inline -label: Multiple inline header cells. -input: "{|\n ! foo || bar !! test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" foo "), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" bar "), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] - ---- - name: nowiki_inside_table label: Nowiki handles pipe characters in tables. input: "{|\n | foo | |- {| |} || ! !! 
bar \n|}" From c802b1f8143018e8d014c682eb98c14d11b06c54 Mon Sep 17 00:00:00 2001 From: David Winegar Date: Fri, 25 Jul 2014 15:53:35 -0700 Subject: [PATCH 065/102] Change context to uint64_t One-line fix --- mwparserfromhell/parser/tokenizer.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index 57a0121..8d2d428 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -53,7 +53,8 @@ static const char MARKERS[] = { #define MAX_BRACES 255 #define MAX_ENTITY_SIZE 8 -static int route_state = 0, route_context = 0; +static int route_state = 0; +static uint64_t route_context = 0; #define BAD_ROUTE route_state #define BAD_ROUTE_CONTEXT route_context #define FAIL_ROUTE(context) route_state = 1; route_context = context From 1a4c88e11f8b6403e4a15a1e24b67b3185c884c6 Mon Sep 17 00:00:00 2001 From: David Winegar Date: Fri, 25 Jul 2014 15:54:37 -0700 Subject: [PATCH 066/102] Correctly handle no table endings Tests were not correctly testing the situations without a table close. Fixed tests and then fixed tokenizers for failing tests. Also refactored pytokenizer to more closely match the ctokenizer by only holding the `_parse` methods in the try blocks and no other code. --- mwparserfromhell/parser/tokenizer.c | 28 ++++++++++++--- mwparserfromhell/parser/tokenizer.py | 70 +++++++++++++++++++++++------------- tests/tokenizer/tables.mwtest | 49 +++++++++++++++++++++---- 3 files changed, 110 insertions(+), 37 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index c902c3d..bad72ef 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -2636,8 +2636,9 @@ static int Tokenizer_handle_table_start(Tokenizer* self) self->head++; table = Tokenizer_parse(self, LC_TABLE_OPEN, 1); if (BAD_ROUTE) { + Py_DECREF(padding); + Py_DECREF(style); RESET_ROUTE(); - // offset displacement done by parse() self->head = reset; if (Tokenizer_emit_text(self, "{|")) return -1; @@ -2676,7 +2677,7 @@ static PyObject * Tokenizer_handle_table_end(Tokenizer* self) static int Tokenizer_handle_table_row(Tokenizer* self) { Py_ssize_t reset = self->head; - PyObject *padding, *style, *row; + PyObject *padding, *style, *row, *trash; self->head += 2; if (!Tokenizer_CAN_RECURSE(self)) { @@ -2690,6 +2691,8 @@ static int Tokenizer_handle_table_row(Tokenizer* self) return -1; padding = Tokenizer_parse_as_table_style(self, '\n', 0); if (BAD_ROUTE) { + trash = Tokenizer_pop(self); + Py_XDECREF(trash); self->head = reset; return 0; } @@ -2704,6 +2707,8 @@ static int Tokenizer_handle_table_row(Tokenizer* self) self->head++; row = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_ROW_OPEN, 1); if (BAD_ROUTE) { + trash = Tokenizer_pop(self); + Py_XDECREF(trash); Py_DECREF(padding); Py_DECREF(style); self->head = reset; @@ -2712,7 +2717,6 @@ static int Tokenizer_handle_table_row(Tokenizer* self) if (!row) { Py_DECREF(padding); Py_DECREF(style); - Py_DECREF(row); return -1; } @@ -2741,7 +2745,7 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, uint64_t old_context = self->topstack->context; uint64_t cell_context; Py_ssize_t reset = self->head; - PyObject *padding, *cell; + PyObject *padding, *cell, *trash; PyObject *style = NULL; const char *close_open_markup = NULL; self->head += strlen(markup); @@ -2755,6 +2759,8 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, cell = 
Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | LC_TABLE_CELL_STYLE | line_context, 1); if (BAD_ROUTE) { + trash = Tokenizer_pop(self); + Py_XDECREF(trash); self->head = reset; return 0; } @@ -2770,6 +2776,8 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, return -1; padding = Tokenizer_parse_as_table_style(self, '|', 0); if (BAD_ROUTE) { + trash = Tokenizer_pop(self); + Py_XDECREF(trash); self->head = reset; return 0; } @@ -2784,11 +2792,18 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, self->head++; cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | line_context, 1); if (BAD_ROUTE) { + Py_DECREF(padding); + Py_DECREF(style); + trash = Tokenizer_pop(self); + Py_XDECREF(trash); self->head = reset; return 0; } - if (!cell) + if (!cell) { + Py_DECREF(padding); + Py_DECREF(style); return -1; + } cell_context = self->topstack->context; self->topstack->context = old_context; } @@ -3148,6 +3163,9 @@ static PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) } else if (Tokenizer_emit_char(self, this)) return NULL; + // Raise BadRoute to table start + if (BAD_ROUTE) + return NULL; } else if (Tokenizer_emit_char(self, this)) return NULL; diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 59f2156..527d364 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1053,24 +1053,30 @@ class Tokenizer(object): reset = self._head + 1 style, table = None, None self._head += 2 + + self._push(contexts.TABLE_OPEN) try: - self._push(contexts.TABLE_OPEN) padding = self._parse_as_table_style("\n", break_on_table_end=True) - style = self._pop() - # continue to parse if it is NOT an inline table - if "\n" in padding: - self._head += 1 - table = self._parse(contexts.TABLE_OPEN) - else: - # close tag - self._head += 2 except BadRoute: - # offset displacement done by _parse() self._head = reset self._emit_text("{|") + return + style = self._pop() + # continue to parse if it is NOT an inline table + if "\n" in padding: + self._head += 1 + try: + table = self._parse(contexts.TABLE_OPEN) + except BadRoute: + self._head = reset + self._emit_text("{|") + return else: - self._emit_table_tag("{|", "table", style, padding, None, table, "|}") - self._head -= 1 + # close tag + self._head += 2 + self._emit_table_tag("{|", "table", style, padding, None, table, "|}") + # offset displacement done by _parse() + self._head -= 1 def _handle_table_end(self): """Return the stack in order to handle the table end.""" @@ -1087,15 +1093,21 @@ class Tokenizer(object): self._head -= 1 return + self._push(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN) try: - self._push(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN) padding = self._parse_as_table_style("\n") - style = self._pop() - # don't parse the style separator - self._head += 1 + except BadRoute: + self._head = reset + self._pop() + raise + style = self._pop() + # don't parse the style separator + self._head += 1 + try: row = self._parse(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN) except BadRoute: self._head = reset + self._pop() raise self._emit_table_tag("|-", "tr", style, padding, None, row, "") # offset displacement done by parse() @@ -1119,26 +1131,34 @@ class Tokenizer(object): try: cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context | contexts.TABLE_CELL_STYLE) - cell_context = self._context - self._context = old_context - reset_for_style = cell_context & 
contexts.TABLE_CELL_STYLE except BadRoute: self._head = reset + self._pop() raise + cell_context = self._context + self._context = old_context + reset_for_style = cell_context & contexts.TABLE_CELL_STYLE if reset_for_style: self._head = reset + len(markup) + self._push(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context) try: - self._push(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context) padding = self._parse_as_table_style("|") - style = self._pop() - # Don't parse the style separator - self._head += 1 + except BadRoute: + self._head = reset + self._pop() + raise + style = self._pop() + # Don't parse the style separator + self._head += 1 + try: cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context) - cell_context = self._context - self._context = old_context except BadRoute: self._head = reset + ret = self._pop() raise + cell_context = self._context + self._context = old_context + close_open_markup = "|" if reset_for_style else None self._emit_table_tag(markup, tag, style, padding, close_open_markup, cell, "") # keep header/cell line contexts diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index 39acf0c..ecace32 100644 --- a/tests/tokenizer/tables.mwtest +++ b/tests/tokenizer/tables.mwtest @@ -13,23 +13,51 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding --- name: no_table_close_simple -label: Handle case when there is no table close. +label: No table close on inline table input: "{| " output: [Text(text="{| ")] --- +name: no_table_close_newline +label: No table close with a newline +input: "{| \n " +output: [Text(text="{| \n ")] + +--- + name: no_table_close_inside_cell -label: Handle case when there is no table close while inside of a cell. -input: "{| | " -output: [Text(text="{| | ")] +label: No table close while inside of a cell +input: "{| \n| " +output: [Text(text="{| \n| ")] + +--- + +name: no_table_close_inside_cell_after_newline +label: No table close while inside of a cell after a newline +input: "{| \n| \n " +output: [Text(text="{| \n| \n ")] + +--- + +name: no_table_close_inside_cell_with_attributes +label: No table close while inside of a cell with attributes +input: "{| \n| red | test" +output: [Text(text="{| \n| red | test")] --- name: no_table_close_inside_row -label: Handle case when there is no table close while inside of a row. -input: "{| |- " -output: [Text(text="{| |- ")] +label: No table close while inside of a row +input: "{| \n|- " +output: [Text(text="{| \n|- ")] + +--- + +name: no_table_close_inside_row_after_newline +label: No table close while inside of a row after a newline +input: "{| \n|- \n " +output: [Text(text="{| \n|- \n ")] --- @@ -40,6 +68,13 @@ output: [Text(text="{| border=\"1\"")] --- +name: no_table_close_unclosed_attributes +label: Don't parse unclosed attributes if the table doesn't exist. +input: "{| border=" +output: [Text(text="{| border=")] + +--- + name: no_table_close_row_attributes label: Don't parse row attributes as attributes if the table doesn't exist. 
input: "{| |- border="1"" From cb875ae347f0b746d99159fa7edb235006145fbd Mon Sep 17 00:00:00 2001 From: Kunal Mehta Date: Sat, 2 Aug 2014 19:30:20 -0700 Subject: [PATCH 067/102] Force opening README.rst as utf-8 Causes issues if the locale is not set to utf-8 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d488650..68943ac 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ from setuptools import setup, find_packages, Extension from mwparserfromhell import __version__ from mwparserfromhell.compat import py26, py3k -with open("README.rst") as fp: +with open("README.rst", **{'encoding':'utf-8'} if py3k else {}) as fp: long_docs = fp.read() tokenizer = Extension("mwparserfromhell.parser._tokenizer", From 810c24e123c3adf67cd09f7bfe5a356305917612 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 19 Oct 2014 01:11:46 -0500 Subject: [PATCH 068/102] Don't check quotes when attributes are built from tokens. --- mwparserfromhell/nodes/extras/attribute.py | 4 ++-- mwparserfromhell/parser/builder.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/nodes/extras/attribute.py b/mwparserfromhell/nodes/extras/attribute.py index cb50194..7d296dc 100644 --- a/mwparserfromhell/nodes/extras/attribute.py +++ b/mwparserfromhell/nodes/extras/attribute.py @@ -37,9 +37,9 @@ class Attribute(StringMixIn): """ def __init__(self, name, value=None, quotes='"', pad_first=" ", - pad_before_eq="", pad_after_eq=""): + pad_before_eq="", pad_after_eq="", check_quotes=True): super(Attribute, self).__init__() - if not quotes and self._value_needs_quotes(value): + if check_quotes and not quotes and self._value_needs_quotes(value): raise ValueError("given value {0!r} requires quotes".format(value)) self._name = name self._value = value diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index 2d68036..97123f7 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -237,7 +237,8 @@ class Builder(object): else: name, value = self._pop(), None return Attribute(name, value, quotes, start.pad_first, - start.pad_before_eq, start.pad_after_eq) + start.pad_before_eq, start.pad_after_eq, + check_quotes=False) else: self._write(self._handle_token(token)) raise ParserError("_handle_attribute() missed a close token") From e446c51347f061670e78d47840a34c1028317798 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 19 Oct 2014 01:51:44 -0500 Subject: [PATCH 069/102] Adjust table test labels for consistency. --- tests/tokenizer/tables.mwtest | 102 +++++++++++++++++++++--------------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index ecace32..b411045 100644 --- a/tests/tokenizer/tables.mwtest +++ b/tests/tokenizer/tables.mwtest @@ -1,355 +1,355 @@ name: empty_table -label: Parsing an empty table. +label: parsing an empty table input: "{|\n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: inline_table -label: Correctly handle tables with close on the same line. 
+label: correctly handle tables with close on the same line input: "{||}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=""), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: no_table_close_simple -label: No table close on inline table +label: no table close on inline table input: "{| " output: [Text(text="{| ")] --- name: no_table_close_newline -label: No table close with a newline +label: no table close with a newline input: "{| \n " output: [Text(text="{| \n ")] --- name: no_table_close_inside_cell -label: No table close while inside of a cell +label: no table close while inside of a cell input: "{| \n| " output: [Text(text="{| \n| ")] --- name: no_table_close_inside_cell_after_newline -label: No table close while inside of a cell after a newline +label: no table close while inside of a cell after a newline input: "{| \n| \n " output: [Text(text="{| \n| \n ")] --- name: no_table_close_inside_cell_with_attributes -label: No table close while inside of a cell with attributes +label: no table close while inside of a cell with attributes input: "{| \n| red | test" output: [Text(text="{| \n| red | test")] --- name: no_table_close_inside_row -label: No table close while inside of a row +label: no table close while inside of a row input: "{| \n|- " output: [Text(text="{| \n|- ")] --- name: no_table_close_inside_row_after_newline -label: No table close while inside of a row after a newline +label: no table close while inside of a row after a newline input: "{| \n|- \n " output: [Text(text="{| \n|- \n ")] --- name: no_table_close_attributes -label: Don't parse attributes as attributes if the table doesn't exist. +label: don't parse attributes as attributes if the table doesn't exist input: "{| border="1"" output: [Text(text="{| border=\"1\"")] --- name: no_table_close_unclosed_attributes -label: Don't parse unclosed attributes if the table doesn't exist. +label: don't parse unclosed attributes if the table doesn't exist input: "{| border=" output: [Text(text="{| border=")] --- name: no_table_close_row_attributes -label: Don't parse row attributes as attributes if the table doesn't exist. +label: don't parse row attributes as attributes if the table doesn't exist input: "{| |- border="1"" output: [Text(text="{| |- border=\"1\"")] --- name: no_table_close_cell -label: Don't parse cells if the table doesn't close. +label: don't parse cells if the table doesn't close input: "{| | border="1"| test || red | foo" output: [Text(text="{| | border=\"1\"| test || red | foo")] --- name: crazy_no_table_close -label: Lost of opened wiki syntax without closes. +label: lost of opened wiki syntax without closes input: "{{{ {{ {| | |- {| |} || ! !! bar \n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" foo "), TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="| |- {| |} || ! !!"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), Text(text=" bar \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_text_outside_cell -label: Parse text inside table but outside of a cell. 
+label: parse text inside table but outside of a cell input: "{|\n bar \n | foo \n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" bar \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: no_table_cell_with_leading_characters -label: Fail to create a table cell when there are leading non-whitespace characters. +label: fail to create a table cell when there are leading non-whitespace characters input: "{|\n bar | foo \n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" bar | foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: no_table_row_with_leading_characters -label: Fail to create a table row when there are leading non-whitespace characters. +label: fail to create a table row when there are leading non-whitespace characters input: "{|\n bar |- foo \n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" bar |- foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: template_inside_table_cell -label: Template within table cell. +label: template within table cell input: "{|\n |{{foo\n|bar=baz}} \n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), Text(text=" \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes -label: Parse table cell style attributes. +label: parse table cell style attributes input: "{| \n | name="foo bar"| test \n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_empty_attributes -label: Parse table cell with style markers but no attributes. +label: parse table cell with style markers but no attributes input: "{| \n | | test \n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(wiki_markup="|", padding=" "), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_with_dash -label: Parse a situation in which a cell line looks like a row line. 
+label: parse a situation in which a cell line looks like a row line input: "{|\n ||- \n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(wiki_markup="|", padding=""), Text(text="- \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes_quote_with_pipe -label: Pipe inside an attribute quote should still be used as a style separator. +label: pipe inside an attribute quote should still be used as a style separator input: "{| \n | name="foo|bar"| test \n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), Text(text="\"foo"), TagCloseOpen(wiki_markup="|", padding=""), Text(text="bar\"| test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes_name_with_pipe -label: Pipe inside an attribute name should still be used as a style separator. +label: pipe inside an attribute name should still be used as a style separator input: "{| \n | name|="foo bar" | test \n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseOpen(wiki_markup="|", padding=""), Text(text="=\"foo bar\" | test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes_pipe_after_equals -label: Pipe inside an attribute should still be used as a style separator after an equals. +label: pipe inside an attribute should still be used as a style separator after an equals input: "{| \n | name=|"foo|bar"| test \n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagCloseOpen(wiki_markup="|", padding=""), Text(text="\"foo|bar\"| test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_cell_attributes_templates -label: Pipe inside attributes shouldn't be style separator. +label: pipe inside attributes shouldn't be style separator input: "{| \n | {{comment|template=baz}} | test \n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_after_eq="", pad_first=" ", pad_before_eq=" "), TemplateOpen(), Text(text="comment"), TemplateParamSeparator(), Text(text="template"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: header_cell_attributes -label: Parse header cell style attributes. +label: parse header cell style attributes input: "{| \n ! 
name="foo bar"| test \n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: inline_cell_attributes -label: Parse cell style attributes of inline cells. +label: parse cell style attributes of inline cells input: "{| \n ! name="foo bar" | test ||color="red"| markup!!foo | time \n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagAttrStart(pad_after_eq="", pad_first=" ", pad_before_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(wiki_markup="|", padding=" "), Text(text=" test "), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagAttrStart(pad_first="", pad_before_eq="", pad_after_eq=""), Text(text="color"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="red"), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" markup"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagAttrStart(pad_first="", pad_before_eq=" ", pad_after_eq=""), Text(text="foo"), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" time \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_row_attributes -label: Parse table row style attributes. +label: parse table row style attributes input: "{| \n |- name="foo bar"\n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_row_attributes_crazy_whitespace -label: Parse table row style attributes with different whitespace. +label: parse table row style attributes with different whitespace input: "{| \t \n |- \t name="foo bar" \t \n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \t \n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" \t ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(padding=" \t \n"), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_attributes -label: Parse table style attributes. 
+label: parse table style attributes input: "{| name="foo bar"\n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: inline_table_attributes -label: Correctly handle attributes in inline tables. +label: correctly handle attributes in inline tables input: "{| foo="tee bar" |}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"),TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="tee bar"), TagCloseOpen(padding=" "), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- name: table_incorrect_attributes -label: Parse incorrect table style attributes. +label: parse incorrect table style attributes input: "{| name="foo\n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), Text(text="\"foo"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] From b7c46a6dca5ed71326a7a8e9c3f7071a9297524b Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 19 Oct 2014 20:44:57 -0500 Subject: [PATCH 070/102] Add tables to changelog. --- CHANGELOG | 1 + docs/changelog.rst | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index b4b01d6..9c05482 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,7 @@ v0.4 (unreleased): - The parser is now distributed with Windows binaries, fixing an issue that prevented Windows users from using the C tokenizer. +- Added support for parsing wikicode tables. - Added a script to test for memory leaks in scripts/memtest.py. - Added a script to do releases in scripts/release.sh. - skip_style_tags can now be passed to mwparserfromhell.parse() (previously, diff --git a/docs/changelog.rst b/docs/changelog.rst index 9fdfef2..1854fa0 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -9,6 +9,7 @@ Unreleased - The parser is now distributed with Windows binaries, fixing an issue that prevented Windows users from using the C tokenizer. +- Added support for parsing wikicode tables. - Added a script to test for memory leaks in :file:`scripts/memtest.py`. - Added a script to do releases in :file:`scripts/release.sh`. - *skip_style_tags* can now be passed to :func:`mwparserfromhell.parse() From bd85805f8fc693b8c4b2b32f700b74d4eb4e774b Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 19 Oct 2014 20:49:16 -0500 Subject: [PATCH 071/102] Add integration tests for token roundtripping. 
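Every tokenizer test case now doubles as a builder test: when a test class sets the `roundtrip` flag, the expected token stream is fed to the Builder and the resulting tree is stringified, which must reproduce the original wikitext exactly. A minimal sketch of the invariant being checked (the "{{foo|bar}}" sample below is illustrative, not one of the shipped test cases):

    from mwparserfromhell.parser import tokens
    from mwparserfromhell.parser.builder import Builder

    # The token stream the tokenizer emits for "{{foo|bar}}":
    stream = [tokens.TemplateOpen(), tokens.Text(text="foo"),
              tokens.TemplateParamSeparator(), tokens.Text(text="bar"),
              tokens.TemplateClose()]
    # Rebuilding a tree from the stream and stringifying it must give
    # back the source, character for character.
    assert str(Builder().build(stream)) == "{{foo|bar}}"

Note that Builder.build() reverses and pops tokens off the list it is given, so a throwaway list is passed here.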
--- tests/_test_tokenizer.py | 11 ++++++++--- tests/test_roundtripping.py | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 3 deletions(-) create mode 100644 tests/test_roundtripping.py diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index bfd4857..e44280b 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -25,8 +25,9 @@ import codecs from os import listdir, path import sys -from mwparserfromhell.compat import py3k +from mwparserfromhell.compat import py3k, str from mwparserfromhell.parser import tokens +from mwparserfromhell.parser.builder import Builder class _TestParseError(Exception): """Raised internally when a test could not be parsed.""" @@ -50,8 +51,12 @@ class TokenizerTestCase(object): *label* for the method's docstring. """ def inner(self): - expected = data["output"] - actual = self.tokenizer().tokenize(data["input"]) + if hasattr(self, "roundtrip"): + expected = data["input"] + actual = str(Builder().build(data["output"])) + else: + expected = data["output"] + actual = self.tokenizer().tokenize(data["input"]) self.assertEqual(expected, actual) if not py3k: inner.__name__ = funcname.encode("utf8") diff --git a/tests/test_roundtripping.py b/tests/test_roundtripping.py new file mode 100644 index 0000000..5360387 --- /dev/null +++ b/tests/test_roundtripping.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2014 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals + +try: + import unittest2 as unittest +except ImportError: + import unittest + +from ._test_tokenizer import TokenizerTestCase + +class TestRoundtripping(TokenizerTestCase, unittest.TestCase): + """Test cases for roundtripping tokens back to wikitext.""" + + @classmethod + def setUpClass(cls): + cls.roundtrip = True + + +if __name__ == "__main__": + unittest.main(verbosity=2) From 7489253e3289dd821144e324f375d31039cc4a6f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 19 Oct 2014 21:45:17 -0500 Subject: [PATCH 072/102] Break at 80 cols for most lines. 
--- mwparserfromhell/parser/tokenizer.c | 64 ++++++++++++++++++++++-------------- mwparserfromhell/parser/tokenizer.py | 18 ++++++---- 2 files changed, 52 insertions(+), 30 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index bad72ef..ce46388 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -2456,13 +2456,15 @@ static PyObject* Tokenizer_handle_end(Tokenizer* self, uint64_t context) /* Emit a table tag. */ -static int Tokenizer_emit_table_tag(Tokenizer* self, const char* open_open_markup, - const char* tag, PyObject* style, PyObject* padding, - const char* close_open_markup, PyObject* contents, - const char* open_close_markup) +static int +Tokenizer_emit_table_tag(Tokenizer* self, const char* open_open_markup, + const char* tag, PyObject* style, PyObject* padding, + const char* close_open_markup, PyObject* contents, + const char* open_close_markup) { - PyObject *open_open_kwargs, *open_open_markup_unicode, *close_open_kwargs, *close_open_markup_unicode, - *open_close_kwargs, *open_close_markup_unicode; + PyObject *open_open_kwargs, *open_open_markup_unicode, *close_open_kwargs, + *close_open_markup_unicode, *open_close_kwargs, + *open_close_markup_unicode; open_open_kwargs = PyDict_New(); if (!open_open_kwargs) @@ -2472,7 +2474,8 @@ static int Tokenizer_emit_table_tag(Tokenizer* self, const char* open_open_marku Py_DECREF(open_open_kwargs); goto fail_decref_all; } - PyDict_SetItemString(open_open_kwargs, "wiki_markup", open_open_markup_unicode); + PyDict_SetItemString(open_open_kwargs, "wiki_markup", + open_open_markup_unicode); Py_DECREF(open_open_markup_unicode); if (Tokenizer_emit_kwargs(self, TagOpenOpen, open_open_kwargs)) goto fail_decref_all; @@ -2494,7 +2497,8 @@ static int Tokenizer_emit_table_tag(Tokenizer* self, const char* open_open_marku Py_DECREF(close_open_kwargs); goto fail_decref_padding_contents; } - PyDict_SetItemString(close_open_kwargs, "wiki_markup", close_open_markup_unicode); + PyDict_SetItemString(close_open_kwargs, "wiki_markup", + close_open_markup_unicode); Py_DECREF(close_open_markup_unicode); } PyDict_SetItemString(close_open_kwargs, "padding", padding); @@ -2516,7 +2520,8 @@ static int Tokenizer_emit_table_tag(Tokenizer* self, const char* open_open_marku Py_DECREF(open_close_kwargs); return -1; } - PyDict_SetItemString(open_close_kwargs, "wiki_markup", open_close_markup_unicode); + PyDict_SetItemString(open_close_kwargs, "wiki_markup", + open_close_markup_unicode); Py_DECREF(open_close_markup_unicode); if (Tokenizer_emit_kwargs(self, TagOpenClose, open_close_kwargs)) return -1; @@ -2538,8 +2543,9 @@ static int Tokenizer_emit_table_tag(Tokenizer* self, const char* open_open_marku /* Parse until ``end_token`` as style attributes for a table. 
*/ -static PyObject* Tokenizer_parse_as_table_style(Tokenizer* self, char end_token, - int break_on_table_end) +static PyObject* +Tokenizer_parse_as_table_style(Tokenizer* self, char end_token, + int break_on_table_end) { TagData *data = TagData_new(); PyObject *padding, *trash; @@ -2655,7 +2661,8 @@ static int Tokenizer_handle_table_start(Tokenizer* self) self->head += 2; } - if (Tokenizer_emit_table_tag(self, "{|", "table", style, padding, NULL, table, "|}")) + if (Tokenizer_emit_table_tag(self, "{|", "table", style, padding, NULL, + table, "|}")) return -1; // offset displacement done by _parse() self->head--; @@ -2665,7 +2672,7 @@ static int Tokenizer_handle_table_start(Tokenizer* self) /* Return the stack in order to handle the table end. */ -static PyObject * Tokenizer_handle_table_end(Tokenizer* self) +static PyObject* Tokenizer_handle_table_end(Tokenizer* self) { self->head += 2; return Tokenizer_pop(self); @@ -2720,7 +2727,8 @@ static int Tokenizer_handle_table_row(Tokenizer* self) return -1; } - if (Tokenizer_emit_table_tag(self, "|-", "tr", style, padding, NULL, row, "")) + if (Tokenizer_emit_table_tag(self, "|-", "tr", style, padding, NULL, row, + "")) return -1; // offset displacement done by _parse() self->head--; @@ -2739,8 +2747,9 @@ static PyObject* Tokenizer_handle_table_row_end(Tokenizer* self) Parse as normal syntax unless we hit a style marker, then parse style as HTML attributes and the remainder as normal syntax. */ -static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, - const char *tag, uint64_t line_context) +static int +Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, + const char *tag, uint64_t line_context) { uint64_t old_context = self->topstack->context; uint64_t cell_context; @@ -2757,7 +2766,8 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, return 0; } - cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | LC_TABLE_CELL_STYLE | line_context, 1); + cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | + LC_TABLE_CELL_STYLE | line_context, 1); if (BAD_ROUTE) { trash = Tokenizer_pop(self); Py_XDECREF(trash); @@ -2772,7 +2782,8 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, if (cell_context & LC_TABLE_CELL_STYLE) { Py_DECREF(cell); self->head = reset + strlen(markup); - if(Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | line_context)) + if(Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | + line_context)) return -1; padding = Tokenizer_parse_as_table_style(self, '|', 0); if (BAD_ROUTE) { @@ -2790,7 +2801,8 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, } // Don't parse the style separator self->head++; - cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | line_context, 1); + cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | + line_context, 1); if (BAD_ROUTE) { Py_DECREF(padding); Py_DECREF(style); @@ -2818,10 +2830,12 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, if (style) { close_open_markup = "|"; } - if (Tokenizer_emit_table_tag(self, markup, tag, style, padding, close_open_markup, cell, "")) + if (Tokenizer_emit_table_tag(self, markup, tag, style, padding, + close_open_markup, cell, "")) return -1; // keep header/cell line contexts - self->topstack->context |= cell_context & (LC_TABLE_TH_LINE | LC_TABLE_TD_LINE); + self->topstack->context |= cell_context & (LC_TABLE_TH_LINE | + LC_TABLE_TD_LINE); // offset displacement 
done by parse() self->head--; return 0; @@ -2831,7 +2845,8 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, Returns the context, stack, and whether to reset the cell for style in a tuple. */ -static PyObject* Tokenizer_handle_table_cell_end(Tokenizer* self, int reset_for_style) +static PyObject* +Tokenizer_handle_table_cell_end(Tokenizer* self, int reset_for_style) { if (reset_for_style) self->topstack->context |= LC_TABLE_CELL_STYLE; @@ -2844,7 +2859,8 @@ static PyObject* Tokenizer_handle_table_cell_end(Tokenizer* self, int reset_for_ Make sure we are not trying to write an invalid character. Return 0 if everything is safe, or -1 if the route must be failed. */ -static int Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UNICODE data) +static int +Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UNICODE data) { if (context & LC_FAIL_NEXT) return -1; @@ -2895,7 +2911,7 @@ static int Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UNICODE d } else if (context & LC_FAIL_ON_LBRACE) { if (data == '{' || (Tokenizer_READ_BACKWARDS(self, 1) == '{' && - Tokenizer_READ_BACKWARDS(self, 2) == '{')) { + Tokenizer_READ_BACKWARDS(self, 2) == '{')) { if (context & LC_TEMPLATE) self->topstack->context |= LC_FAIL_ON_EQUALS; else diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 527d364..ad4895e 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1010,7 +1010,8 @@ class Tokenizer(object): if style: self._emit_all(style) if close_open_markup: - self._emit(tokens.TagCloseOpen(wiki_markup=close_open_markup, padding=padding)) + self._emit(tokens.TagCloseOpen(wiki_markup=close_open_markup, + padding=padding)) else: self._emit(tokens.TagCloseOpen(padding=padding)) if contents: @@ -1130,7 +1131,8 @@ class Tokenizer(object): return try: - cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context | contexts.TABLE_CELL_STYLE) + cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | + line_context | contexts.TABLE_CELL_STYLE) except BadRoute: self._head = reset self._pop() @@ -1140,7 +1142,8 @@ class Tokenizer(object): reset_for_style = cell_context & contexts.TABLE_CELL_STYLE if reset_for_style: self._head = reset + len(markup) - self._push(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context) + self._push(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | + line_context) try: padding = self._parse_as_table_style("|") except BadRoute: @@ -1151,7 +1154,8 @@ class Tokenizer(object): # Don't parse the style separator self._head += 1 try: - cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context) + cell = self._parse(contexts.TABLE_OPEN | + contexts.TABLE_CELL_OPEN | line_context) except BadRoute: self._head = reset ret = self._pop() @@ -1160,9 +1164,11 @@ class Tokenizer(object): self._context = old_context close_open_markup = "|" if reset_for_style else None - self._emit_table_tag(markup, tag, style, padding, close_open_markup, cell, "") + self._emit_table_tag(markup, tag, style, padding, close_open_markup, + cell, "") # keep header/cell line contexts - self._context |= cell_context & (contexts.TABLE_TH_LINE | contexts.TABLE_TD_LINE) + self._context |= cell_context & (contexts.TABLE_TH_LINE | + contexts.TABLE_TD_LINE) # offset displacement done by parse() self._head -= 1 From 92cf8f2c03a8b339baa9e5a31c18c80ce635b2fb Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 22 Oct 2014 15:13:53 -0500 
Subject: [PATCH 073/102] Add a couple more tests involving templates. --- tests/tokenizer/tables.mwtest | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index b411045..4e4fe74 100644 --- a/tests/tokenizer/tables.mwtest +++ b/tests/tokenizer/tables.mwtest @@ -90,7 +90,7 @@ output: [Text(text="{| | border=\"1\"| test || red | foo")] --- name: crazy_no_table_close -label: lost of opened wiki syntax without closes +label: lots of opened wiki syntax without closes input: "{{{ {{ {| Date: Wed, 22 Oct 2014 15:38:13 -0500 Subject: [PATCH 074/102] Add a test for tokenizer line 1384. --- tests/tokenizer/tables.mwtest | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index 4e4fe74..59ad934 100644 --- a/tests/tokenizer/tables.mwtest +++ b/tests/tokenizer/tables.mwtest @@ -369,6 +369,13 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_fir --- +name: inappropriate_marker_at_line_start +label: an inappropriate marker (a right bracket) at the start of a line in the table +input: "{|\n}" +output: [Text(text="{|\n}")] + +--- + name: recursion_five_hundred_opens label: test potentially dangerous recursion: five hundred table openings, without spaces input: "{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|" From 457355d4bf976986f3471a2e1de39e9762a5dac3 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 22 Oct 2014 18:52:58 -0500 Subject: [PATCH 075/102] Remove try/except that is impossible to fail inside of. --- mwparserfromhell/parser/tokenizer.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index ad4895e..9787c5f 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1119,8 +1119,8 @@ class Tokenizer(object): return self._pop() def _handle_table_cell(self, markup, tag, line_context): - """Parse as normal syntax unless we hit a style marker, then parse style - as HTML attributes and the remainder as normal syntax.""" + """Parse as normal syntax unless we hit a style marker, then parse + style as HTML attributes and the remainder as normal syntax.""" old_context = self._context reset = self._head reset_for_style, padding, style = False, "", None @@ -1144,12 +1144,7 @@ class Tokenizer(object): self._head = reset + len(markup) self._push(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context) - try: - padding = self._parse_as_table_style("|") - except BadRoute: - self._head = reset - self._pop() - raise + padding = self._parse_as_table_style("|") style = self._pop() # Don't parse the style separator self._head += 1 From 5d29bff918ad80b150bfc51aa407019ff51229e2 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 22 Oct 2014 19:04:11 -0500 Subject: [PATCH 076/102] Remove an incorrect usage of Py_XDECREF(). 
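Py_XDECREF() must only be given a PyObject pointer: it reads a reference count out of the object header and may invoke the type's deallocator. The `text` buffer in this function is a plain C string (the surrounding code fills it with '{' characters and a NUL terminator), so the two calls removed below were decrementing a "refcount" built from raw character bytes: memory corruption, not cleanup.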
--- mwparserfromhell/parser/tokenizer.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index ce46388..10a03a9 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -676,11 +676,8 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) RESET_ROUTE(); for (i = 0; i < braces; i++) text[i] = '{'; text[braces] = '\0'; - if (Tokenizer_emit_text_then_stack(self, text)) { - Py_XDECREF(text); + if (Tokenizer_emit_text_then_stack(self, text)) return -1; - } - Py_XDECREF(text); return 0; } else From 504b8bace08429e6a778f1fa69331cb5e849c043 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 22 Oct 2014 19:22:50 -0500 Subject: [PATCH 077/102] Add test code for a missing branch of Tag.wiki_markup.setter; cleanup. --- mwparserfromhell/nodes/tag.py | 2 +- tests/test_tag.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index e9531e7..e3c7260 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -223,7 +223,7 @@ class Tag(Node): def wiki_markup(self, value): self._wiki_markup = str(value) if value else None if not value or not self.closing_wiki_markup: - self.closing_wiki_markup = str(value) if value else None + self._closing_wiki_markup = self._wiki_markup @self_closing.setter def self_closing(self, value): diff --git a/tests/test_tag.py b/tests/test_tag.py index b33b0c2..3beea98 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -246,6 +246,9 @@ class TestTag(TreeEqualityTestCase): node.closing_wiki_markup = "|}" self.assertEqual("|}", node.closing_wiki_markup) self.assertEqual("{|\n|}", node) + node.wiki_markup = "!!" + self.assertEqual("|}", node.closing_wiki_markup) + self.assertEqual("!!\n|}", node) node.wiki_markup = False self.assertFalse(node.closing_wiki_markup) self.assertEqual("\n
    ", node) From 913ff590c8e90f771e16e150b239147bd32f1c8d Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 22 Oct 2014 20:34:36 -0500 Subject: [PATCH 078/102] Cleanup; add a missing test. --- mwparserfromhell/parser/tokenizer.c | 6 ------ mwparserfromhell/parser/tokenizer.py | 2 +- tests/tokenizer/tags_wikimarkup.mwtest | 7 +++++++ 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 10a03a9..faed5d7 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -2783,12 +2783,6 @@ Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, line_context)) return -1; padding = Tokenizer_parse_as_table_style(self, '|', 0); - if (BAD_ROUTE) { - trash = Tokenizer_pop(self); - Py_XDECREF(trash); - self->head = reset; - return 0; - } if (!padding) return -1; style = Tokenizer_pop(self); diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 9787c5f..dd5d6d9 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1325,7 +1325,7 @@ class Tokenizer(object): elif this in ("\n", ":") and self._context & contexts.DL_TERM: self._handle_dl_term() if this == "\n": - # kill potential table contexts + # Kill potential table contexts self._context &= ~contexts.TABLE_CELL_LINE_CONTEXTS # Start of table parsing elif this == "{" and next == "|" and (self._read(-1) in ("\n", self.START) or diff --git a/tests/tokenizer/tags_wikimarkup.mwtest b/tests/tokenizer/tags_wikimarkup.mwtest index 04f617a..c709ba7 100644 --- a/tests/tokenizer/tags_wikimarkup.mwtest +++ b/tests/tokenizer/tags_wikimarkup.mwtest @@ -447,6 +447,13 @@ output: [TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Tag --- +name: dt_dd_mix4 +label: another example of correct dt/dd usage, with a trigger for a specific parse route +input: ";foo]:bar" +output: [TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="foo]"), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="bar")] + +--- + name: ul_ol_dt_dd_mix label: an assortment of uls, ols, dds, and dts input: ";:#*foo\n:#*;foo\n#*;:foo\n*;:#foo" From e1ebb59b9e1be3fe2ffd64c679e02983234d20ae Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 22 Oct 2014 22:59:42 -0500 Subject: [PATCH 079/102] Ensure token list is copied before being fed to the builder. --- tests/_test_tokenizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index e44280b..17d588b 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -53,7 +53,7 @@ class TokenizerTestCase(object): def inner(self): if hasattr(self, "roundtrip"): expected = data["input"] - actual = str(Builder().build(data["output"])) + actual = str(Builder().build(data["output"][:])) else: expected = data["output"] actual = self.tokenizer().tokenize(data["input"]) From 640005dbb2eb641572f9880aaa72c3c6347802f9 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 23 Oct 2014 21:27:21 -0500 Subject: [PATCH 080/102] Tokenizer cleanup; make inline table syntax invalid as it should be. 
--- mwparserfromhell/parser/tokenizer.c | 56 ++++++++++++++--------------- mwparserfromhell/parser/tokenizer.py | 70 +++++++++++++++++------------------- 2 files changed, 61 insertions(+), 65 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index faed5d7..c53a420 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -2423,34 +2423,6 @@ static int Tokenizer_handle_dl_term(Tokenizer* self) } /* - Handle the end of the stream of wikitext. -*/ -static PyObject* Tokenizer_handle_end(Tokenizer* self, uint64_t context) -{ - PyObject *token, *text, *trash; - int single; - - if (context & AGG_FAIL) { - if (context & LC_TAG_BODY) { - token = PyList_GET_ITEM(self->topstack->stack, 1); - text = PyObject_GetAttrString(token, "text"); - if (!text) - return NULL; - single = IS_SINGLE(text); - Py_DECREF(text); - if (single) - return Tokenizer_handle_single_tag_end(self); - } - else if (context & AGG_DOUBLE) { - trash = Tokenizer_pop(self); - Py_XDECREF(trash); - } - return Tokenizer_fail_route(self); - } - return Tokenizer_pop(self); -} - -/* Emit a table tag. */ static int @@ -2847,6 +2819,34 @@ Tokenizer_handle_table_cell_end(Tokenizer* self, int reset_for_style) } /* + Handle the end of the stream of wikitext. +*/ +static PyObject* Tokenizer_handle_end(Tokenizer* self, uint64_t context) +{ + PyObject *token, *text, *trash; + int single; + + if (context & AGG_FAIL) { + if (context & LC_TAG_BODY) { + token = PyList_GET_ITEM(self->topstack->stack, 1); + text = PyObject_GetAttrString(token, "text"); + if (!text) + return NULL; + single = IS_SINGLE(text); + Py_DECREF(text); + if (single) + return Tokenizer_handle_single_tag_end(self); + } + else if (context & AGG_DOUBLE) { + trash = Tokenizer_pop(self); + Py_XDECREF(trash); + } + return Tokenizer_fail_route(self); + } + return Tokenizer_pop(self); +} + +/* Make sure we are not trying to write an invalid character. Return 0 if everything is safe, or -1 if the route must be failed. 
*/ diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index dd5d6d9..7921e7c 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -991,17 +991,6 @@ class Tokenizer(object): else: self._emit_text("\n") - def _handle_end(self): - """Handle the end of the stream of wikitext.""" - if self._context & contexts.FAIL: - if self._context & contexts.TAG_BODY: - if is_single(self._stack[1].text): - return self._handle_single_tag_end() - if self._context & contexts.DOUBLE: - self._pop() - self._fail_route() - return self._pop() - def _emit_table_tag(self, open_open_markup, tag, style, padding, close_open_markup, contents, open_close_markup): """Emit a table tag.""" @@ -1020,22 +1009,21 @@ class Tokenizer(object): self._emit_text(tag) self._emit(tokens.TagCloseClose()) - def _parse_as_table_style(self, end_token, break_on_table_end=False): + def _parse_as_table_style(self, end_token): """Parse until ``end_token`` as style attributes for a table.""" data = _TagOpenData() data.context = _TagOpenData.CX_ATTR_READY while True: - this, next = self._read(), self._read(1) - table_end = break_on_table_end and this == "|" and next == "}" + this = self._read() can_exit = (not data.context & data.CX_QUOTED or data.context & data.CX_NOTE_SPACE) - if (this == end_token and can_exit) or table_end: + if this == end_token and can_exit: if data.context & (data.CX_ATTR_NAME | data.CX_ATTR_VALUE): self._push_tag_buffer(data) if this.isspace(): data.padding_buffer["first"] += this return data.padding_buffer["first"] - elif this is self.END or table_end or this == end_token: + elif this is self.END or this == end_token: if self._context & contexts.TAG_ATTR: if data.context & data.CX_QUOTED: # Unclosed attribute quote: reset, don't die @@ -1052,31 +1040,27 @@ class Tokenizer(object): def _handle_table_start(self): """Handle the start of a table.""" reset = self._head + 1 - style, table = None, None self._head += 2 self._push(contexts.TABLE_OPEN) try: - padding = self._parse_as_table_style("\n", break_on_table_end=True) + padding = self._parse_as_table_style("\n") except BadRoute: self._head = reset self._emit_text("{|") return style = self._pop() - # continue to parse if it is NOT an inline table - if "\n" in padding: - self._head += 1 - try: - table = self._parse(contexts.TABLE_OPEN) - except BadRoute: - self._head = reset - self._emit_text("{|") - return - else: - # close tag - self._head += 2 + + self._head += 1 + try: + table = self._parse(contexts.TABLE_OPEN) + except BadRoute: + self._head = reset + self._emit_text("{|") + return + self._emit_table_tag("{|", "table", style, padding, None, table, "|}") - # offset displacement done by _parse() + # Offset displacement done by _parse(): self._head -= 1 def _handle_table_end(self): @@ -1087,7 +1071,6 @@ class Tokenizer(object): def _handle_table_row(self): """Parse as style until end of the line, then continue.""" reset = self._head - style, padding = None, "" self._head += 2 if not self._can_recurse(): self._emit_text("|-") @@ -1102,7 +1085,8 @@ class Tokenizer(object): self._pop() raise style = self._pop() - # don't parse the style separator + + # Don't parse the style separator: self._head += 1 try: row = self._parse(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN) @@ -1110,8 +1094,9 @@ class Tokenizer(object): self._head = reset self._pop() raise + self._emit_table_tag("|-", "tr", style, padding, None, row, "") - # offset displacement done by parse() + # Offset displacement done by parse(): 
self._head -= 1 def _handle_table_row_end(self): @@ -1146,7 +1131,7 @@ class Tokenizer(object): line_context) padding = self._parse_as_table_style("|") style = self._pop() - # Don't parse the style separator + # Don't parse the style separator: self._head += 1 try: cell = self._parse(contexts.TABLE_OPEN | @@ -1161,10 +1146,10 @@ class Tokenizer(object): close_open_markup = "|" if reset_for_style else None self._emit_table_tag(markup, tag, style, padding, close_open_markup, cell, "") - # keep header/cell line contexts + # Keep header/cell line contexts: self._context |= cell_context & (contexts.TABLE_TH_LINE | contexts.TABLE_TD_LINE) - # offset displacement done by parse() + # Offset displacement done by parse(): self._head -= 1 def _handle_table_cell_end(self, reset_for_style=False): @@ -1176,6 +1161,17 @@ class Tokenizer(object): self._context &= ~contexts.TABLE_CELL_STYLE return self._pop(keep_context=True) + def _handle_end(self): + """Handle the end of the stream of wikitext.""" + if self._context & contexts.FAIL: + if self._context & contexts.TAG_BODY: + if is_single(self._stack[1].text): + return self._handle_single_tag_end() + if self._context & contexts.DOUBLE: + self._pop() + self._fail_route() + return self._pop() + def _verify_safe(self, this): """Make sure we are not trying to write an invalid character.""" context = self._context From 4d4045902d1b56369c962a79a8e6a95e09a068c5 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 23 Oct 2014 21:27:55 -0500 Subject: [PATCH 081/102] Update table tests to reflect new grammar. --- tests/tokenizer/tables.mwtest | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index 59ad934..e042467 100644 --- a/tests/tokenizer/tables.mwtest +++ b/tests/tokenizer/tables.mwtest @@ -6,9 +6,9 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding --- name: inline_table -label: correctly handle tables with close on the same line +label: tables with a close on the same line are not valid input: "{||}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=""), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [Text(text="{||}")] --- @@ -127,7 +127,7 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding name: characters_after_inline_table label: handle characters after an inline table close input: "{| |} tsta" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" "), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose(), Text(text=" tsta")] +output: [Text(text="{| |} tsta")] --- @@ -342,9 +342,9 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_fir --- name: inline_table_attributes -label: correctly handle attributes in inline tables +label: handle attributes in inline tables input: "{| foo="tee bar" |}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"),TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="tee bar"), TagCloseOpen(padding=" "), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] +output: [Text(text='{| foo="tee bar" |}')] --- @@ -376,14 +376,28 @@ output: [Text(text="{|\n}")] --- -name: recursion_five_hundred_opens -label: test potentially dangerous recursion: five hundred table openings, without spaces -input: 
"{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|" -output: [Text(text="{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|{|")] +name: fake_close_near_start +label: a fake closing token at the end of the first line in the table +input: "{| class="wikitable" style="text-align: center; width=100%;|}\n|\n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="class"), TagAttrEquals(), TagAttrQuote(char='"'), Text(text="wikitable"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), Text(text="\"text-align:"), TagAttrStart(pad_first=" ", pad_before_eq=" ", pad_after_eq=""), Text(text="center;"), TagAttrStart(pad_first="", pad_before_eq="", pad_after_eq=""), Text(text="width"), TagAttrEquals(), Text(text="100%;|}"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text="\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] --- -name: recursion_one_hundred_opens -label: test potentially dangerous recursion: one hundred table openings, with spaces -input: "{| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {|" -output: [Text(text="{| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {| {|")] +name: fake_close_near_start_2 +label: a fake closing token at the end of the first line in the table +input: "{| class="wikitable|}"\n|\n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="class"), TagAttrEquals(), TagAttrQuote(char='"'), Text(text="wikitable|}"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text="\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] + +--- + +name: junk_after_table_start +label: ignore more junk on the first line of the table +input: "{| class="wikitable" | foobar\n|\n|}" +output: [TagOpenOpen(wiki_markup="{|"), 
Text(text="table"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="class"), TagAttrEquals(), TagAttrQuote(char='"'), Text(text="wikitable"), TagAttrStart(pad_first=" ", pad_before_eq=" ", pad_after_eq=""), Text(text="|"), TagAttrStart(pad_first="", pad_before_eq="", pad_after_eq=""), Text(text="foobar"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text="\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] + +--- + +name: junk_after_table_row +label: ignore junk on the first line of a table row +input: "{|\n|- foo="bar" | baz\n|blerp\n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char='"'), Text(text="bar"), TagAttrStart(pad_first=" ", pad_before_eq=" ", pad_after_eq=""), Text(text="|"), TagAttrStart(pad_first="", pad_before_eq="", pad_after_eq=""), Text(text="baz"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text="blerp\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] From fb261450d8fa0d3e666fe48a000a6afd6694c89a Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 23 Oct 2014 21:40:50 -0500 Subject: [PATCH 082/102] Port tokenizer updates to C. --- mwparserfromhell/parser/tokenizer.c | 80 ++++++++++++++----------------------- 1 file changed, 31 insertions(+), 49 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index c53a420..1b68b46 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -2513,13 +2513,12 @@ Tokenizer_emit_table_tag(Tokenizer* self, const char* open_open_markup, Parse until ``end_token`` as style attributes for a table. 
*/ static PyObject* -Tokenizer_parse_as_table_style(Tokenizer* self, char end_token, - int break_on_table_end) +Tokenizer_parse_as_table_style(Tokenizer* self, char end_token) { TagData *data = TagData_new(); PyObject *padding, *trash; - Py_UNICODE this, next; - int can_exit, table_end; + Py_UNICODE this; + int can_exit; if (!data) return NULL; @@ -2527,10 +2526,8 @@ Tokenizer_parse_as_table_style(Tokenizer* self, char end_token, while (1) { this = Tokenizer_READ(self, 0); - next = Tokenizer_READ(self, 1); can_exit = (!(data->context & TAG_QUOTED) || data->context & TAG_NOTE_SPACE); - table_end = (break_on_table_end && this == '|' && next == '}'); - if ((this == end_token && can_exit) || table_end) { + if (this == end_token && can_exit) { if (data->context & (TAG_ATTR_NAME | TAG_ATTR_VALUE)) { if (Tokenizer_push_tag_buffer(self, data)) { TagData_dealloc(data); @@ -2545,7 +2542,7 @@ Tokenizer_parse_as_table_style(Tokenizer* self, char end_token, return NULL; return padding; } - else if (!this || table_end || this == end_token) { + else if (!this || this == end_token) { if (self->topstack->context & LC_TAG_ATTR) { if (data->context & TAG_QUOTED) { // Unclosed attribute quote: reset, don't die @@ -2577,13 +2574,13 @@ Tokenizer_parse_as_table_style(Tokenizer* self, char end_token, static int Tokenizer_handle_table_start(Tokenizer* self) { Py_ssize_t reset = self->head + 1; - PyObject *style, *padding, *newline_character; + PyObject *style, *padding; PyObject *table = NULL; self->head += 2; if(Tokenizer_push(self, LC_TABLE_OPEN)) return -1; - padding = Tokenizer_parse_as_table_style(self, '\n', 1); + padding = Tokenizer_parse_as_table_style(self, '\n'); if (BAD_ROUTE) { RESET_ROUTE(); self->head = reset; @@ -2599,41 +2596,27 @@ static int Tokenizer_handle_table_start(Tokenizer* self) return -1; } - newline_character = PyUnicode_FromString("\n"); - if (!newline_character) { + self->head++; + table = Tokenizer_parse(self, LC_TABLE_OPEN, 1); + if (BAD_ROUTE) { + RESET_ROUTE(); Py_DECREF(padding); Py_DECREF(style); - return -1; - } - // continue to parse if it is NOT an inline table - if (PyUnicode_Contains(padding, newline_character)) { - Py_DECREF(newline_character); - self->head++; - table = Tokenizer_parse(self, LC_TABLE_OPEN, 1); - if (BAD_ROUTE) { - Py_DECREF(padding); - Py_DECREF(style); - RESET_ROUTE(); - self->head = reset; - if (Tokenizer_emit_text(self, "{|")) - return -1; - return 0; - } - if (!table) { - Py_DECREF(padding); - Py_DECREF(style); + self->head = reset; + if (Tokenizer_emit_text(self, "{|")) return -1; - } - } else { - Py_DECREF(newline_character); - // close tag - self->head += 2; + return 0; + } + if (!table) { + Py_DECREF(padding); + Py_DECREF(style); + return -1; } if (Tokenizer_emit_table_tag(self, "{|", "table", style, padding, NULL, table, "|}")) return -1; - // offset displacement done by _parse() + // Offset displacement done by _parse() self->head--; return 0; } @@ -2665,7 +2648,7 @@ static int Tokenizer_handle_table_row(Tokenizer* self) if(Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_ROW_OPEN)) return -1; - padding = Tokenizer_parse_as_table_style(self, '\n', 0); + padding = Tokenizer_parse_as_table_style(self, '\n'); if (BAD_ROUTE) { trash = Tokenizer_pop(self); Py_XDECREF(trash); @@ -2679,7 +2662,8 @@ static int Tokenizer_handle_table_row(Tokenizer* self) Py_DECREF(padding); return -1; } - // don't parse the style separator + + // Don't parse the style separator self->head++; row = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_ROW_OPEN, 1); if (BAD_ROUTE) { 
@@ -2696,10 +2680,9 @@ static int Tokenizer_handle_table_row(Tokenizer* self) return -1; } - if (Tokenizer_emit_table_tag(self, "|-", "tr", style, padding, NULL, row, - "")) + if (Tokenizer_emit_table_tag(self, "|-", "tr", style, padding, NULL, row, "")) return -1; - // offset displacement done by _parse() + // Offset displacement done by _parse() self->head--; return 0; } @@ -2754,7 +2737,7 @@ Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, if(Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | line_context)) return -1; - padding = Tokenizer_parse_as_table_style(self, '|', 0); + padding = Tokenizer_parse_as_table_style(self, '|'); if (!padding) return -1; style = Tokenizer_pop(self); @@ -2796,10 +2779,9 @@ Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, if (Tokenizer_emit_table_tag(self, markup, tag, style, padding, close_open_markup, cell, "")) return -1; - // keep header/cell line contexts - self->topstack->context |= cell_context & (LC_TABLE_TH_LINE | - LC_TABLE_TD_LINE); - // offset displacement done by parse() + // Keep header/cell line contexts + self->topstack->context |= cell_context & (LC_TABLE_TH_LINE | LC_TABLE_TD_LINE); + // Offset displacement done by parse() self->head--; return 0; } @@ -3092,7 +3074,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) else if ((this == '\n' || this == ':') && this_context & LC_DLTERM) { if (Tokenizer_handle_dl_term(self)) return NULL; - // kill potential table contexts + // Kill potential table contexts if (this == '\n') self->topstack->context &= ~LC_TABLE_CELL_LINE_CONTEXTS; } @@ -3130,7 +3112,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) else if (this == '|' && this_context & LC_TABLE_CELL_STYLE) { return Tokenizer_handle_table_cell_end(self, 1); } - // on newline, clear out cell line contexts + // On newline, clear out cell line contexts else if (this == '\n' && this_context & LC_TABLE_CELL_LINE_CONTEXTS) { self->topstack->context &= ~LC_TABLE_CELL_LINE_CONTEXTS; if (Tokenizer_emit_char(self, this)) From 8480381a31b5da4571e32f75a18f9f15e03d770c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 23 Oct 2014 21:53:55 -0500 Subject: [PATCH 083/102] Credit for table parsing code. [skip ci] --- CHANGELOG | 2 +- docs/changelog.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 9c05482..3471531 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,7 +2,7 @@ v0.4 (unreleased): - The parser is now distributed with Windows binaries, fixing an issue that prevented Windows users from using the C tokenizer. -- Added support for parsing wikicode tables. +- Added support for parsing wikicode tables (patches by David Winegar). - Added a script to test for memory leaks in scripts/memtest.py. - Added a script to do releases in scripts/release.sh. - skip_style_tags can now be passed to mwparserfromhell.parse() (previously, diff --git a/docs/changelog.rst b/docs/changelog.rst index 1854fa0..b3e7548 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -9,7 +9,7 @@ Unreleased - The parser is now distributed with Windows binaries, fixing an issue that prevented Windows users from using the C tokenizer. -- Added support for parsing wikicode tables. +- Added support for parsing wikicode tables (patches by David Winegar). - Added a script to test for memory leaks in :file:`scripts/memtest.py`. - Added a script to do releases in :file:`scripts/release.sh`. 
- *skip_style_tags* can now be passed to :func:`mwparserfromhell.parse() From 9fc4b909e150cd786e97caf7daeb479733e5330e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 24 Oct 2014 03:40:37 -0500 Subject: [PATCH 084/102] Refactor a lot of table error recovery code. --- mwparserfromhell/parser/contexts.py | 4 +- mwparserfromhell/parser/tokenizer.c | 100 +++++++++++++++-------------------- mwparserfromhell/parser/tokenizer.h | 2 +- mwparserfromhell/parser/tokenizer.py | 82 ++++++++++++---------------- tests/tokenizer/tables.mwtest | 7 +++ 5 files changed, 87 insertions(+), 108 deletions(-) diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index ef44ce2..17912cb 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -171,7 +171,7 @@ TABLE_ROW_OPEN = 1 << 33 TABLE_TD_LINE = 1 << 34 TABLE_TH_LINE = 1 << 35 TABLE_CELL_LINE_CONTEXTS = TABLE_TD_LINE + TABLE_TH_LINE + TABLE_CELL_STYLE -TABLE = (TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE + + TABLE_ROW_OPEN + +TABLE = (TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE + TABLE_ROW_OPEN + TABLE_TD_LINE + TABLE_TH_LINE) # Global contexts: @@ -184,6 +184,6 @@ FAIL = (TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK_TITLE + HEADING + TAG + STYLE + TABLE) UNSAFE = (TEMPLATE_NAME + WIKILINK_TITLE + EXT_LINK_TITLE + TEMPLATE_PARAM_KEY + ARGUMENT_NAME + TAG_CLOSE) -DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE +DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE + TABLE_ROW_OPEN NO_WIKILINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK_URI NO_EXT_LINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 1b68b46..301ecfc 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -2510,10 +2510,9 @@ Tokenizer_emit_table_tag(Tokenizer* self, const char* open_open_markup, } /* - Parse until ``end_token`` as style attributes for a table. + Handle style attributes for a table until an ending token. */ -static PyObject* -Tokenizer_parse_as_table_style(Tokenizer* self, char end_token) +static PyObject* Tokenizer_handle_table_style(Tokenizer* self, char end_token) { TagData *data = TagData_new(); PyObject *padding, *trash; @@ -2569,9 +2568,9 @@ Tokenizer_parse_as_table_style(Tokenizer* self, char end_token) } /* - Handle the start of a table. + Parse a wikicode table by starting with the first line. */ -static int Tokenizer_handle_table_start(Tokenizer* self) +static int Tokenizer_parse_table(Tokenizer* self) { Py_ssize_t reset = self->head + 1; PyObject *style, *padding; @@ -2580,7 +2579,7 @@ static int Tokenizer_handle_table_start(Tokenizer* self) if(Tokenizer_push(self, LC_TABLE_OPEN)) return -1; - padding = Tokenizer_parse_as_table_style(self, '\n'); + padding = Tokenizer_handle_table_style(self, '\n'); if (BAD_ROUTE) { RESET_ROUTE(); self->head = reset; @@ -2622,20 +2621,10 @@ static int Tokenizer_handle_table_start(Tokenizer* self) } /* - Return the stack in order to handle the table end. -*/ -static PyObject* Tokenizer_handle_table_end(Tokenizer* self) -{ - self->head += 2; - return Tokenizer_pop(self); -} - -/* Parse as style until end of the line, then continue. 
*/ static int Tokenizer_handle_table_row(Tokenizer* self) { - Py_ssize_t reset = self->head; PyObject *padding, *style, *row, *trash; self->head += 2; @@ -2648,11 +2637,10 @@ static int Tokenizer_handle_table_row(Tokenizer* self) if(Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_ROW_OPEN)) return -1; - padding = Tokenizer_parse_as_table_style(self, '\n'); + padding = Tokenizer_handle_table_style(self, '\n'); if (BAD_ROUTE) { trash = Tokenizer_pop(self); Py_XDECREF(trash); - self->head = reset; return 0; } if (!padding) @@ -2666,14 +2654,6 @@ static int Tokenizer_handle_table_row(Tokenizer* self) // Don't parse the style separator self->head++; row = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_ROW_OPEN, 1); - if (BAD_ROUTE) { - trash = Tokenizer_pop(self); - Py_XDECREF(trash); - Py_DECREF(padding); - Py_DECREF(style); - self->head = reset; - return 0; - } if (!row) { Py_DECREF(padding); Py_DECREF(style); @@ -2688,14 +2668,6 @@ static int Tokenizer_handle_table_row(Tokenizer* self) } /* - Return the stack in order to handle the table row end. -*/ -static PyObject* Tokenizer_handle_table_row_end(Tokenizer* self) -{ - return Tokenizer_pop(self); -} - -/* Parse as normal syntax unless we hit a style marker, then parse style as HTML attributes and the remainder as normal syntax. */ @@ -2705,11 +2677,10 @@ Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, { uint64_t old_context = self->topstack->context; uint64_t cell_context; - Py_ssize_t reset = self->head; - PyObject *padding, *cell, *trash; - PyObject *style = NULL; + PyObject *padding, *cell, *style = NULL; const char *close_open_markup = NULL; self->head += strlen(markup); + Py_ssize_t reset = self->head; if (!Tokenizer_CAN_RECURSE(self)) { if (Tokenizer_emit_text(self, markup)) @@ -2720,12 +2691,6 @@ Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | LC_TABLE_CELL_STYLE | line_context, 1); - if (BAD_ROUTE) { - trash = Tokenizer_pop(self); - Py_XDECREF(trash); - self->head = reset; - return 0; - } if (!cell) return -1; cell_context = self->topstack->context; @@ -2733,11 +2698,11 @@ Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, if (cell_context & LC_TABLE_CELL_STYLE) { Py_DECREF(cell); - self->head = reset + strlen(markup); + self->head = reset; if(Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | line_context)) return -1; - padding = Tokenizer_parse_as_table_style(self, '|'); + padding = Tokenizer_handle_table_style(self, '|'); if (!padding) return -1; style = Tokenizer_pop(self); @@ -2749,14 +2714,6 @@ Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, self->head++; cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | line_context, 1); - if (BAD_ROUTE) { - Py_DECREF(padding); - Py_DECREF(style); - trash = Tokenizer_pop(self); - Py_XDECREF(trash); - self->head = reset; - return 0; - } if (!cell) { Py_DECREF(padding); Py_DECREF(style); @@ -2801,6 +2758,23 @@ Tokenizer_handle_table_cell_end(Tokenizer* self, int reset_for_style) } /* + Return the stack in order to handle the table row end. +*/ +static PyObject* Tokenizer_handle_table_row_end(Tokenizer* self) +{ + return Tokenizer_pop(self); +} + +/* + Return the stack in order to handle the table end. +*/ +static PyObject* Tokenizer_handle_table_end(Tokenizer* self) +{ + self->head += 2; + return Tokenizer_pop(self); +} + +/* Handle the end of the stream of wikitext. 
*/ static PyObject* Tokenizer_handle_end(Tokenizer* self, uint64_t context) @@ -2819,9 +2793,16 @@ static PyObject* Tokenizer_handle_end(Tokenizer* self, uint64_t context) if (single) return Tokenizer_handle_single_tag_end(self); } - else if (context & AGG_DOUBLE) { - trash = Tokenizer_pop(self); - Py_XDECREF(trash); + else { + if (context & LC_TABLE_CELL_OPEN) { + trash = Tokenizer_pop(self); + Py_XDECREF(trash); + context = self->topstack->context; + } + if (context & AGG_DOUBLE) { + trash = Tokenizer_pop(self); + Py_XDECREF(trash); + } } return Tokenizer_fail_route(self); } @@ -3082,7 +3063,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) // Start of table parsing else if (this == '{' && next == '|' && Tokenizer_has_leading_whitespace(self)) { if (Tokenizer_CAN_RECURSE(self)) { - if (Tokenizer_handle_table_start(self)) + if (Tokenizer_parse_table(self)) return NULL; } else if (Tokenizer_emit_char(self, this) || Tokenizer_emit_char(self, next)) @@ -3197,7 +3178,7 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) self->skip_style_tags = skip_style_tags; tokens = Tokenizer_parse(self, context, 1); - if (!tokens && !PyErr_Occurred()) { + if ((!tokens && !PyErr_Occurred()) || self->topstack) { if (!ParserError) { if (load_exceptions()) return NULL; @@ -3206,6 +3187,9 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) RESET_ROUTE(); PyErr_SetString(ParserError, "C tokenizer exited with BAD_ROUTE"); } + else if (self->topstack) + PyErr_SetString(ParserError, + "C tokenizer exited with non-empty token stack"); else PyErr_SetString(ParserError, "C tokenizer exited unexpectedly"); return NULL; diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index 8d2d428..33ba0e1 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -175,7 +175,7 @@ static PyObject* TagCloseClose; #define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE | LC_TABLE_OPEN) #define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) -#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE) +#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE | LC_TABLE_ROW_OPEN) #define AGG_NO_WIKILINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI) #define AGG_NO_EXT_LINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 7921e7c..3ac25a5 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1009,8 +1009,8 @@ class Tokenizer(object): self._emit_text(tag) self._emit(tokens.TagCloseClose()) - def _parse_as_table_style(self, end_token): - """Parse until ``end_token`` as style attributes for a table.""" + def _handle_table_style(self, end_token): + """Handle style attributes for a table until ``end_token``.""" data = _TagOpenData() data.context = _TagOpenData.CX_ATTR_READY while True: @@ -1037,14 +1037,13 @@ class Tokenizer(object): self._handle_tag_data(data, this) self._head += 1 - def _handle_table_start(self): - """Handle the start of a table.""" + def _parse_table(self): + """Parse a wikicode table by starting with the first line.""" reset = self._head + 1 self._head += 2 - self._push(contexts.TABLE_OPEN) try: - padding = self._parse_as_table_style("\n") + padding = 
self._handle_table_style("\n") except BadRoute: self._head = reset self._emit_text("{|") @@ -1063,14 +1062,8 @@ class Tokenizer(object): # Offset displacement done by _parse(): self._head -= 1 - def _handle_table_end(self): - """Return the stack in order to handle the table end.""" - self._head += 2 - return self._pop() - def _handle_table_row(self): """Parse as style until end of the line, then continue.""" - reset = self._head self._head += 2 if not self._can_recurse(): self._emit_text("|-") @@ -1079,67 +1072,47 @@ class Tokenizer(object): self._push(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN) try: - padding = self._parse_as_table_style("\n") + padding = self._handle_table_style("\n") except BadRoute: - self._head = reset self._pop() raise style = self._pop() # Don't parse the style separator: self._head += 1 - try: - row = self._parse(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN) - except BadRoute: - self._head = reset - self._pop() - raise + row = self._parse(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN) self._emit_table_tag("|-", "tr", style, padding, None, row, "") # Offset displacement done by parse(): self._head -= 1 - def _handle_table_row_end(self): - """Return the stack in order to handle the table row end.""" - return self._pop() - def _handle_table_cell(self, markup, tag, line_context): """Parse as normal syntax unless we hit a style marker, then parse style as HTML attributes and the remainder as normal syntax.""" old_context = self._context - reset = self._head - reset_for_style, padding, style = False, "", None + padding, style = "", None self._head += len(markup) + reset = self._head if not self._can_recurse(): self._emit_text(markup) self._head -= 1 return - try: - cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | - line_context | contexts.TABLE_CELL_STYLE) - except BadRoute: - self._head = reset - self._pop() - raise + cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | + line_context | contexts.TABLE_CELL_STYLE) cell_context = self._context self._context = old_context reset_for_style = cell_context & contexts.TABLE_CELL_STYLE if reset_for_style: - self._head = reset + len(markup) + self._head = reset self._push(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context) - padding = self._parse_as_table_style("|") + padding = self._handle_table_style("|") style = self._pop() # Don't parse the style separator: self._head += 1 - try: - cell = self._parse(contexts.TABLE_OPEN | - contexts.TABLE_CELL_OPEN | line_context) - except BadRoute: - self._head = reset - ret = self._pop() - raise + cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | + line_context) cell_context = self._context self._context = old_context @@ -1161,12 +1134,23 @@ class Tokenizer(object): self._context &= ~contexts.TABLE_CELL_STYLE return self._pop(keep_context=True) + def _handle_table_row_end(self): + """Return the stack in order to handle the table row end.""" + return self._pop() + + def _handle_table_end(self): + """Return the stack in order to handle the table end.""" + self._head += 2 + return self._pop() + def _handle_end(self): """Handle the end of the stream of wikitext.""" if self._context & contexts.FAIL: if self._context & contexts.TAG_BODY: if is_single(self._stack[1].text): return self._handle_single_tag_end() + if self._context & contexts.TABLE_CELL_OPEN: + self._pop() if self._context & contexts.DOUBLE: self._pop() self._fail_route() @@ -1327,19 +1311,19 @@ class Tokenizer(object): elif this == "{" and next == "|" and 
                  (self._read(-1) in ("\n", self.START) or
                  (self._read(-2) in ("\n", self.START) and
                  self._read(-1).isspace())):
             if self._can_recurse():
-                self._handle_table_start()
+                self._parse_table()
             else:
                 self._emit_text("{|")
         elif self._context & contexts.TABLE_OPEN:
-            if this == "|" and next == "|" and self._context & contexts.TABLE_TD_LINE:
+            if this == next == "|" and self._context & contexts.TABLE_TD_LINE:
                 if self._context & contexts.TABLE_CELL_OPEN:
                     return self._handle_table_cell_end()
                 self._handle_table_cell("||", "td", contexts.TABLE_TD_LINE)
-            elif this == "|" and next == "|" and self._context & contexts.TABLE_TH_LINE:
+            elif this == next == "|" and self._context & contexts.TABLE_TH_LINE:
                 if self._context & contexts.TABLE_CELL_OPEN:
                     return self._handle_table_cell_end()
                 self._handle_table_cell("||", "th", contexts.TABLE_TH_LINE)
-            elif this == "!" and next == "!" and self._context & contexts.TABLE_TH_LINE:
+            elif this == next == "!" and self._context & contexts.TABLE_TH_LINE:
                 if self._context & contexts.TABLE_CELL_OPEN:
                     return self._handle_table_cell_end()
                 self._handle_table_cell("!!", "th", contexts.TABLE_TH_LINE)
@@ -1387,6 +1371,10 @@ class Tokenizer(object):
         self._text = [segment for segment in split if segment]
         self._head = self._global = self._depth = self._cycles = 0
         try:
-            return self._parse(context)
+            tokens = self._parse(context)
         except BadRoute:  # pragma: no cover (untestable/exceptional case)
             raise ParserError("Python tokenizer exited with BadRoute")
+        if self._stacks:  # pragma: no cover (untestable/exceptional case)
+            err = "Python tokenizer exited with non-empty token stack"
+            raise ParserError(err)
+        return tokens
diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest
index e042467..16012cf 100644
--- a/tests/tokenizer/tables.mwtest
+++ b/tests/tokenizer/tables.mwtest
@@ -61,6 +61,13 @@ output: [Text(text="{| \n|- \n ")]
 
 ---
 
+name: no_table_close_row_and_cell
+label: no table close while inside a cell inside a row
+input: "{| \n|- \n|"
+output: [Text(text="{| \n|- \n|")]
+
+---
+
 name: no_table_close_attributes
 label: don't parse attributes as attributes if the table doesn't exist
 input: "{| border="1""

From a15f6172c09ee22aae4899547975eec4b2b0ced3 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Fri, 24 Oct 2014 03:43:22 -0500
Subject: [PATCH 085/102] Minor bugfix: move a variable declaration to the top
 of its block for C89 compatibility.

---
 mwparserfromhell/parser/tokenizer.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c
index 301ecfc..38e3a4c 100644
--- a/mwparserfromhell/parser/tokenizer.c
+++ b/mwparserfromhell/parser/tokenizer.c
@@ -2677,10 +2677,12 @@ Tokenizer_handle_table_cell(Tokenizer* self, const char *markup,
 {
     uint64_t old_context = self->topstack->context;
     uint64_t cell_context;
+    Py_ssize_t reset;
     PyObject *padding, *cell, *style = NULL;
     const char *close_open_markup = NULL;
+
     self->head += strlen(markup);
-    Py_ssize_t reset = self->head;
+    reset = self->head;
 
     if (!Tokenizer_CAN_RECURSE(self)) {
         if (Tokenizer_emit_text(self, markup))

From 0ae8460cb7a5c30383dec33ae8d045bb5f63a28b Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Fri, 24 Oct 2014 16:33:50 -0500
Subject: [PATCH 086/102] Add changelog entry for roundtripping tests.
 [skip ci]

---
 CHANGELOG          | 2 ++
 docs/changelog.rst | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/CHANGELOG b/CHANGELOG
index 3471531..848305d 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -26,6 +26,8 @@ v0.4 (unreleased):
   an incorrect node tree to be built.
 - Fixed a parser bug involving nested tags, and another involving comments in
   template names.
+- Added tests to ensure that parsed trees convert back to wikicode without
+  unintentional modifications.
 - Test coverage has been improved, and some minor related bugs have been
   fixed.
 - Updated and fixed some documentation.
diff --git a/docs/changelog.rst b/docs/changelog.rst
index b3e7548..a04410f 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -37,6 +37,8 @@ Unreleased
   exception or allow an incorrect node tree to be built.
 - Fixed a parser bug involving nested tags, and another involving comments in
   template names.
+- Added tests to ensure that parsed trees convert back to wikicode without
+  unintentional modifications.
 - Test coverage has been improved, and some minor related bugs have been
   fixed.
 - Updated and fixed some documentation.

From 5f6afe7bb58b45baa6752f0c968577df6033943e Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Wed, 24 Dec 2014 13:18:46 -0500
Subject: [PATCH 087/102] Fix version string to 0.4.dev0.

---
 mwparserfromhell/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py
index 9c29fd2..287536a 100644
--- a/mwparserfromhell/__init__.py
+++ b/mwparserfromhell/__init__.py
@@ -29,7 +29,7 @@ outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode.
 __author__ = "Ben Kurtovic"
 __copyright__ = "Copyright (C) 2012, 2013, 2014 Ben Kurtovic"
 __license__ = "MIT License"
-__version__ = "0.4.dev"
+__version__ = "0.4.dev0"
 __email__ = "ben.kurtovic@gmail.com"
 
 from . import (compat, definitions, nodes, parser, smart_list, string_mixin,

From 47b44a973092c5de0cefedcce5d11f39f7652d5a Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Wed, 24 Dec 2014 13:19:24 -0500
Subject: [PATCH 088/102] Add a failing test for #89.

---
 tests/tokenizer/tags.mwtest | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/tokenizer/tags.mwtest b/tests/tokenizer/tags.mwtest
index f979329..55b18f7 100644
--- a/tests/tokenizer/tags.mwtest
+++ b/tests/tokenizer/tags.mwtest
@@ -611,3 +611,10 @@ name: capitalization
 label: caps should be ignored within tag names
 input: "<NoWiKi>{{test}}</nOwIkI>"
 output: [TagOpenOpen(), Text(text="NoWiKi"), TagCloseOpen(padding=""), Text(text="{{test}}"), TagOpenClose(), Text(text="nOwIkI"), TagCloseClose()]
+
+---
+
+name: unparsable_with_intermediates
+label: an unparsable tag with intermediate tags inside of it
+input: "<nowiki><ref></ref></nowiki>"
+output: [TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="<ref></ref>"), TagOpenClose(), Text(text="nowiki"), TagCloseClose()]

From a00c645bd8692efdb3a667a7dd8f3d7bc7e9da44 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Wed, 24 Dec 2014 14:38:50 -0500
Subject: [PATCH 089/102] Fix handling of tag closes within <nowiki> (fixes
 #89).

---
 mwparserfromhell/parser/tokenizer.c  | 69 ++++++++++++++++++++++++++++++------
 mwparserfromhell/parser/tokenizer.py | 14 ++++++--
 tests/tokenizer/tags.mwtest          | 14 ++++++++
 3 files changed, 83 insertions(+), 14 deletions(-)

diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c
index 38e3a4c..7d07ed8 100644
--- a/mwparserfromhell/parser/tokenizer.c
+++ b/mwparserfromhell/parser/tokenizer.c
@@ -69,15 +69,19 @@ static int call_def_func(const char* funcname, PyObject* in1, PyObject* in2,
 
 /*
     Sanitize the name of a tag so it can be compared with others for equality.
 */
-static PyObject* strip_tag_name(PyObject* token)
+static PyObject* strip_tag_name(PyObject* token, int take_attr)
 {
     PyObject *text, *rstripped, *lowered;
 
-    text = PyObject_GetAttrString(token, "text");
-    if (!text)
-        return NULL;
-    rstripped = PyObject_CallMethod(text, "rstrip", NULL);
-    Py_DECREF(text);
+    if (take_attr) {
+        text = PyObject_GetAttrString(token, "text");
+        if (!text)
+            return NULL;
+        rstripped = PyObject_CallMethod(text, "rstrip", NULL);
+        Py_DECREF(text);
+    }
+    else
+        rstripped = PyObject_CallMethod(token, "rstrip", NULL);
     if (!rstripped)
         return NULL;
     lowered = PyObject_CallMethod(rstripped, "lower", NULL);
@@ -1812,8 +1816,9 @@ static PyObject* Tokenizer_handle_tag_close_close(Tokenizer* self)
             valid = 0;
             break;
         case 1: {
-            so = strip_tag_name(first);
-            sc = strip_tag_name(PyList_GET_ITEM(self->topstack->stack, 1));
+            so = strip_tag_name(first, 1);
+            sc = strip_tag_name(
+                PyList_GET_ITEM(self->topstack->stack, 1), 1);
             if (so && sc) {
                 if (PyUnicode_Compare(so, sc))
                     valid = 0;
@@ -1848,7 +1853,11 @@ static PyObject* Tokenizer_handle_blacklisted_tag(Tokenizer* self)
 */
 static PyObject* Tokenizer_handle_blacklisted_tag(Tokenizer* self)
 {
+    Textbuffer* buffer;
+    PyObject *buf_tmp, *end_tag, *start_tag;
     Py_UNICODE this, next;
+    Py_ssize_t reset;
+    int cmp;
 
     while (1) {
         this = Tokenizer_READ(self, 0);
@@ -1856,10 +1865,48 @@ static PyObject* Tokenizer_handle_blacklisted_tag(Tokenizer* self)
         next = Tokenizer_READ(self, 1);
         if (!this)
             return Tokenizer_fail_route(self);
         else if (this == '<' && next == '/') {
-            if (Tokenizer_handle_tag_open_close(self))
+            self->head += 2;
+            reset = self->head - 1;
+            buffer = Textbuffer_new();
+            if (!buffer)
                 return NULL;
-            self->head++;
-            return Tokenizer_parse(self, 0, 0);
+            while ((this = Tokenizer_READ(self, 0))) {
+                if (this == '>') {
+                    buf_tmp = Textbuffer_render(buffer);
+                    if (!buf_tmp)
+                        return NULL;
+                    end_tag = strip_tag_name(buf_tmp, 0);
+                    Py_DECREF(buf_tmp);
+                    if (!end_tag)
+                        return NULL;
+                    start_tag = strip_tag_name(
+                        PyList_GET_ITEM(self->topstack->stack, 1), 1);
+                    if (!start_tag)
+                        return NULL;
+                    cmp = PyUnicode_Compare(start_tag, end_tag);
+                    Py_DECREF(end_tag);
+                    Py_DECREF(start_tag);
+                    if (cmp)
+                        goto no_matching_end;
+                    if (Tokenizer_emit(self, TagOpenClose))
+                        return NULL;
+                    if (Tokenizer_emit_textbuffer(self, buffer, 0))
+                        return NULL;
+                    if (Tokenizer_emit(self, TagCloseClose))
+                        return NULL;
+                    return Tokenizer_pop(self);
+                }
+                if (!this || this == '\n') {
+                    no_matching_end:
+                    Textbuffer_dealloc(buffer);
+                    self->head = reset;
+                    if (Tokenizer_emit_text(self, "</"))
+                        return NULL;
+                    break;
+                }
+                Textbuffer_write(&buffer, this);
+                self->head++;
+            }
         }
         else if (this == '&') {
             if (Tokenizer_parse_entity(self))
diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py
index 3ac25a5..607cc69 100644
--- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py
@@ -735,14 +735,22 @@ class Tokenizer(object):
 
     def _handle_blacklisted_tag(self):
         """Handle the body of an HTML tag that is parser-blacklisted."""
+        strip = lambda text: text.rstrip().lower()
         while True:
             this, next = self._read(), self._read(1)
             if this is self.END:
                 self._fail_route()
             elif this == "<" and next == "/":
-                self._handle_tag_open_close()
-                self._head += 1
-                return self._parse(push=False)
+                self._head += 3
+                if self._read() != ">" or (strip(self._read(-1)) !=
+                                           strip(self._stack[1].text)):
+                    self._head -= 1
+                    self._emit_text("</")
+                    continue
+                self._emit(tokens.TagOpenClose())
+                self._emit_text(self._read(-1))
+                self._emit(tokens.TagCloseClose())
+                return self._pop()
             elif this == "&":
                 self._parse_entity()
diff --git a/tests/tokenizer/tags.mwtest b/tests/tokenizer/tags.mwtest
--- a/tests/tokenizer/tags.mwtest
+++ b/tests/tokenizer/tags.mwtest
@@ -618,3 +618,10 @@ name: unparsable_with_intermediates
 label: an unparsable tag with intermediate tags inside of it
 input: "<nowiki><ref></ref></nowiki>"
 output: [TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="<ref></ref>"), TagOpenClose(), Text(text="nowiki"), TagCloseClose()]
+
+---
+
+name: unparsable_with_intermediates_normalize
+label: an unparsable tag with intermediate tags inside of it, requiring normalization
+input: "<nowiki><ref></ref></nowIKI >"
+output: [TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="<ref></ref>"), TagOpenClose(), Text(text="nowIKI "), TagCloseClose()]

From 53e92ae04c81678f034a456189501df556fe30d3 Mon Sep 17 00:00:00 2001
From: ricordisamoa
Date: Wed, 24 Dec 2014 23:26:36 +0100
Subject: [PATCH 090/102] Update README.rst for Pywikibot core

The new version ('core') is more widely used and more actively developed than
the previous one ('compat').
---
 README.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.rst b/README.rst
index b6bf7e7..27b112a 100644
--- a/README.rst
+++ b/README.rst
@@ -121,13 +121,13 @@ Integration
 ``Page`` objects have a ``parse`` method that essentially calls
 ``mwparserfromhell.parse()`` on ``page.get()``.
 
-If you're using Pywikipedia_, your code might look like this::
+If you're using Pywikibot_, your code might look like this::
 
     import mwparserfromhell
-    import wikipedia as pywikibot
+    import pywikibot
 
     def parse(title):
-        site = pywikibot.getSite()
+        site = pywikibot.Site()
         page = pywikibot.Page(site, title)
         text = page.get()
         return mwparserfromhell.parse(text)
@@ -158,5 +158,5 @@ If you're not using a library, you can parse any page using the following code
 .. _StackOverflow question: http://stackoverflow.com/questions/2817869/error-unable-to-find-vcvarsall-bat
 .. _get pip: http://pypi.python.org/pypi/pip
 .. _EarwigBot: https://github.com/earwig/earwigbot
-.. _Pywikipedia: https://www.mediawiki.org/wiki/Manual:Pywikipediabot
+.. _Pywikibot: https://www.mediawiki.org/wiki/Manual:Pywikibot
 .. _API: http://mediawiki.org/wiki/API

From 0f16d0c63ee024ea38391805bab7ad54b46bfd92 Mon Sep 17 00:00:00 2001
From: Kunal Mehta
Date: Sat, 27 Dec 2014 21:19:17 -0800
Subject: [PATCH 091/102] Target documentation for Python 3 usage

2 is dead, long live 3. Mainly turning print into a function and urllib
import fixes.
---
 README.rst                   | 33 +++++++++++++++++----------------
 docs/integration.rst         |  4 ++--
 docs/usage.rst               | 36 ++++++++++++++++++------------------
 mwparserfromhell/wikicode.py |  2 +-
 4 files changed, 38 insertions(+), 37 deletions(-)

diff --git a/README.rst b/README.rst
index 27b112a..93dee92 100644
--- a/README.rst
+++ b/README.rst
@@ -47,19 +47,19 @@ For example::
 
     >>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?"
     >>> wikicode = mwparserfromhell.parse(text)
-    >>> print wikicode
+    >>> print(wikicode)
     I has a template! {{foo|bar|baz|eggs=spam}} See it?
     >>> templates = wikicode.filter_templates()
-    >>> print templates
+    >>> print(templates)
     ['{{foo|bar|baz|eggs=spam}}']
     >>> template = templates[0]
-    >>> print template.name
+    >>> print(template.name)
     foo
-    >>> print template.params
+    >>> print(template.params)
     ['bar', 'baz', 'eggs=spam']
-    >>> print template.get(1).value
+    >>> print(template.get(1).value)
     bar
-    >>> print template.get("eggs").value
+    >>> print(template.get("eggs").value)
     spam
 
 Since nodes can contain other nodes, getting nested templates is trivial::
@@ -73,14 +73,14 @@ templates manually.
This is possible because nodes can contain additional ``Wikicode`` objects:: >>> code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") - >>> print code.filter_templates(recursive=False) + >>> print(code.filter_templates(recursive=False)) ['{{foo|this {{includes a|template}}}}'] >>> foo = code.filter_templates(recursive=False)[0] - >>> print foo.get(1).value + >>> print(foo.get(1).value) this {{includes a|template}} - >>> print foo.get(1).value.filter_templates()[0] + >>> print(foo.get(1).value.filter_templates()[0]) {{includes a|template}} - >>> print foo.get(1).value.filter_templates()[0].get(1).value + >>> print(foo.get(1).value.filter_templates()[0].get(1).value) template Templates can be easily modified to add, remove, or alter params. ``Wikicode`` @@ -95,19 +95,19 @@ whitespace:: ... if template.name.matches("Cleanup") and not template.has("date"): ... template.add("date", "July 2012") ... - >>> print code + >>> print(code) {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}} >>> code.replace("{{uncategorized}}", "{{bar-stub}}") - >>> print code + >>> print(code) {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} - >>> print code.filter_templates() + >>> print(code.filter_templates()) ['{{cleanup|date=July 2012}}', '{{bar-stub}}'] You can then convert ``code`` back into a regular ``unicode`` object (for saving the page!) by calling ``unicode()`` on it:: >>> text = unicode(code) - >>> print text + >>> print(text) {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} >>> text == code True @@ -136,14 +136,15 @@ If you're not using a library, you can parse any page using the following code (via the API_):: import json - import urllib + from urllib.parse import urlencode + from urllib.request import urlopen import mwparserfromhell API_URL = "http://en.wikipedia.org/w/api.php" def parse(title): data = {"action": "query", "prop": "revisions", "rvlimit": 1, "rvprop": "content", "format": "json", "titles": title} - raw = urllib.urlopen(API_URL, urllib.urlencode(data)).read() + raw = urlopen(API_URL, urlencode(data).encode()).read() res = json.loads(raw) text = res["query"]["pages"].values()[0]["revisions"][0]["*"] return mwparserfromhell.parse(text) diff --git a/docs/integration.rst b/docs/integration.rst index 102b3b9..f6f3610 100644 --- a/docs/integration.rst +++ b/docs/integration.rst @@ -22,12 +22,12 @@ If you're not using a library, you can parse any page using the following code (via the API_):: import json - import urllib + import urllib.request import mwparserfromhell API_URL = "http://en.wikipedia.org/w/api.php" def parse(title): - raw = urllib.urlopen(API_URL, data).read() + raw = urllib.request.urlopen(API_URL, data).read() res = json.loads(raw) text = res["query"]["pages"].values()[0]["revisions"][0]["*"] return mwparserfromhell.parse(text) diff --git a/docs/usage.rst b/docs/usage.rst index c471397..a1adfce 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -12,19 +12,19 @@ extra methods. For example:: >>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" >>> wikicode = mwparserfromhell.parse(text) - >>> print wikicode + >>> print(wikicode) I has a template! {{foo|bar|baz|eggs=spam}} See it? 
     >>> templates = wikicode.filter_templates()
-    >>> print templates
+    >>> print(templates)
     ['{{foo|bar|baz|eggs=spam}}']
     >>> template = templates[0]
-    >>> print template.name
+    >>> print(template.name)
     foo
-    >>> print template.params
+    >>> print(template.params)
     ['bar', 'baz', 'eggs=spam']
-    >>> print template.get(1).value
+    >>> print(template.get(1).value)
     bar
-    >>> print template.get("eggs").value
+    >>> print(template.get("eggs").value)
     spam
 
 Since nodes can contain other nodes, getting nested templates is trivial::
@@ -38,14 +38,14 @@ templates manually.
 This is possible because nodes can contain additional :class:`.Wikicode`
 objects::
 
     >>> code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}")
-    >>> print code.filter_templates(recursive=False)
+    >>> print(code.filter_templates(recursive=False))
     ['{{foo|this {{includes a|template}}}}']
     >>> foo = code.filter_templates(recursive=False)[0]
-    >>> print foo.get(1).value
+    >>> print(foo.get(1).value)
     this {{includes a|template}}
-    >>> print foo.get(1).value.filter_templates()[0]
+    >>> print(foo.get(1).value.filter_templates()[0])
     {{includes a|template}}
-    >>> print foo.get(1).value.filter_templates()[0].get(1).value
+    >>> print(foo.get(1).value.filter_templates()[0].get(1).value)
     template
 
 Templates can be easily modified to add, remove, or alter params.
@@ -61,24 +61,24 @@ takes care of capitalization and whitespace::
     ...     if template.name.matches("Cleanup") and not template.has("date"):
     ...         template.add("date", "July 2012")
     ...
-    >>> print code
+    >>> print(code)
     {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}}
     >>> code.replace("{{uncategorized}}", "{{bar-stub}}")
-    >>> print code
+    >>> print(code)
     {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}
-    >>> print code.filter_templates()
+    >>> print(code.filter_templates())
     ['{{cleanup|date=July 2012}}', '{{bar-stub}}']
 
-You can then convert ``code`` back into a regular :class:`unicode` object (for
-saving the page!) by calling :func:`unicode` on it::
+You can then convert ``code`` back into a regular :class:`str` object (for
+saving the page!) by calling :func:`str` on it::
 
-    >>> text = unicode(code)
-    >>> print text
+    >>> text = str(code)
+    >>> print(text)
     {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}
     >>> text == code
     True
 
-(Likewise, use :func:`str(code) <str>` in Python 3.)
+(Likewise, use :func:`unicode(code) <unicode>` in Python 2.)
 
 For more tips, check out :class:`Wikicode's full method list <.Wikicode>` and
 the :mod:`list of Nodes <.nodes>`.
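The migrated examples above behave the same on Python 2 and 3 once ``print()``
and ``str()`` are used; a compact sketch in the same spirit, where the
wikitext is invented purely for illustration rather than taken from a real
page::

    import mwparserfromhell

    text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}"
    code = mwparserfromhell.parse(text)
    for template in code.filter_templates():
        # matches() ignores case and surrounding whitespace
        if template.name.matches("Cleanup") and not template.has("date"):
            template.add("date", "July 2012")
    print(code.filter_templates())  # print() form works on both 2 and 3
    text = str(code)                # round-trips back to page text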
diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py
index c24bc5f..ebfa9c7 100644
--- a/mwparserfromhell/wikicode.py
+++ b/mwparserfromhell/wikicode.py
@@ -567,7 +567,7 @@ class Wikicode(StringMixIn):
         following::
 
             >>> text = "Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}"
-            >>> print mwparserfromhell.parse(text).get_tree()
+            >>> print(mwparserfromhell.parse(text).get_tree())
             Lorem ipsum
             {{
                   foo

From c9ef040a0fed403a7bfec826b3b0485ecb984d27 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Sun, 28 Dec 2014 15:41:25 -0500
Subject: [PATCH 092/102] lego missed a spot in #91

---
 README.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.rst b/README.rst
index 93dee92..ae338e7 100644
--- a/README.rst
+++ b/README.rst
@@ -42,7 +42,7 @@ Normal usage is rather straightforward (where ``text`` is page text)::
     >>> wikicode = mwparserfromhell.parse(text)
 
 ``wikicode`` is a ``mwparserfromhell.Wikicode`` object, which acts like an
-ordinary ``unicode`` object (or ``str`` in Python 3) with some extra methods.
+ordinary ``str`` object (or ``unicode`` in Python 2) with some extra methods.
 For example::

From 77644ea0edbccadcd532cd932996a416105f38d5 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Sun, 28 Dec 2014 15:42:04 -0500
Subject: [PATCH 093/102] lego missed a spot in #91

---
 docs/usage.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/usage.rst b/docs/usage.rst
index a1adfce..ee667fd 100644
--- a/docs/usage.rst
+++ b/docs/usage.rst
@@ -7,7 +7,7 @@ Normal usage is rather straightforward (where ``text`` is page text)::
     >>> wikicode = mwparserfromhell.parse(text)
 
 ``wikicode`` is a :class:`mwparserfromhell.Wikicode <.Wikicode>` object, which
-acts like an ordinary ``unicode`` object (or ``str`` in Python 3) with some
+acts like an ordinary ``str`` object (or ``unicode`` in Python 2) with some
 extra methods. For example::

From c8b8cd6a605fdbd2d9cd73d4e9b486f7e3c883ac Mon Sep 17 00:00:00 2001
From: Kunal Mehta
Date: Sun, 28 Dec 2014 12:50:19 -0800
Subject: [PATCH 094/102] Another thing missed in #91

---
 README.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.rst b/README.rst
index ae338e7..45c7286 100644
--- a/README.rst
+++ b/README.rst
@@ -103,16 +103,16 @@ whitespace::
     >>> print(code.filter_templates())
     ['{{cleanup|date=July 2012}}', '{{bar-stub}}']
 
-You can then convert ``code`` back into a regular ``unicode`` object (for
-saving the page!) by calling ``unicode()`` on it::
+You can then convert ``code`` back into a regular ``str`` object (for
+saving the page!) by calling ``str()`` on it::
 
-    >>> text = unicode(code)
+    >>> text = str(code)
     >>> print(text)
     {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}
     >>> text == code
     True
 
-Likewise, use ``str(code)`` in Python 3.
+Likewise, use ``unicode(code)`` in Python 2.
 
 Integration
 -----------

From d30222e126d86ed82f19ed091817391c139c62bc Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Sun, 28 Dec 2014 15:56:18 -0500
Subject: [PATCH 095/102] Fix integration docs based on README.md

---
 docs/integration.rst | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/docs/integration.rst b/docs/integration.rst
index f6f3610..bbd00bb 100644
--- a/docs/integration.rst
+++ b/docs/integration.rst
@@ -7,13 +7,13 @@ Integration
 :func:`mwparserfromhell.parse() <mwparserfromhell.parse>` on
 :meth:`~earwigbot.wiki.page.Page.get`.
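In EarwigBot, then, parsing a page should reduce to a couple of calls. A rough
sketch under the assumption of an already-connected ``site`` object (the
accessor name is taken from EarwigBot's API and may differ by version)::

    def parse(site, title):
        # Page.parse() essentially wraps mwparserfromhell.parse(page.get()),
        # per the sentence above; site.get_page() is assumed here.
        page = site.get_page(title)
        return page.parse()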
-If you're using Pywikipedia_, your code might look like this::
+If you're using Pywikibot_, your code might look like this::
 
     import mwparserfromhell
-    import wikipedia as pywikibot
+    import pywikibot
 
     def parse(title):
-        site = pywikibot.getSite()
+        site = pywikibot.Site()
         page = pywikibot.Page(site, title)
         text = page.get()
         return mwparserfromhell.parse(text)
@@ -22,16 +22,19 @@ If you're not using a library, you can parse any page using the following code
 (via the API_)::
 
     import json
-    import urllib.request
+    from urllib.parse import urlencode
+    from urllib.request import urlopen
     import mwparserfromhell
 
     API_URL = "http://en.wikipedia.org/w/api.php"
 
     def parse(title):
-        raw = urllib.request.urlopen(API_URL, data).read()
+        data = {"action": "query", "prop": "revisions", "rvlimit": 1,
+                "rvprop": "content", "format": "json", "titles": title}
+        raw = urlopen(API_URL, urlencode(data).encode()).read()
         res = json.loads(raw)
         text = res["query"]["pages"].values()[0]["revisions"][0]["*"]
         return mwparserfromhell.parse(text)
 
 .. _EarwigBot: https://github.com/earwig/earwigbot
-.. _Pywikipedia: https://www.mediawiki.org/wiki/Manual:Pywikipediabot
+.. _Pywikibot: https://www.mediawiki.org/wiki/Manual:Pywikibot
 .. _API: http://mediawiki.org/wiki/API

From de325a0aea1f428a52639668d6cfa15cdac13e00 Mon Sep 17 00:00:00 2001
From: John Vandenberg
Date: Sat, 10 Jan 2015 15:53:48 +0700
Subject: [PATCH 096/102] Issue #26 Use pure python on compilation failure

Allow the compilation of the extension to fail, and switch to pure python
mode.
---
 setup.py | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 52 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 68943ac..226c1cc 100644
--- a/setup.py
+++ b/setup.py
@@ -21,6 +21,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+import os
 import sys
 
 if (sys.version_info[0] == 2 and sys.version_info[1] < 6) or \
    (sys.version_info[0] == 3 and sys.version_info[1] < 2):
     raise Exception("mwparserfromhell needs Python 2.6+ or 3.2+")
@@ -39,7 +40,57 @@ tokenizer = Extension("mwparserfromhell.parser._tokenizer",
                       sources=["mwparserfromhell/parser/tokenizer.c"],
                       depends=["mwparserfromhell/parser/tokenizer.h"])
 
-setup(
+
+def optional_compile_setup(func=setup, use_ext=True, *args, **kwargs):
+    """
+    Wrap setup to allow optional compilation of extensions.
+
+    Falls back to pure python mode (no extensions)
+    if compilation of extensions fails.
+    """
+    extensions = kwargs.get('ext_modules', None)
+
+    if use_ext and extensions:
+        try:
+            func(*args, **kwargs)
+            return
+        except (Exception, SystemExit) as e:
+            print('Building extension failed: %s' % repr(e))
+
+    if extensions:
+        if use_ext:
+            print('Falling back to pure python mode.')
+        else:
+            print('Using pure python mode.')
+
+        del kwargs['ext_modules']
+
+        # Basic algorithm to push the extension sources into
+        # the package as data.
+        ext_files = [(ext, filename)
+                     for ext in extensions
+                     for filename in ext.sources + ext.depends]
+
+        pkg_data = kwargs.get('package_data', {})
+        for ext, filename in ext_files:
+            ext_name_parts = ext.name.split('.')
+            pkg_name = '.'.join(ext_name_parts[0:-1])
+            pkg = pkg_data.setdefault(pkg_name, [])
+            # This assumes the extension's package name
+            # is the same prefix as the filename.
+            pkg.append(os.path.basename(filename))
+
+        kwargs['package_data'] = pkg_data
+
+        # Ensure the extension package is in the main packages list.
+        for name in pkg_data.keys():
+            if name not in kwargs['packages']:
+                kwargs['packages'].append(name)
+
+    func(*args, **kwargs)
+
+
+optional_compile_setup(
     name = "mwparserfromhell",
     packages = find_packages(exclude=("tests",)),
     ext_modules = [tokenizer],

From a64bae35c9395389d2ef96adc37d828cd217b911 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Sun, 11 Jan 2015 23:57:28 -0500
Subject: [PATCH 097/102] Add support for a NOWEB env var, update docs.

---
 CHANGELOG          | 8 ++++++--
 docs/changelog.rst | 8 ++++++--
 tests/test_docs.py | 2 ++
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 848305d..1e9801b 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -24,10 +24,14 @@ v0.4 (unreleased):
 - If something goes wrong while parsing, ParserError will now be raised.
   Previously, the parser would produce an unclear BadRoute exception or allow
   an incorrect node tree to be built.
-- Fixed a parser bug involving nested tags, and another involving comments in
-  template names.
+- Fixed parser bugs involving:
+  - nested tags;
+  - comments in template names;
+  - tags inside of <nowiki> tags.
 - Added tests to ensure that parsed trees convert back to wikicode without
   unintentional modifications.
+- Added support for a NOWEB environment variable, which disables a unit test
+  that makes a web call.
 - Test coverage has been improved, and some minor related bugs have been
   fixed.
 - Updated and fixed some documentation.
diff --git a/docs/changelog.rst b/docs/changelog.rst
index a04410f..7ab211b 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -35,10 +35,14 @@ Unreleased
 - If something goes wrong while parsing, :exc:`.ParserError` will now be
   raised. Previously, the parser would produce an unclear :exc:`.BadRoute`
   exception or allow an incorrect node tree to be built.
-- Fixed a parser bug involving nested tags, and another involving comments in
-  template names.
+- Fixed parser bugs involving:
+  - nested tags;
+  - comments in template names;
+  - tags inside of ``<nowiki>`` tags.
 - Added tests to ensure that parsed trees convert back to wikicode without
   unintentional modifications.
+- Added support for a :envvar:`NOWEB` environment variable, which disables a
+  unit test that makes a web call.
 - Test coverage has been improved, and some minor related bugs have been
   fixed.
 - Updated and fixed some documentation.
diff --git a/tests/test_docs.py b/tests/test_docs.py
index c873f0e..566a281 100644
--- a/tests/test_docs.py
+++ b/tests/test_docs.py
@@ -22,6 +22,7 @@
 
 from __future__ import print_function, unicode_literals
 import json
+import os
 
 try:
     import unittest2 as unittest
@@ -111,6 +112,7 @@ class TestDocs(unittest.TestCase):
         self.assertPrint(text, res)
         self.assertEqual(text, code)
 
+    @unittest.skipIf("NOWEB" in os.environ, "web test disabled by environ var")
     def test_readme_5(self):
         """test a block of example code in the README; includes a web call"""
         url1 = "http://en.wikipedia.org/w/api.php"

From 4e8ce523858fb5d8777b4ab2ee89635fa721a08f Mon Sep 17 00:00:00 2001
From: John Vandenberg
Date: Mon, 12 Jan 2015 18:33:26 +1100
Subject: [PATCH 098/102] Support 'setup.py test' and test without extension

'setup.py test' also uses SystemExit, with args[0] as False. Detect and
re-raise.

Add support for building without extension even when compiler is functional,
and set up extension-less travis builds.
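Put differently, the wrapper must tell a compiler failure apart from the
SystemExit(False) that setuptools' test command raises when the test suite
fails; only the former should trigger the pure-Python fallback. A minimal
sketch of that dispatch, simplified from the patch below with a made-up
function name::

    def on_setup_exit(e):
        # 'setup.py test' reports failure as SystemExit(False): re-raise so
        # the failure is not masked by a pure-Python retry.
        if e.args and e.args[0] is False:
            raise e
        # Otherwise (e.g. a compiler error), log and let the caller retry
        # setup() without ext_modules.
        print('setup with extension failed: %s' % repr(e))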
---
 .travis.yml |  5 +++++
 setup.py    | 59 +++++++++++++++++++++++++++++++++++-------------------------
 2 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index c8dbb88..daa31ac 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,3 +12,8 @@ script:
   - coverage run --source=mwparserfromhell setup.py -q test
 after_success:
   - coveralls
+
+env:
+  matrix:
+    - WITHOUT_EXTENSION=0
+    - WITHOUT_EXTENSION=1
diff --git a/setup.py b/setup.py
index 226c1cc..761bb40 100644
--- a/setup.py
+++ b/setup.py
@@ -28,6 +28,9 @@ if (sys.version_info[0] == 2 and sys.version_info[1] < 6) or \
    (sys.version_info[0] == 3 and sys.version_info[1] < 2):
     raise Exception("mwparserfromhell needs Python 2.6+ or 3.2+")
 
+if sys.version_info >= (3, 0):
+    basestring = (str, )
+
 from setuptools import setup, find_packages, Extension
 
 from mwparserfromhell import __version__
@@ -40,8 +43,25 @@ tokenizer = Extension("mwparserfromhell.parser._tokenizer",
                       sources=["mwparserfromhell/parser/tokenizer.c"],
                       depends=["mwparserfromhell/parser/tokenizer.h"])
 
+use_extension = True
+
+# Allow env var WITHOUT_EXTENSION and args --with[out]-extension
+if '--without-extension' in sys.argv:
+    use_extension = False
+elif '--with-extension' in sys.argv:
+    pass
+elif os.environ.get('WITHOUT_EXTENSION', '0') == '1':
+    use_extension = False
+
+# Remove the command line arguments as they aren't understood by
+# setuptools/distutils
+sys.argv = [arg for arg in sys.argv
+            if not arg.startswith('--with')
+            and not arg.endswith('-extension')]
 
-def optional_compile_setup(func=setup, use_ext=True, *args, **kwargs):
+
+def optional_compile_setup(func=setup, use_ext=use_extension,
+                           *args, **kwargs):
     """
     Wrap setup to allow optional compilation of extensions.
 
@@ -54,8 +74,19 @@ def optional_compile_setup(func=setup, use_ext=True, *args, **kwargs):
         try:
             func(*args, **kwargs)
             return
-        except (Exception, SystemExit) as e:
-            print('Building extension failed: %s' % repr(e))
+        except SystemExit as e:
+            assert(e.args)
+            if e.args[0] is False:
+                raise
+            elif isinstance(e.args[0], basestring):
+                if e.args[0].startswith('usage: '):
+                    raise
+                else:
+                    # Fallback to pure python mode
+                    print('setup with extension failed: %s' % repr(e))
+                    pass
+        except Exception as e:
+            print('setup with extension failed: %s' % repr(e))
 
     if extensions:
         if use_ext:
@@ -65,28 +96,6 @@ def optional_compile_setup(func=setup, use_ext=use_extension,
 
         del kwargs['ext_modules']
 
-        # Basic algorithm to push the extension sources into
-        # the package as data.
-        ext_files = [(ext, filename)
-                     for ext in extensions
-                     for filename in ext.sources + ext.depends]
-
-        pkg_data = kwargs.get('package_data', {})
-        for ext, filename in ext_files:
-            ext_name_parts = ext.name.split('.')
-            pkg_name = '.'.join(ext_name_parts[0:-1])
-            pkg = pkg_data.setdefault(pkg_name, [])
-            # This assumes the extension's package name
-            # is the same prefix as the filename.
-            pkg.append(os.path.basename(filename))
-
-        kwargs['package_data'] = pkg_data
-
-        # Ensure the extension package is in the main packages list.
-        for name in pkg_data.keys():
-            if name not in kwargs['packages']:
-                kwargs['packages'].append(name)
-
     func(*args, **kwargs)

From e71e7b4ece8c3185f9a0ae8a14ddc0995f470570 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Mon, 12 Jan 2015 02:40:40 -0500
Subject: [PATCH 099/102] Update copyright years for 2015; fix whitespace in
 docs.
---
 LICENSE | 2 +-
 docs/changelog.rst | 2 ++
 docs/conf.py | 2 +-
 mwparserfromhell/__init__.py | 4 ++--
 mwparserfromhell/definitions.py | 2 +-
 mwparserfromhell/nodes/__init__.py | 2 +-
 mwparserfromhell/nodes/argument.py | 2 +-
 mwparserfromhell/nodes/comment.py | 2 +-
 mwparserfromhell/nodes/external_link.py | 2 +-
 mwparserfromhell/nodes/extras/__init__.py | 2 +-
 mwparserfromhell/nodes/extras/attribute.py | 2 +-
 mwparserfromhell/nodes/extras/parameter.py | 2 +-
 mwparserfromhell/nodes/heading.py | 2 +-
 mwparserfromhell/nodes/html_entity.py | 2 +-
 mwparserfromhell/nodes/tag.py | 2 +-
 mwparserfromhell/nodes/template.py | 2 +-
 mwparserfromhell/nodes/text.py | 2 +-
 mwparserfromhell/nodes/wikilink.py | 2 +-
 mwparserfromhell/parser/__init__.py | 2 +-
 mwparserfromhell/parser/builder.py | 2 +-
 mwparserfromhell/parser/contexts.py | 2 +-
 mwparserfromhell/parser/tokenizer.c | 2 +-
 mwparserfromhell/parser/tokenizer.h | 2 +-
 mwparserfromhell/parser/tokenizer.py | 2 +-
 mwparserfromhell/parser/tokens.py | 2 +-
 mwparserfromhell/smart_list.py | 2 +-
 mwparserfromhell/string_mixin.py | 2 +-
 mwparserfromhell/utils.py | 2 +-
 mwparserfromhell/wikicode.py | 2 +-
 scripts/memtest.py | 2 +-
 setup.py | 2 +-
 tests/_test_tokenizer.py | 2 +-
 tests/_test_tree_equality.py | 2 +-
 tests/test_argument.py | 2 +-
 tests/test_attribute.py | 2 +-
 tests/test_builder.py | 2 +-
 tests/test_comment.py | 2 +-
 tests/test_ctokenizer.py | 2 +-
 tests/test_docs.py | 2 +-
 tests/test_external_link.py | 2 +-
 tests/test_heading.py | 2 +-
 tests/test_html_entity.py | 2 +-
 tests/test_parameter.py | 2 +-
 tests/test_parser.py | 2 +-
 tests/test_pytokenizer.py | 2 +-
 tests/test_roundtripping.py | 2 +-
 tests/test_smart_list.py | 2 +-
 tests/test_string_mixin.py | 2 +-
 tests/test_tag.py | 2 +-
 tests/test_template.py | 2 +-
 tests/test_text.py | 2 +-
 tests/test_tokens.py | 2 +-
 tests/test_utils.py | 2 +-
 tests/test_wikicode.py | 2 +-
 tests/test_wikilink.py | 2 +-
 55 files changed, 57 insertions(+), 55 deletions(-)

diff --git a/LICENSE b/LICENSE
index 327905b..92f5e42 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright (C) 2012-2014 Ben Kurtovic
+Copyright (C) 2012-2015 Ben Kurtovic
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 7ab211b..2285a82 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -36,9 +36,11 @@ Unreleased
   raised. Previously, the parser would produce an unclear :exc:`.BadRoute`
   exception or allow an incorrect node tree to be built.
 - Fixed parser bugs involving:
+
   - nested tags;
   - comments in template names;
   - tags inside of ``<nowiki>`` tags.
+
 - Added tests to ensure that parsed trees convert back to wikicode without
   unintentional modifications.
 - Added support for a :envvar:`NOWEB` environment variable, which disables a
diff --git a/docs/conf.py b/docs/conf.py
index dd1d6e1..3f82ea7 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -42,7 +42,7 @@ master_doc = 'index'
 
 # General information about the project.
project = u'mwparserfromhell' -copyright = u'2012, 2013, 2014 Ben Kurtovic' +copyright = u'2012, 2013, 2014, 2015 Ben Kurtovic' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index 287536a..1c50753 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -27,7 +27,7 @@ outrageously powerful parser for `MediaWiki `_ wikicode. """ __author__ = "Ben Kurtovic" -__copyright__ = "Copyright (C) 2012, 2013, 2014 Ben Kurtovic" +__copyright__ = "Copyright (C) 2012, 2013, 2014, 2015 Ben Kurtovic" __license__ = "MIT License" __version__ = "0.4.dev0" __email__ = "ben.kurtovic@gmail.com" diff --git a/mwparserfromhell/definitions.py b/mwparserfromhell/definitions.py index af41f49..e0ba16b 100644 --- a/mwparserfromhell/definitions.py +++ b/mwparserfromhell/definitions.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/__init__.py b/mwparserfromhell/nodes/__init__.py index 8e71c8b..d0258ca 100644 --- a/mwparserfromhell/nodes/__init__.py +++ b/mwparserfromhell/nodes/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/argument.py b/mwparserfromhell/nodes/argument.py index a595dfb..39c33ae 100644 --- a/mwparserfromhell/nodes/argument.py +++ b/mwparserfromhell/nodes/argument.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/comment.py b/mwparserfromhell/nodes/comment.py index fcfd946..3e82be7 100644 --- a/mwparserfromhell/nodes/comment.py +++ b/mwparserfromhell/nodes/comment.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/external_link.py b/mwparserfromhell/nodes/external_link.py index f98a1e5..a07e985 100644 --- a/mwparserfromhell/nodes/external_link.py +++ b/mwparserfromhell/nodes/external_link.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/extras/__init__.py 
b/mwparserfromhell/nodes/extras/__init__.py index 7c0262b..854fa45 100644 --- a/mwparserfromhell/nodes/extras/__init__.py +++ b/mwparserfromhell/nodes/extras/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/extras/attribute.py b/mwparserfromhell/nodes/extras/attribute.py index 7d296dc..7c7dd56 100644 --- a/mwparserfromhell/nodes/extras/attribute.py +++ b/mwparserfromhell/nodes/extras/attribute.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/extras/parameter.py b/mwparserfromhell/nodes/extras/parameter.py index 50c9ac0..48f610c 100644 --- a/mwparserfromhell/nodes/extras/parameter.py +++ b/mwparserfromhell/nodes/extras/parameter.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/heading.py b/mwparserfromhell/nodes/heading.py index 696b5ee..0db56f3 100644 --- a/mwparserfromhell/nodes/heading.py +++ b/mwparserfromhell/nodes/heading.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index 95f1492..e7f1bbc 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index e3c7260..cf3b4a5 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index a9b14aa..7cbeb7d 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/text.py b/mwparserfromhell/nodes/text.py index 55c714e..e793c1f 100644 --- a/mwparserfromhell/nodes/text.py +++ 
b/mwparserfromhell/nodes/text.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index f9c221c..88eaacc 100644 --- a/mwparserfromhell/nodes/wikilink.py +++ b/mwparserfromhell/nodes/wikilink.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index 36cb511..ae13c76 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index decbe60..ad29f4d 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index 17912cb..e98d8f7 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 7d07ed8..c125021 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1,6 +1,6 @@ /* Tokenizer for MWParserFromHell -Copyright (C) 2012-2014 Ben Kurtovic +Copyright (C) 2012-2015 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index 33ba0e1..842e65d 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -1,6 +1,6 @@ /* Tokenizer Header File for MWParserFromHell -Copyright (C) 2012-2014 Ben Kurtovic +Copyright (C) 2012-2015 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 607cc69..36c83e1 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # 
# Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/parser/tokens.py b/mwparserfromhell/parser/tokens.py index 2e38a1c..4668780 100644 --- a/mwparserfromhell/parser/tokens.py +++ b/mwparserfromhell/parser/tokens.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index b4cfd1b..c552050 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 8da8692..01809a7 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/utils.py b/mwparserfromhell/utils.py index 8f518a6..28823fc 100644 --- a/mwparserfromhell/utils.py +++ b/mwparserfromhell/utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index ebfa9c7..c623971 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/scripts/memtest.py b/scripts/memtest.py index e6b8011..824d992 100644 --- a/scripts/memtest.py +++ b/scripts/memtest.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/setup.py b/setup.py index 68943ac..310b616 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ #! 
/usr/bin/env python # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 17d588b..1cbbc3d 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index bb713c2..086f113 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_argument.py b/tests/test_argument.py index 3539ec4..70d8006 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_attribute.py b/tests/test_attribute.py index 15e546d..b3e325d 100644 --- a/tests/test_attribute.py +++ b/tests/test_attribute.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_builder.py b/tests/test_builder.py index d4e6f73..9af4f21 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_comment.py b/tests/test_comment.py index cac8719..ad13f4a 100644 --- a/tests/test_comment.py +++ b/tests/test_comment.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 52427e3..0d37485 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_docs.py b/tests/test_docs.py index 566a281..d50e90e 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben 
Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_external_link.py b/tests/test_external_link.py index c81470e..5137247 100644 --- a/tests/test_external_link.py +++ b/tests/test_external_link.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_heading.py b/tests/test_heading.py index 7c7a7ee..effc03b 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index 3df596a..a13fd71 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_parameter.py b/tests/test_parameter.py index 2a4bb75..71b298c 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_parser.py b/tests/test_parser.py index 955f455..6885c37 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 40e2caf..f009c14 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_roundtripping.py b/tests/test_roundtripping.py index 5360387..5c64535 100644 --- a/tests/test_roundtripping.py +++ b/tests/test_roundtripping.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 2012-2015 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 13d96d2..a7106e4 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2014 Ben Kurtovic +# Copyright (C) 
2012-2015 Ben Kurtovic
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py
index bc44f55..09e2e63 100644
--- a/tests/test_string_mixin.py
+++ b/tests/test_string_mixin.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2014 Ben Kurtovic
+# Copyright (C) 2012-2015 Ben Kurtovic
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
diff --git a/tests/test_tag.py b/tests/test_tag.py
index 3beea98..0f0040a 100644
--- a/tests/test_tag.py
+++ b/tests/test_tag.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2014 Ben Kurtovic
+# Copyright (C) 2012-2015 Ben Kurtovic
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
diff --git a/tests/test_template.py b/tests/test_template.py
index e015a6a..7ba3f64 100644
--- a/tests/test_template.py
+++ b/tests/test_template.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2014 Ben Kurtovic
+# Copyright (C) 2012-2015 Ben Kurtovic
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
diff --git a/tests/test_text.py b/tests/test_text.py
index ee2e5c7..9093824 100644
--- a/tests/test_text.py
+++ b/tests/test_text.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2014 Ben Kurtovic
+# Copyright (C) 2012-2015 Ben Kurtovic
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
diff --git a/tests/test_tokens.py b/tests/test_tokens.py
index 3efce86..98f9a56 100644
--- a/tests/test_tokens.py
+++ b/tests/test_tokens.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2014 Ben Kurtovic
+# Copyright (C) 2012-2015 Ben Kurtovic
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
diff --git a/tests/test_utils.py b/tests/test_utils.py
index ddcc078..a9d4119 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2014 Ben Kurtovic
+# Copyright (C) 2012-2015 Ben Kurtovic
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py
index 7a30a75..d97830c 100644
--- a/tests/test_wikicode.py
+++ b/tests/test_wikicode.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2014 Ben Kurtovic
+# Copyright (C) 2012-2015 Ben Kurtovic
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py
index 1bdc907..e95cd84 100644
--- a/tests/test_wikilink.py
+++ b/tests/test_wikilink.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2014 Ben Kurtovic
+# Copyright (C) 2012-2015 Ben Kurtovic
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal

From ef18166c1240002def94295b4ea0bc046e186086 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Mon, 12 Jan 2015 18:26:13 -0500
Subject: [PATCH 100/102] Update changelog following #94.

---
 .travis.yml        | 1 -
 CHANGELOG          | 5 +++--
 docs/changelog.rst | 5 +++--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index daa31ac..07dab97 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,7 +12,6 @@ script:
   - coverage run --source=mwparserfromhell setup.py -q test
 after_success:
   - coveralls
-
 env:
   matrix:
     - WITHOUT_EXTENSION=0
diff --git a/CHANGELOG b/CHANGELOG
index 1e9801b..584ade4 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,7 +1,8 @@
 v0.4 (unreleased):
 
-- The parser is now distributed with Windows binaries, fixing an issue that
-  prevented Windows users from using the C tokenizer.
+- The parser is now distributed with Windows binaries, and falls back on a pure
+  Python mode if C extensions cannot be built. This fixes an issue that
+  prevented some Windows users from installing the parser.
 - Added support for parsing wikicode tables (patches by David Winegar).
 - Added a script to test for memory leaks in scripts/memtest.py.
 - Added a script to do releases in scripts/release.sh.
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 2285a82..16963b0 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -7,8 +7,9 @@ v0.4
 Unreleased
 (`changes `__):
 
-- The parser is now distributed with Windows binaries, fixing an issue that
-  prevented Windows users from using the C tokenizer.
+- The parser is now distributed with Windows binaries, and falls back on a pure
+  Python mode if C extensions cannot be built. This fixes an issue that
+  prevented some Windows users from installing the parser.
 - Added support for parsing wikicode tables (patches by David Winegar).
 - Added a script to test for memory leaks in :file:`scripts/memtest.py`.
 - Added a script to do releases in :file:`scripts/release.sh`.

From 432da1260f3ec2ae876b4de8cc02052dd43d1f7e Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Sat, 23 May 2015 23:29:15 -0400
Subject: [PATCH 101/102] Changelog update for 0.4.

---
 CHANGELOG          | 6 +++---
 docs/changelog.rst | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 584ade4..0ab103a 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,8 +1,8 @@
 v0.4 (unreleased):
 
-- The parser is now distributed with Windows binaries, and falls back on a pure
-  Python mode if C extensions cannot be built. This fixes an issue that
-  prevented some Windows users from installing the parser.
+- The parser now falls back on pure Python mode if C extensions cannot be
+  built. This fixes an issue that prevented some Windows users from installing
+  the parser.
 - Added support for parsing wikicode tables (patches by David Winegar).
 - Added a script to test for memory leaks in scripts/memtest.py.
 - Added a script to do releases in scripts/release.sh.
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 16963b0..9811b5c 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -7,9 +7,9 @@ v0.4
 Unreleased
 (`changes `__):
 
-- The parser is now distributed with Windows binaries, and falls back on a pure
-  Python mode if C extensions cannot be built. This fixes an issue that
-  prevented some Windows users from installing the parser.
+- The parser now falls back on pure Python mode if C extensions cannot be
+  built. This fixes an issue that prevented some Windows users from installing
+  the parser.
 - Added support for parsing wikicode tables (patches by David Winegar).
 - Added a script to test for memory leaks in :file:`scripts/memtest.py`.
 - Added a script to do releases in :file:`scripts/release.sh`.

From 8f5f9b402db36aeb157318137972bd2196a4c19e Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Sat, 23 May 2015 23:35:42 -0400
Subject: [PATCH 102/102] release/0.4

---
 mwparserfromhell/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py
index 1c50753..94b6e03 100644
--- a/mwparserfromhell/__init__.py
+++ b/mwparserfromhell/__init__.py
@@ -29,7 +29,7 @@ outrageously powerful parser for `MediaWiki `_ wikicode.
 __author__ = "Ben Kurtovic"
 __copyright__ = "Copyright (C) 2012, 2013, 2014, 2015 Ben Kurtovic"
 __license__ = "MIT License"
-__version__ = "0.4.dev0"
+__version__ = "0.4"
 __email__ = "ben.kurtovic@gmail.com"
 
 from . import (compat, definitions, nodes, parser, smart_list, string_mixin,
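
The "pure Python mode" fallback noted in the v0.4 changelog entries above can be
confirmed at runtime. A minimal sketch, assuming the mwparserfromhell.parser.use_c
flag described in the project README, which is True only when the C tokenizer
extension was built and imported:

    import mwparserfromhell
    from mwparserfromhell import parser

    # use_c is True when the C tokenizer extension loaded; False means the
    # pure-Python fallback described in the v0.4 changelog is in effect.
    print("C tokenizer in use:", parser.use_c)

    # Parsing works the same way in either mode.
    code = mwparserfromhell.parse("{{foo|bar}}")
    print(code.filter_templates())  # expected: ['{{foo|bar}}']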