From d54509e190faf97e8adda385a47d09a7ab15901c Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@gmail.com>
Date: Fri, 30 Dec 2016 04:15:16 -0500
Subject: [PATCH 01/24] Fix release script.

---
 CHANGELOG          | 4 ++++
 docs/changelog.rst | 8 ++++++++
 scripts/release.sh | 6 +++---
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 053b37e..05b64ef 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,7 @@
+v0.5 (unreleased):
+
+- Fixed release script after changes to PyPI.
+
 v0.4.4 (released December 30, 2016):
 
 - Added support for Python 3.6.
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 43400a9..ec12e6d 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -1,6 +1,14 @@
 Changelog
 =========
 
+v0.5
+----
+
+Unreleased
+(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.4.4...develop>`__):
+
+- Fixed release script after changes to PyPI.
+
 v0.4.4
 ------
 
diff --git a/scripts/release.sh b/scripts/release.sh
index 1171718..4f1e9b0 100755
--- a/scripts/release.sh
+++ b/scripts/release.sh
@@ -117,11 +117,11 @@ test_release() {
     fi
     pip -q uninstall -y mwparserfromhell
     echo -n "Downloading mwparserfromhell source tarball and GPG signature..."
-    curl -sL "https://pypi.python.org/packages/source/m/mwparserfromhell/mwparserfromhell-$VERSION.tar.gz" -o "mwparserfromhell.tar.gz"
-    curl -sL "https://pypi.python.org/packages/source/m/mwparserfromhell/mwparserfromhell-$VERSION.tar.gz.asc" -o "mwparserfromhell.tar.gz.asc"
+    curl -sL "https://pypi.io/packages/source/m/mwparserfromhell/mwparserfromhell-$VERSION.tar.gz" -o "mwparserfromhell.tar.gz"
+    curl -sL "https://pypi.io/packages/source/m/mwparserfromhell/mwparserfromhell-$VERSION.tar.gz.asc" -o "mwparserfromhell.tar.gz.asc"
     echo " done."
     echo "Verifying tarball..."
-    gpg --verify mwparserfromhell.tar.gz.asc
+    gpg --verify mwparserfromhell.tar.gz.asc mwparserfromhell.tar.gz
     if [[ "$?" != "0" ]]; then
         echo "*** ERROR: GPG signature verification failed!"
         deactivate

From f34f662f35075cd51c893979c2353ccf92e7c6a1 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@gmail.com>
Date: Tue, 10 Jan 2017 02:34:21 -0500
Subject: [PATCH 02/24] Fix len() sometimes raising ValueError on empty node
 lists (fixes #174)

---
 CHANGELOG                      | 1 +
 docs/changelog.rst             | 1 +
 mwparserfromhell/smart_list.py | 2 +-
 tests/test_smart_list.py       | 1 +
 4 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG b/CHANGELOG
index 05b64ef..f3728dd 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,5 +1,6 @@
 v0.5 (unreleased):
 
+- Fixed len() sometimes raising ValueError on empty node lists.
 - Fixed release script after changes to PyPI.
 
 v0.4.4 (released December 30, 2016):
diff --git a/docs/changelog.rst b/docs/changelog.rst
index ec12e6d..edf5ab9 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -7,6 +7,7 @@ v0.5
 Unreleased
 (`changes <https://github.com/earwig/mwparserfromhell/compare/v0.4.4...develop>`__):
 
+- Fixed ``len()`` sometimes raising ``ValueError`` on empty node lists.
 - Fixed release script after changes to PyPI.
 
 v0.4.4
diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py
index c59a363..e7fa59f 100644
--- a/mwparserfromhell/smart_list.py
+++ b/mwparserfromhell/smart_list.py
@@ -271,7 +271,7 @@ class _ListProxy(_SliceNormalizerMixIn, list):
             return bool(self._render())
 
     def __len__(self):
-        return (self._stop - self._start) // self._step
+        return max((self._stop - self._start) // self._step, 0)
 
     def __getitem__(self, key):
         if isinstance(key, slice):
diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py
index 0330aed..3de7db7 100644
--- a/tests/test_smart_list.py
+++ b/tests/test_smart_list.py
@@ -398,6 +398,7 @@ class TestSmartList(unittest.TestCase):
         self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6], child1)
         self.assertEqual([4, 3, 2, 1.9, 1.8], child2)
         self.assertEqual([], child3)
+        self.assertEqual(0, len(child3))
 
         del child1
         self.assertEqual([1, 4, 3, 2, 1.9, 1.8, 5, 6], parent)

From 120d6a036607d911a58527ae43c789aa0cc348ed Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@gmail.com>
Date: Sat, 14 Jan 2017 23:32:05 -0600
Subject: [PATCH 03/24] Fix Wikicode.matches behavior on non-list/tuple
 iterables.

---
 CHANGELOG                    |  1 +
 docs/changelog.rst           |  1 +
 mwparserfromhell/wikicode.py | 27 ++++++++++++++-------------
 3 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index f3728dd..4988112 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,5 +1,6 @@
 v0.5 (unreleased):
 
+- Fixed Wikicode.matches() on iterables besides lists and tuples.
 - Fixed len() sometimes raising ValueError on empty node lists.
 - Fixed release script after changes to PyPI.
 
diff --git a/docs/changelog.rst b/docs/changelog.rst
index edf5ab9..e1e8ac8 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -7,6 +7,7 @@ v0.5
 Unreleased
 (`changes <https://github.com/earwig/mwparserfromhell/compare/v0.4.4...develop>`__):
 
+- Fixed :meth:`.Wikicode.matches` on iterables besides lists and tuples.
 - Fixed ``len()`` sometimes raising ``ValueError`` on empty node lists.
 - Fixed release script after changes to PyPI.
 
diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py
index e3f6b92..447f6ff 100644
--- a/mwparserfromhell/wikicode.py
+++ b/mwparserfromhell/wikicode.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8  -*-
 #
-# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
+# Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -24,7 +24,7 @@ from __future__ import unicode_literals
 from itertools import chain
 import re
 
-from .compat import py3k, range, str
+from .compat import bytes, py3k, range, str
 from .nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity,
                     Node, Tag, Template, Text, Wikilink)
 from .string_mixin import StringMixIn
@@ -413,22 +413,23 @@ class Wikicode(StringMixIn):
         """Do a loose equivalency test suitable for comparing page names.
 
         *other* can be any string-like object, including :class:`.Wikicode`, or
-        a tuple of these. This operation is symmetric; both sides are adjusted.
-        Specifically, whitespace and markup is stripped and the first letter's
-        case is normalized. Typical usage is
+        an iterable of these. This operation is symmetric; both sides are
+        adjusted: whitespace and markup are stripped and the first
+        letter's case is normalized. Typical usage is
         ``if template.name.matches("stub"): ...``.
         """
         cmp = lambda a, b: (a[0].upper() + a[1:] == b[0].upper() + b[1:]
                             if a and b else a == b)
         this = self.strip_code().strip()
-        if isinstance(other, (tuple, list)):
-            for obj in other:
-                that = parse_anything(obj).strip_code().strip()
-                if cmp(this, that):
-                    return True
-            return False
-        that = parse_anything(other).strip_code().strip()
-        return cmp(this, that)
+        if isinstance(other, (str, bytes, Wikicode, Node)):
+            that = parse_anything(other).strip_code().strip()
+            return cmp(this, that)
+
+        for obj in other:
+            that = parse_anything(obj).strip_code().strip()
+            if cmp(this, that):
+                return True
+        return False
 
     def ifilter(self, recursive=True, matches=None, flags=FLAGS,
                 forcetype=None):

From 6159171e0464428a8568566d34e2ebffee413530 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@gmail.com>
Date: Sun, 15 Jan 2017 04:12:34 -0600
Subject: [PATCH 04/24] Make Template.remove(keep_field=True) slightly more
 reasonable.

---
 CHANGELOG                          |  4 +++-
 docs/changelog.rst                 |  5 ++++-
 mwparserfromhell/nodes/template.py | 34 +++++++++++++++++++++-------------
 tests/test_template.py             |  3 +++
 4 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 4988112..5b592cd 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,8 @@
 v0.5 (unreleased):
 
-- Fixed Wikicode.matches() on iterables besides lists and tuples.
+- Made Template.remove(keep_field=True) behave more reasonably when the
+  parameter is already empty.
+- Fixed Wikicode.matches()'s behavior on iterables besides lists and tuples.
 - Fixed len() sometimes raising ValueError on empty node lists.
 - Fixed release script after changes to PyPI.
 
diff --git a/docs/changelog.rst b/docs/changelog.rst
index e1e8ac8..bf0f492 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -7,7 +7,10 @@ v0.5
 Unreleased
 (`changes <https://github.com/earwig/mwparserfromhell/compare/v0.4.4...develop>`__):
 
-- Fixed :meth:`.Wikicode.matches` on iterables besides lists and tuples.
+- Made :meth:`Template.remove(keep_field=True) <.Template.remove>` behave more
+  reasonably when the parameter is already empty.
+- Fixed :meth:`.Wikicode.matches`\ 's behavior on iterables besides lists and
+  tuples.
 - Fixed ``len()`` sometimes raising ``ValueError`` on empty node lists.
 - Fixed release script after changes to PyPI.
 
diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py
index 57fec70..ccc63fd 100644
--- a/mwparserfromhell/nodes/template.py
+++ b/mwparserfromhell/nodes/template.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8  -*-
 #
-# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
+# Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -70,7 +70,8 @@ class Template(Node):
             get(param.value)
         write("}}")
 
-    def _surface_escape(self, code, char):
+    @staticmethod
+    def _surface_escape(code, char):
         """Return *code* with *char* escaped as an HTML entity.
 
         The main use of this is to escape pipes (``|``) or equal signs (``=``)
@@ -82,7 +83,8 @@ class Template(Node):
             if char in node:
                 code.replace(node, node.replace(char, replacement), False)
 
-    def _select_theory(self, theories):
+    @staticmethod
+    def _select_theory(theories):
         """Return the most likely spacing convention given different options.
 
         Given a dictionary of convention options as keys and their occurrence
@@ -96,6 +98,22 @@ class Template(Node):
             if confidence >= 0.75:
                 return tuple(theories.keys())[values.index(best)]
 
+    @staticmethod
+    def _blank_param_value(value):
+        """Remove the content from *value* while keeping its whitespace.
+
+        Replace *value*\ 's nodes with two text nodes, the first containing
+        whitespace from before its content and the second containing whitespace
+        from after its content.
+        """
+        sval = str(value)
+        if sval.isspace():
+            before, after = "", sval
+        else:
+            match = re.search(r"^(\s*).*?(\s*)$", sval, FLAGS)
+            before, after = match.group(1), match.group(2)
+        value.nodes = [Text(before), Text(after)]
+
     def _get_spacing_conventions(self, use_names):
         """Try to determine the whitespace conventions for parameters.
 
@@ -119,16 +137,6 @@ class Template(Node):
         after = self._select_theory(after_theories)
         return before, after
 
-    def _blank_param_value(self, value):
-        """Remove the content from *value* while keeping its whitespace.
-
-        Replace *value*\ 's nodes with two text nodes, the first containing
-        whitespace from before its content and the second containing whitespace
-        from after its content.
-        """
-        match = re.search(r"^(\s*).*?(\s*)$", str(value), FLAGS)
-        value.nodes = [Text(match.group(1)), Text(match.group(2))]
-
     def _fix_dependendent_params(self, i):
         """Unhide keys if necessary after removing the param at index *i*."""
         if not self.params[i].showkey:
diff --git a/tests/test_template.py b/tests/test_template.py
index c306b60..a97d6de 100644
--- a/tests/test_template.py
+++ b/tests/test_template.py
@@ -216,6 +216,7 @@ class TestTemplate(TreeEqualityTestCase):
         node39 = Template(wraptext("a"), [pgenh("1", " b ")])
         node40 = Template(wraptext("a"), [pgenh("1", " b"), pgenh("2", " c")])
         node41 = Template(wraptext("a"), [pgens("1", " b"), pgens("2", " c")])
+        node42 = Template(wraptext("a"), [pgens("b", "  \n")])
 
         node1.add("e", "f", showkey=True)
         node2.add(2, "g", showkey=False)
@@ -261,6 +262,7 @@ class TestTemplate(TreeEqualityTestCase):
         node39.add("1", "c")
         node40.add("3", "d")
         node41.add("3", "d")
+        node42.add("b", "hello")
 
         self.assertEqual("{{a|b=c|d|e=f}}", node1)
         self.assertEqual("{{a|b=c|d|g}}", node2)
@@ -308,6 +310,7 @@ class TestTemplate(TreeEqualityTestCase):
         self.assertEqual("{{a|c}}", node39)
         self.assertEqual("{{a| b| c|d}}", node40)
         self.assertEqual("{{a|1= b|2= c|3= d}}", node41)
+        self.assertEqual("{{a|b=hello  \n}}", node42)
 
     def test_remove(self):
         """test Template.remove()"""

From 6ffdfa52efdde478d667add0b850742a084c9838 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@gmail.com>
Date: Fri, 3 Mar 2017 20:42:23 -0600
Subject: [PATCH 05/24] Allow Wikicode objects to be pickled properly.

---
 CHANGELOG                        | 2 ++
 docs/changelog.rst               | 3 +++
 mwparserfromhell/string_mixin.py | 3 +++
 3 files changed, 8 insertions(+)

diff --git a/CHANGELOG b/CHANGELOG
index 5b592cd..4480035 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,8 @@ v0.5 (unreleased):
 
 - Made Template.remove(keep_field=True) behave more reasonably when the
   parameter is already empty.
+- Wikicode objects can now be pickled properly (fixed infinite recursion error
+  on incompletely-constructed StringMixIn subclasses).
 - Fixed Wikicode.matches()'s behavior on iterables besides lists and tuples.
 - Fixed len() sometimes raising ValueError on empty node lists.
 - Fixed release script after changes to PyPI.
diff --git a/docs/changelog.rst b/docs/changelog.rst
index bf0f492..669b448 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -9,6 +9,9 @@ Unreleased
 
 - Made :meth:`Template.remove(keep_field=True) <.Template.remove>` behave more
   reasonably when the parameter is already empty.
+- :class:`.Wikicode` objects can now be pickled properly (fixed infinite
+  recursion error on incompletely-constructed :class:`.StringMixIn`
+  subclasses).
 - Fixed :meth:`.Wikicode.matches`\ 's behavior on iterables besides lists and
   tuples.
 - Fixed ``len()`` sometimes raising ``ValueError`` on empty node lists.
diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py
index b5ba5a4..88898a1 100644
--- a/mwparserfromhell/string_mixin.py
+++ b/mwparserfromhell/string_mixin.py
@@ -108,6 +108,9 @@ class StringMixIn(object):
         return str(item) in self.__unicode__()
 
     def __getattr__(self, attr):
+        if not hasattr(str, attr):
+            raise AttributeError("{0!r} object has no attribute {1!r}".format(
+                type(self).__name__, attr))
         return getattr(self.__unicode__(), attr)
 
     if py3k:

From 68ded2f890c7965cc560471602f5cdad5ca435bc Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@gmail.com>
Date: Sat, 18 Mar 2017 23:43:30 -0400
Subject: [PATCH 06/24] Add keep_template_params to Wikicode.strip_code (#175)

---
 CHANGELOG                               |  2 ++
 docs/changelog.rst                      |  2 ++
 mwparserfromhell/nodes/__init__.py      |  2 +-
 mwparserfromhell/nodes/argument.py      |  4 ++--
 mwparserfromhell/nodes/external_link.py |  6 +++---
 mwparserfromhell/nodes/heading.py       |  4 ++--
 mwparserfromhell/nodes/html_entity.py   |  4 ++--
 mwparserfromhell/nodes/tag.py           |  4 ++--
 mwparserfromhell/nodes/template.py      |  6 ++++++
 mwparserfromhell/nodes/text.py          |  2 +-
 mwparserfromhell/nodes/wikilink.py      |  6 +++---
 mwparserfromhell/wikicode.py            | 20 +++++++++++++++-----
 tests/test_argument.py                  |  8 +++-----
 tests/test_comment.py                   |  4 +---
 tests/test_external_link.py             | 11 +++++------
 tests/test_heading.py                   |  4 +---
 tests/test_html_entity.py               | 14 +++++++-------
 tests/test_tag.py                       |  9 ++++-----
 tests/test_template.py                  | 19 +++++++++++++------
 tests/test_text.py                      |  4 +---
 tests/test_wikicode.py                  |  5 ++++-
 tests/test_wikilink.py                  |  6 ++----
 22 files changed, 82 insertions(+), 64 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 4480035..3832524 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,8 @@ v0.5 (unreleased):
 
 - Made Template.remove(keep_field=True) behave more reasonably when the
   parameter is already empty.
+- Added the keep_template_params argument to Wikicode.strip_code(). If True,
+  then template parameters will be preserved in the output.
 - Wikicode objects can now be pickled properly (fixed infinite recursion error
   on incompletely-constructed StringMixIn subclasses).
 - Fixed Wikicode.matches()'s behavior on iterables besides lists and tuples.
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 669b448..2c6be16 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -9,6 +9,8 @@ Unreleased
 
 - Made :meth:`Template.remove(keep_field=True) <.Template.remove>` behave more
   reasonably when the parameter is already empty.
+- Added the *keep_template_params* argument to :meth:`.Wikicode.strip_code`.
+  If *True*, then template parameters will be preserved in the output.
 - :class:`.Wikicode` objects can now be pickled properly (fixed infinite
   recursion error on incompletely-constructed :class:`.StringMixIn`
   subclasses).
diff --git a/mwparserfromhell/nodes/__init__.py b/mwparserfromhell/nodes/__init__.py
index 91678c8..17ad3c3 100644
--- a/mwparserfromhell/nodes/__init__.py
+++ b/mwparserfromhell/nodes/__init__.py
@@ -58,7 +58,7 @@ class Node(StringMixIn):
         return
         yield  # pragma: no cover (this is a generator that yields nothing)
 
-    def __strip__(self, normalize, collapse):
+    def __strip__(self, **kwargs):
         return None
 
     def __showtree__(self, write, get, mark):
diff --git a/mwparserfromhell/nodes/argument.py b/mwparserfromhell/nodes/argument.py
index 9146704..4259a35 100644
--- a/mwparserfromhell/nodes/argument.py
+++ b/mwparserfromhell/nodes/argument.py
@@ -47,9 +47,9 @@ class Argument(Node):
         if self.default is not None:
             yield self.default
 
-    def __strip__(self, normalize, collapse):
+    def __strip__(self, **kwargs):
         if self.default is not None:
-            return self.default.strip_code(normalize, collapse)
+            return self.default.strip_code(**kwargs)
         return None
 
     def __showtree__(self, write, get, mark):
diff --git a/mwparserfromhell/nodes/external_link.py b/mwparserfromhell/nodes/external_link.py
index 8493a25..f2659ab 100644
--- a/mwparserfromhell/nodes/external_link.py
+++ b/mwparserfromhell/nodes/external_link.py
@@ -49,12 +49,12 @@ class ExternalLink(Node):
         if self.title is not None:
             yield self.title
 
-    def __strip__(self, normalize, collapse):
+    def __strip__(self, **kwargs):
         if self.brackets:
             if self.title:
-                return self.title.strip_code(normalize, collapse)
+                return self.title.strip_code(**kwargs)
             return None
-        return self.url.strip_code(normalize, collapse)
+        return self.url.strip_code(**kwargs)
 
     def __showtree__(self, write, get, mark):
         if self.brackets:
diff --git a/mwparserfromhell/nodes/heading.py b/mwparserfromhell/nodes/heading.py
index 7bba702..79f3364 100644
--- a/mwparserfromhell/nodes/heading.py
+++ b/mwparserfromhell/nodes/heading.py
@@ -42,8 +42,8 @@ class Heading(Node):
     def __children__(self):
         yield self.title
 
-    def __strip__(self, normalize, collapse):
-        return self.title.strip_code(normalize, collapse)
+    def __strip__(self, **kwargs):
+        return self.title.strip_code(**kwargs)
 
     def __showtree__(self, write, get, mark):
         write("=" * self.level)
diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py
index 8b7f270..d5e9d73 100644
--- a/mwparserfromhell/nodes/html_entity.py
+++ b/mwparserfromhell/nodes/html_entity.py
@@ -58,8 +58,8 @@ class HTMLEntity(Node):
             return "&#{0}{1};".format(self.hex_char, self.value)
         return "&#{0};".format(self.value)
 
-    def __strip__(self, normalize, collapse):
-        if normalize:
+    def __strip__(self, **kwargs):
+        if kwargs.get("normalize"):
             return self.normalize()
         return self
 
diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py
index d393e2c..f0611a6 100644
--- a/mwparserfromhell/nodes/tag.py
+++ b/mwparserfromhell/nodes/tag.py
@@ -98,9 +98,9 @@ class Tag(Node):
         if not self.self_closing and not self.wiki_markup and self.closing_tag:
             yield self.closing_tag
 
-    def __strip__(self, normalize, collapse):
+    def __strip__(self, **kwargs):
         if self.contents and is_visible(self.tag):
-            return self.contents.strip_code(normalize, collapse)
+            return self.contents.strip_code(**kwargs)
         return None
 
     def __showtree__(self, write, get, mark):
diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py
index ccc63fd..9c89fbd 100644
--- a/mwparserfromhell/nodes/template.py
+++ b/mwparserfromhell/nodes/template.py
@@ -58,6 +58,12 @@ class Template(Node):
                 yield param.name
             yield param.value
 
+    def __strip__(self, **kwargs):
+        if kwargs.get("keep_template_params"):
+            parts = [param.value.strip_code(**kwargs) for param in self.params]
+            return " ".join(part for part in parts if part)
+        return None
+
     def __showtree__(self, write, get, mark):
         write("{{")
         get(self.name)
diff --git a/mwparserfromhell/nodes/text.py b/mwparserfromhell/nodes/text.py
index 08ac205..a49930f 100644
--- a/mwparserfromhell/nodes/text.py
+++ b/mwparserfromhell/nodes/text.py
@@ -37,7 +37,7 @@ class Text(Node):
     def __unicode__(self):
         return self.value
 
-    def __strip__(self, normalize, collapse):
+    def __strip__(self, **kwargs):
         return self
 
     def __showtree__(self, write, get, mark):
diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py
index f71b5f6..8f4bf7d 100644
--- a/mwparserfromhell/nodes/wikilink.py
+++ b/mwparserfromhell/nodes/wikilink.py
@@ -46,10 +46,10 @@ class Wikilink(Node):
         if self.text is not None:
             yield self.text
 
-    def __strip__(self, normalize, collapse):
+    def __strip__(self, **kwargs):
         if self.text is not None:
-            return self.text.strip_code(normalize, collapse)
-        return self.title.strip_code(normalize, collapse)
+            return self.text.strip_code(**kwargs)
+        return self.title.strip_code(**kwargs)
 
     def __showtree__(self, write, get, mark):
         write("[[")
diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py
index 447f6ff..73aea41 100644
--- a/mwparserfromhell/wikicode.py
+++ b/mwparserfromhell/wikicode.py
@@ -531,23 +531,33 @@ class Wikicode(StringMixIn):
         # Ensure that earlier sections are earlier in the returned list:
         return [section for i, section in sorted(sections)]
 
-    def strip_code(self, normalize=True, collapse=True):
+    def strip_code(self, normalize=True, collapse=True,
+                   keep_template_params=False):
         """Return a rendered string without unprintable code such as templates.
 
         The way a node is stripped is handled by the
         :meth:`~.Node.__strip__` method of :class:`.Node` objects, which
         generally return a subset of their nodes or ``None``. For example,
         templates and tags are removed completely, links are stripped to just
-        their display part, headings are stripped to just their title. If
-        *normalize* is ``True``, various things may be done to strip code
+        their display part, headings are stripped to just their title.
+
+        If *normalize* is ``True``, various things may be done to strip code
         further, such as converting HTML entities like ``&Sigma;``, ``&#931;``,
         and ``&#x3a3;`` to ``Σ``. If *collapse* is ``True``, we will try to
         remove excess whitespace as well (three or more newlines are converted
-        to two, for example).
+        to two, for example). If *keep_template_params* is ``True``, then
+        template parameters will be preserved in the output (normally, they are
+        removed completely).
         """
+        kwargs = {
+            "normalize": normalize,
+            "collapse": collapse,
+            "keep_template_params": keep_template_params
+        }
+
         nodes = []
         for node in self.nodes:
-            stripped = node.__strip__(normalize, collapse)
+            stripped = node.__strip__(**kwargs)
             if stripped:
                 nodes.append(str(stripped))
 
diff --git a/tests/test_argument.py b/tests/test_argument.py
index de12eab..6209b2f 100644
--- a/tests/test_argument.py
+++ b/tests/test_argument.py
@@ -56,12 +56,10 @@ class TestArgument(TreeEqualityTestCase):
 
     def test_strip(self):
         """test Argument.__strip__()"""
-        node = Argument(wraptext("foobar"))
+        node1 = Argument(wraptext("foobar"))
         node2 = Argument(wraptext("foo"), wraptext("bar"))
-        for a in (True, False):
-            for b in (True, False):
-                self.assertIs(None, node.__strip__(a, b))
-                self.assertEqual("bar", node2.__strip__(a, b))
+        self.assertIs(None, node1.__strip__())
+        self.assertEqual("bar", node2.__strip__())
 
     def test_showtree(self):
         """test Argument.__showtree__()"""
diff --git a/tests/test_comment.py b/tests/test_comment.py
index 97a6503..27129c9 100644
--- a/tests/test_comment.py
+++ b/tests/test_comment.py
@@ -49,9 +49,7 @@ class TestComment(TreeEqualityTestCase):
     def test_strip(self):
         """test Comment.__strip__()"""
         node = Comment("foobar")
-        for a in (True, False):
-            for b in (True, False):
-                self.assertIs(None, node.__strip__(a, b))
+        self.assertIs(None, node.__strip__())
 
     def test_showtree(self):
         """test Comment.__showtree__()"""
diff --git a/tests/test_external_link.py b/tests/test_external_link.py
index 3432ae1..8cb3158 100644
--- a/tests/test_external_link.py
+++ b/tests/test_external_link.py
@@ -66,12 +66,11 @@ class TestExternalLink(TreeEqualityTestCase):
         node2 = ExternalLink(wraptext("http://example.com"))
         node3 = ExternalLink(wraptext("http://example.com"), wrap([]))
         node4 = ExternalLink(wraptext("http://example.com"), wraptext("Link"))
-        for a in (True, False):
-            for b in (True, False):
-                self.assertEqual("http://example.com", node1.__strip__(a, b))
-                self.assertEqual(None, node2.__strip__(a, b))
-                self.assertEqual(None, node3.__strip__(a, b))
-                self.assertEqual("Link", node4.__strip__(a, b))
+
+        self.assertEqual("http://example.com", node1.__strip__())
+        self.assertEqual(None, node2.__strip__())
+        self.assertEqual(None, node3.__strip__())
+        self.assertEqual("Link", node4.__strip__())
 
     def test_showtree(self):
         """test ExternalLink.__showtree__()"""
diff --git a/tests/test_heading.py b/tests/test_heading.py
index cb7ac8b..5e6776a 100644
--- a/tests/test_heading.py
+++ b/tests/test_heading.py
@@ -52,9 +52,7 @@ class TestHeading(TreeEqualityTestCase):
     def test_strip(self):
         """test Heading.__strip__()"""
         node = Heading(wraptext("foobar"), 3)
-        for a in (True, False):
-            for b in (True, False):
-                self.assertEqual("foobar", node.__strip__(a, b))
+        self.assertEqual("foobar", node.__strip__())
 
     def test_showtree(self):
         """test Heading.__showtree__()"""
diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py
index 4aa176f..4db1c13 100644
--- a/tests/test_html_entity.py
+++ b/tests/test_html_entity.py
@@ -57,13 +57,13 @@ class TestHTMLEntity(TreeEqualityTestCase):
         node1 = HTMLEntity("nbsp", named=True, hexadecimal=False)
         node2 = HTMLEntity("107", named=False, hexadecimal=False)
         node3 = HTMLEntity("e9", named=False, hexadecimal=True)
-        for a in (True, False):
-            self.assertEqual("\xa0", node1.__strip__(True, a))
-            self.assertEqual("&nbsp;", node1.__strip__(False, a))
-            self.assertEqual("k", node2.__strip__(True, a))
-            self.assertEqual("&#107;", node2.__strip__(False, a))
-            self.assertEqual("é", node3.__strip__(True, a))
-            self.assertEqual("&#xe9;", node3.__strip__(False, a))
+
+        self.assertEqual("\xa0", node1.__strip__(normalize=True))
+        self.assertEqual("&nbsp;", node1.__strip__(normalize=False))
+        self.assertEqual("k", node2.__strip__(normalize=True))
+        self.assertEqual("&#107;", node2.__strip__(normalize=False))
+        self.assertEqual("é", node3.__strip__(normalize=True))
+        self.assertEqual("&#xe9;", node3.__strip__(normalize=False))
 
     def test_showtree(self):
         """test HTMLEntity.__showtree__()"""
diff --git a/tests/test_tag.py b/tests/test_tag.py
index 0ac75a9..2e6d8a3 100644
--- a/tests/test_tag.py
+++ b/tests/test_tag.py
@@ -103,11 +103,10 @@ class TestTag(TreeEqualityTestCase):
         node1 = Tag(wraptext("i"), wraptext("foobar"))
         node2 = Tag(wraptext("math"), wraptext("foobar"))
         node3 = Tag(wraptext("br"), self_closing=True)
-        for a in (True, False):
-            for b in (True, False):
-                self.assertEqual("foobar", node1.__strip__(a, b))
-                self.assertEqual(None, node2.__strip__(a, b))
-                self.assertEqual(None, node3.__strip__(a, b))
+
+        self.assertEqual("foobar", node1.__strip__())
+        self.assertEqual(None, node2.__strip__())
+        self.assertEqual(None, node3.__strip__())
 
     def test_showtree(self):
         """test Tag.__showtree__()"""
diff --git a/tests/test_template.py b/tests/test_template.py
index a97d6de..76a45cf 100644
--- a/tests/test_template.py
+++ b/tests/test_template.py
@@ -67,12 +67,19 @@ class TestTemplate(TreeEqualityTestCase):
     def test_strip(self):
         """test Template.__strip__()"""
         node1 = Template(wraptext("foobar"))
-        node2 = Template(wraptext("foo"),
-                         [pgenh("1", "bar"), pgens("abc", "def")])
-        for a in (True, False):
-            for b in (True, False):
-                self.assertEqual(None, node1.__strip__(a, b))
-                self.assertEqual(None, node2.__strip__(a, b))
+        node2 = Template(wraptext("foo"), [
+            pgenh("1", "bar"), pgens("foo", ""), pgens("abc", "def")])
+        node3 = Template(wraptext("foo"), [
+            pgenh("1", "foo"),
+            Parameter(wraptext("2"), wrap([Template(wraptext("hello"))]),
+                      showkey=False),
+            pgenh("3", "bar")])
+
+        self.assertEqual(None, node1.__strip__(keep_template_params=False))
+        self.assertEqual(None, node2.__strip__(keep_template_params=False))
+        self.assertEqual("", node1.__strip__(keep_template_params=True))
+        self.assertEqual("bar def", node2.__strip__(keep_template_params=True))
+        self.assertEqual("foo bar", node3.__strip__(keep_template_params=True))
 
     def test_showtree(self):
         """test Template.__showtree__()"""
diff --git a/tests/test_text.py b/tests/test_text.py
index d890323..aaf8db2 100644
--- a/tests/test_text.py
+++ b/tests/test_text.py
@@ -49,9 +49,7 @@ class TestText(unittest.TestCase):
     def test_strip(self):
         """test Text.__strip__()"""
         node = Text("foobar")
-        for a in (True, False):
-            for b in (True, False):
-                self.assertIs(node, node.__strip__(a, b))
+        self.assertIs(node, node.__strip__())
 
     def test_showtree(self):
         """test Text.__showtree__()"""
diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py
index d0c11fd..5457920 100644
--- a/tests/test_wikicode.py
+++ b/tests/test_wikicode.py
@@ -433,7 +433,7 @@ class TestWikicode(TreeEqualityTestCase):
         """test Wikicode.strip_code()"""
         # Since individual nodes have test cases for their __strip__ methods,
         # we're only going to do an integration test:
-        code = parse("Foo [[bar]]\n\n{{baz}}\n\n[[a|b]] &Sigma;")
+        code = parse("Foo [[bar]]\n\n{{baz|hello}}\n\n[[a|b]] &Sigma;")
         self.assertEqual("Foo bar\n\nb Σ",
                          code.strip_code(normalize=True, collapse=True))
         self.assertEqual("Foo bar\n\n\n\nb Σ",
@@ -442,6 +442,9 @@ class TestWikicode(TreeEqualityTestCase):
                          code.strip_code(normalize=False, collapse=True))
         self.assertEqual("Foo bar\n\n\n\nb &Sigma;",
                          code.strip_code(normalize=False, collapse=False))
+        self.assertEqual("Foo bar\n\nhello\n\nb Σ",
+                         code.strip_code(normalize=True, collapse=True,
+                                         keep_template_params=True))
 
     def test_get_tree(self):
         """test Wikicode.get_tree()"""
diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py
index 965d8d5..80116ca 100644
--- a/tests/test_wikilink.py
+++ b/tests/test_wikilink.py
@@ -58,10 +58,8 @@ class TestWikilink(TreeEqualityTestCase):
         """test Wikilink.__strip__()"""
         node = Wikilink(wraptext("foobar"))
         node2 = Wikilink(wraptext("foo"), wraptext("bar"))
-        for a in (True, False):
-            for b in (True, False):
-                self.assertEqual("foobar", node.__strip__(a, b))
-                self.assertEqual("bar", node2.__strip__(a, b))
+        self.assertEqual("foobar", node.__strip__())
+        self.assertEqual("bar", node2.__strip__())
 
     def test_showtree(self):
         """test Wikilink.__showtree__()"""

From d7c755f5263cbd5d57ff0631b95b8dfded94daf5 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@gmail.com>
Date: Sat, 3 Jun 2017 19:17:21 -0400
Subject: [PATCH 07/24] Add Wikicode.contains(), Wikicode.get_ancestors(),
 Wikicode.get_parent() (#177)

---
 CHANGELOG                    |  4 +++
 docs/changelog.rst           |  5 ++++
 mwparserfromhell/wikicode.py | 61 ++++++++++++++++++++++++++++++++++++++++++++
 tests/test_wikicode.py       | 27 ++++++++++++++++++++
 4 files changed, 97 insertions(+)

diff --git a/CHANGELOG b/CHANGELOG
index 3832524..7d34015 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,5 +1,9 @@
 v0.5 (unreleased):
 
+- Added Wikicode.contains() to determine whether a Node or Wikicode object is
+  contained within another Wikicode object.
+- Added Wikicode.get_ancestors() and Wikicode.get_parent() to find all
+  ancestors and the direct parent of a Node, respectively.
 - Made Template.remove(keep_field=True) behave more reasonably when the
   parameter is already empty.
 - Added the keep_template_params argument to Wikicode.strip_code(). If True,
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 2c6be16..4d0d6fd 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -7,6 +7,11 @@ v0.5
 Unreleased
 (`changes <https://github.com/earwig/mwparserfromhell/compare/v0.4.4...develop>`__):
 
+- Added :meth:`.Wikicode.contains` to determine whether a :class:`.Node` or
+  :class:`.Wikicode` object is contained within another :class:`.Wikicode`
+  object.
+- Added :meth:`.Wikicode.get_ancestors` and :meth:`.Wikicode.get_parent` to
+  find all ancestors and the direct parent of a :class:`.Node`, respectively.
 - Made :meth:`Template.remove(keep_field=True) <.Template.remove>` behave more
   reasonably when the parameter is already empty.
 - Added the *keep_template_params* argument to :meth:`.Wikicode.strip_code`.
diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py
index 73aea41..4379b0a 100644
--- a/mwparserfromhell/wikicode.py
+++ b/mwparserfromhell/wikicode.py
@@ -275,6 +275,21 @@ class Wikicode(StringMixIn):
         else:
             self.nodes.pop(index)
 
+    def contains(self, obj):
+        """Return whether this Wikicode object contains *obj*.
+
+        If *obj* is a :class:`.Node` or :class:`.Wikicode` object, then we
+        search for it exactly among all of our children, recursively.
+        Otherwise, this method just uses :meth:`.__contains__` on the string.
+        """
+        if not isinstance(obj, (Node, Wikicode)):
+            return obj in self
+        try:
+            self._do_strong_search(obj, recursive=True)
+        except ValueError:
+            return False
+        return True
+
     def index(self, obj, recursive=False):
         """Return the index of *obj* in the list of nodes.
 
@@ -294,6 +309,52 @@ class Wikicode(StringMixIn):
                 return i
         raise ValueError(obj)
 
+    def get_ancestors(self, obj):
+        """Return a list of all ancestor nodes of the :class:`.Node` *obj*.
+
+        The list is ordered from the most shallow ancestor (greatest great-
+        grandparent) to the direct parent. The node itself is not included in
+        the list. For example::
+
+            >>> text = "{{a|{{b|{{c|{{d}}}}}}}}"
+            >>> code = mwparserfromhell.parse(text)
+            >>> node = code.filter_templates(matches=lambda n: n == "{{d}}")[0]
+            >>> code.get_ancestors(node)
+            ['{{a|{{b|{{c|{{d}}}}}}}}', '{{b|{{c|{{d}}}}}}', '{{c|{{d}}}}']
+
+        Will return an empty list if *obj* is at the top level of this Wikicode
+        object. Will raise :exc:`ValueError` if it wasn't found.
+        """
+        def _get_ancestors(code, needle):
+            for node in code.nodes:
+                if node is needle:
+                    return []
+                for code in node.__children__():
+                    ancestors = _get_ancestors(code, needle)
+                    if ancestors is not None:
+                        return [node] + ancestors
+
+        if isinstance(obj, Wikicode):
+            obj = obj.get(0)
+        elif not isinstance(obj, Node):
+            raise ValueError(obj)
+
+        ancestors = _get_ancestors(self, obj)
+        if ancestors is None:
+            raise ValueError(obj)
+        return ancestors
+
+    def get_parent(self, obj):
+        """Return the direct parent node of the :class:`.Node` *obj*.
+
+        This function is equivalent to calling :meth:`.get_ancestors` and
+        taking the last element of the resulting list. Will return None if
+        the node exists but does not have a parent; i.e., it is at the top
+        level of the Wikicode object.
+        """
+        ancestors = self.get_ancestors(obj)
+        return ancestors[-1] if ancestors else None
+
     def insert(self, index, value):
         """Insert *value* at *index* in the list of nodes.
 
diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py
index 5457920..c77fdd2 100644
--- a/tests/test_wikicode.py
+++ b/tests/test_wikicode.py
@@ -85,6 +85,17 @@ class TestWikicode(TreeEqualityTestCase):
         self.assertRaises(IndexError, code.set, 3, "{{baz}}")
         self.assertRaises(IndexError, code.set, -4, "{{baz}}")
 
+    def test_contains(self):
+        """test Wikicode.contains()"""
+        code = parse("Here is {{aaa|{{bbb|xyz{{ccc}}}}}} and a [[page|link]]")
+        tmpl1, tmpl2, tmpl3 = code.filter_templates()
+        tmpl4 = parse("{{ccc}}").filter_templates()[0]
+        self.assertTrue(code.contains(tmpl1))
+        self.assertTrue(code.contains(tmpl3))
+        self.assertFalse(code.contains(tmpl4))
+        self.assertTrue(code.contains(str(tmpl4)))
+        self.assertTrue(code.contains(tmpl2.params[0].value))
+
     def test_index(self):
         """test Wikicode.index()"""
         code = parse("Have a {{template}} and a [[page|link]]")
@@ -102,6 +113,22 @@ class TestWikicode(TreeEqualityTestCase):
         self.assertRaises(ValueError, code.index,
                           code.get(1).get(1).value, recursive=False)
 
+    def test_get_ancestors_parent(self):
+        """test Wikicode.get_ancestors() and Wikicode.get_parent()"""
+        code = parse("{{a|{{b|{{d|{{e}}{{f}}}}{{g}}}}}}{{c}}")
+        tmpl = code.filter_templates(matches=lambda n: n.name == "f")[0]
+        parent1 = code.filter_templates(matches=lambda n: n.name == "d")[0]
+        parent2 = code.filter_templates(matches=lambda n: n.name == "b")[0]
+        parent3 = code.filter_templates(matches=lambda n: n.name == "a")[0]
+        fake = parse("{{f}}").get(0)
+
+        self.assertEqual([parent3, parent2, parent1], code.get_ancestors(tmpl))
+        self.assertIs(parent1, code.get_parent(tmpl))
+        self.assertEqual([], code.get_ancestors(parent3))
+        self.assertIs(None, code.get_parent(parent3))
+        self.assertRaises(ValueError, code.get_ancestors, fake)
+        self.assertRaises(ValueError, code.get_parent, fake)
+
     def test_insert(self):
         """test Wikicode.insert()"""
         code = parse("Have a {{template}} and a [[page|link]]")

From a25304dc444a769c1159ca736aa2bc5a1e68c06a Mon Sep 17 00:00:00 2001
From: Larivact <Larivact@users.noreply.github.com>
Date: Sun, 4 Jun 2017 11:45:15 +0200
Subject: [PATCH 08/24] partially rewrite Caveats, external link caveat

"inherent limitation in wikicode" sounds misleading; it's about generating an AST instead of HTML.
---
 README.rst | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/README.rst b/README.rst
index b7d324c..86143c6 100644
--- a/README.rst
+++ b/README.rst
@@ -115,21 +115,24 @@ Likewise, use ``unicode(code)`` in Python 2.
 
 Caveats
 -------
+mwparserfromhell generates an abstract syntax tree instead of HTML.
+This has several implications:
 
-An inherent limitation in wikicode prevents us from generating complete parse
-trees in certain cases. For example, the string ``{{echo|''Hello}}, world!''``
-produces the valid output ``<i>Hello, world!</i>`` in MediaWiki, assuming
-``{{echo}}`` is a template that returns its first parameter. But since
-representing this in mwparserfromhell's node tree would be impossible, we
-compromise by treating the first node (i.e., the template) as plain text,
-parsing only the italics.
+* Crossed constructs like ``{{echo|''Hello}}, world!''`` are not supported,
+  since they cannot be represented in the node tree. We compromise by treating
+  the first node (i.e. the template) as plain text, parsing only the italics.
 
-The current workaround for cases where you are not interested in text
-formatting is to pass ``skip_style_tags=True`` to ``mwparserfromhell.parse()``.
-This treats ``''`` and ``'''`` like plain text.
+  The current workaround for cases where you are not interested in text
+  formatting is to pass ``skip_style_tags=True`` to ``mwparserfromhell.parse()``.
+  This treats ``''`` and ``'''`` like plain text.
 
-A future version of mwparserfromhell will include multiple parsing modes to get
-around this restriction.
+  A future version of mwparserfromhell will include multiple parsing modes to get
+  around this restriction.
+
+* Templates adjacent to external links e.g. ``http://example.com{{foo}}`` are
+  considered part of the link, since mwparserfromhell does not know the
+  definition of templates and even if it did the template could only be
+  partially part of the link which also couldn't be represented in the AST.
 
 Integration
 -----------

From 2d89f611be365e181d2fa3df2bfbab6fde2ab07c Mon Sep 17 00:00:00 2001
From: Larivact <Larivact@users.noreply.github.com>
Date: Sun, 4 Jun 2017 22:37:05 +0200
Subject: [PATCH 09/24] rewrite Caveats

>not supported, since they cannot be represented in the node tree.
It's not that they cannot be represented; it's that they would have to be evaluated.
---
 README.rst | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/README.rst b/README.rst
index 86143c6..5ac605a 100644
--- a/README.rst
+++ b/README.rst
@@ -115,12 +115,18 @@ Likewise, use ``unicode(code)`` in Python 2.
 
 Caveats
 -------
-mwparserfromhell generates an abstract syntax tree instead of HTML.
+While the MediaWiki parser generates HTML, mwparserfromhell acts as an interface to
+the source code. mwparserfromhell therefore is unaware of template definitions since
+if it would substitute templates with their output you could no longer change the templates.
 This has several implications:
 
-* Crossed constructs like ``{{echo|''Hello}}, world!''`` are not supported,
-  since they cannot be represented in the node tree. We compromise by treating
-  the first node (i.e. the template) as plain text, parsing only the italics.
+* Start and end tags generated by templates aren't recognized e.g. ``<b>foobar{{bold-end}}``.
+
+* Templates adjacent to external links e.g. ``http://example.com{{foo}}`` are
+  considered part of the link.
+
+* Crossed constructs like ``{{echo|''Hello}}, world!''`` are not supported.
+  We compromise by treating the first node as plain text.
 
   The current workaround for cases where you are not interested in text
   formatting is to pass ``skip_style_tags=True`` to ``mwparserfromhell.parse()``.
@@ -129,11 +135,6 @@ This has several implications:
   A future version of mwparserfromhell will include multiple parsing modes to get
   around this restriction.
 
-* Templates adjacent to external links e.g. ``http://example.com{{foo}}`` are
-  considered part of the link, since mwparserfromhell does not know the
-  definition of templates and even if it did the template could only be
-  partially part of the link which also couldn't be represented in the AST.
-
 Integration
 -----------
 

From 4d4a25152e7f504f27e8deaa9dc60cbec1981ac1 Mon Sep 17 00:00:00 2001
From: Larivact <Larivact@users.noreply.github.com>
Date: Mon, 5 Jun 2017 07:38:06 +0200
Subject: [PATCH 10/24] Caveats -> Limitations, add Config unawareness

---
 README.rst | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/README.rst b/README.rst
index 5ac605a..00fbd0b 100644
--- a/README.rst
+++ b/README.rst
@@ -113,20 +113,20 @@ saving the page!) by calling ``str()`` on it::
 
 Likewise, use ``unicode(code)`` in Python 2.
 
-Caveats
--------
+Limitations
+-----------
 While the MediaWiki parser generates HTML, mwparserfromhell acts as an interface to
 the source code. mwparserfromhell therefore is unaware of template definitions since
-if it would substitute templates with their output you could no longer change the templates.
-This has several implications:
+if it would substitute templates with their output you would no longer be working
+with the source code. This has several implications:
 
 * Start and end tags generated by templates aren't recognized e.g. ``<b>foobar{{bold-end}}``.
 
 * Templates adjacent to external links e.g. ``http://example.com{{foo}}`` are
   considered part of the link.
 
-* Crossed constructs like ``{{echo|''Hello}}, world!''`` are not supported.
-  We compromise by treating the first node as plain text.
+* Crossed constructs like ``{{echo|''Hello}}, world!''`` are not supported,
+  the first node is treated as plain text.
 
   The current workaround for cases where you are not interested in text
   formatting is to pass ``skip_style_tags=True`` to ``mwparserfromhell.parse()``.
@@ -135,6 +135,17 @@ This has several implications:
   A future version of mwparserfromhell will include multiple parsing modes to get
   around this restriction.
 
+Configuration unawareness
+-------------------------
+
+* `word-ending links`_ are not supported since the linktrail rules are language-specific. 
+
+* Localized namespace names aren't recognized, e.g. ``[[File:...]]``
+  links are treated as regular wikilinks.
+
+* Anything that looks like an XML tag is parsed as a tag since,
+  the available tags are extension-dependent.
+
 Integration
 -----------
 
@@ -178,6 +189,7 @@ Python 3 code (via the API_)::
 .. _GitHub:                 https://github.com/earwig/mwparserfromhell
 .. _Python Package Index:   http://pypi.python.org
 .. _get pip:                http://pypi.python.org/pypi/pip
+.. _word-ending links:      https://www.mediawiki.org/wiki/Help:Links#linktrail
 .. _EarwigBot:              https://github.com/earwig/earwigbot
 .. _Pywikibot:              https://www.mediawiki.org/wiki/Manual:Pywikibot
 .. _API:                    http://mediawiki.org/wiki/API

From 2e486f7544c607d0d4d966114f28c6ad651cca52 Mon Sep 17 00:00:00 2001
From: Larivact <Larivact@users.noreply.github.com>
Date: Mon, 5 Jun 2017 11:44:27 +0200
Subject: [PATCH 11/24] fix comma

---
 README.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.rst b/README.rst
index 00fbd0b..6fd3be5 100644
--- a/README.rst
+++ b/README.rst
@@ -143,8 +143,8 @@ Configuration unawareness
 * Localized namespace names aren't recognized, e.g. ``[[File:...]]``
   links are treated as regular wikilinks.
 
-* Anything that looks like an XML tag is parsed as a tag since,
-  the available tags are extension-dependent.
+* Anything that looks like an XML tag is parsed as a tag
+  since the available tags are extension-dependent.
 
 Integration
 -----------

From 784e5e7b8d72738faf2cd0d1ad212f436199dbd1 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@gmail.com>
Date: Mon, 12 Jun 2017 23:46:45 -0400
Subject: [PATCH 12/24] Revise/add to new limitations section.

---
 README.rst           | 53 +++++++++++++++++++++++++++++++---------------------
 docs/caveats.rst     | 17 -----------------
 docs/index.rst       |  2 +-
 docs/limitations.rst | 45 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 78 insertions(+), 39 deletions(-)
 delete mode 100644 docs/caveats.rst
 create mode 100644 docs/limitations.rst

diff --git a/README.rst b/README.rst
index 6fd3be5..6316ed9 100644
--- a/README.rst
+++ b/README.rst
@@ -115,36 +115,47 @@ Likewise, use ``unicode(code)`` in Python 2.
 
 Limitations
 -----------
-While the MediaWiki parser generates HTML, mwparserfromhell acts as an interface to
-the source code. mwparserfromhell therefore is unaware of template definitions since
-if it would substitute templates with their output you would no longer be working
-with the source code. This has several implications:
 
-* Start and end tags generated by templates aren't recognized e.g. ``<b>foobar{{bold-end}}``.
+While the MediaWiki parser generates HTML and has access to the contents of
+templates, among other things, mwparserfromhell acts as a direct interface to
+the source code only. This has several implications:
 
-* Templates adjacent to external links e.g. ``http://example.com{{foo}}`` are
-  considered part of the link.
+* Syntax elements produced by a template transclusion cannot be detected. For
+  example, imagine a hypothetical page ``"Template:End-bold"`` that contained
+  the text ``</b>``. While MediaWiki would correctly understand that
+  ``<b>foobar{{end-bold}}`` translates to ``<b>foobar</b>``, mwparserfromhell
+  has no way of examining the contents of ``{{end-bold}}``. Instead, it would
+  treat the bold tag as unfinished, possibly extending further down the page.
 
-* Crossed constructs like ``{{echo|''Hello}}, world!''`` are not supported,
-  the first node is treated as plain text.
+* Templates adjacent to external links, as in ``http://example.com{{foo}}``,
+  are considered part of the link. In reality, this would depend on the
+  contents of the template.
 
-  The current workaround for cases where you are not interested in text
-  formatting is to pass ``skip_style_tags=True`` to ``mwparserfromhell.parse()``.
-  This treats ``''`` and ``'''`` like plain text.
+* When different syntax elements cross over each other, as in
+  ``{{echo|''Hello}}, world!''``, the parser gets confused because this cannot
+  be represented by an ordinary syntax tree. Instead, the parser will treat the
+  first syntax construct as plain text. In this case, only the italic tag would
+  be properly parsed.
 
-  A future version of mwparserfromhell will include multiple parsing modes to get
-  around this restriction.
+  **Workaround:** Since this commonly occurs with text formatting and text
+  formatting is often not of interest to users, you may pass
+  *skip_style_tags=True* to ``mwparserfromhell.parse()``. This treats ``''``
+  and ``'''`` as plain text.
 
-Configuration unawareness
--------------------------
+  A future version of mwparserfromhell may include multiple parsing modes to
+  get around this restriction more sensibly.
 
-* `word-ending links`_ are not supported since the linktrail rules are language-specific. 
+Additionally, the parser lacks awareness of certain wiki-specific settings:
 
-* Localized namespace names aren't recognized, e.g. ``[[File:...]]``
-  links are treated as regular wikilinks.
+* `word-ending links`_ are not supported, since the linktrail rules are
+  language-specific.
 
-* Anything that looks like an XML tag is parsed as a tag
-  since the available tags are extension-dependent.
+* Localized namespace names aren't recognized, so file links (such as
+  ``[[File:...]]``) are treated as regular wikilinks.
+
+* Anything that looks like an XML tag is treated as a tag, even if it is not a
+  recognized tag name, since the list of valid tags depends on loaded MediaWiki
+  extensions.
 
 Integration
 -----------
diff --git a/docs/caveats.rst b/docs/caveats.rst
deleted file mode 100644
index 927aa54..0000000
--- a/docs/caveats.rst
+++ /dev/null
@@ -1,17 +0,0 @@
-Caveats
-=======
-
-An inherent limitation in wikicode prevents us from generating complete parse
-trees in certain cases. For example, the string ``{{echo|''Hello}}, world!''``
-produces the valid output ``<i>Hello, world!</i>`` in MediaWiki, assuming
-``{{echo}}`` is a template that returns its first parameter. But since
-representing this in mwparserfromhell's node tree would be impossible, we
-compromise by treating the first node (i.e., the template) as plain text,
-parsing only the italics.
-
-The current workaround for cases where you are not interested in text
-formatting is to pass *skip_style_tags=True* to :func:`mwparserfromhell.parse`.
-This treats ``''`` and ``'''`` like plain text.
-
-A future version of mwparserfromhell will include multiple parsing modes to get
-around this restriction.
diff --git a/docs/index.rst b/docs/index.rst
index 6593881..06dc2f9 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -40,7 +40,7 @@ Contents
    :maxdepth: 2
 
    usage
-   caveats
+   limitations
    integration
    changelog
    API Reference <api/modules>
diff --git a/docs/limitations.rst b/docs/limitations.rst
new file mode 100644
index 0000000..7d5f7e7
--- /dev/null
+++ b/docs/limitations.rst
@@ -0,0 +1,45 @@
+Limitations
+===========
+
+While the MediaWiki parser generates HTML and has access to the contents of
+templates, among other things, mwparserfromhell acts as a direct interface to
+the source code only. This has several implications:
+
+* Syntax elements produced by a template transclusion cannot be detected. For
+  example, imagine a hypothetical page ``"Template:End-bold"`` that contained
+  the text ``</b>``. While MediaWiki would correctly understand that
+  ``<b>foobar{{end-bold}}`` translates to ``<b>foobar</b>``, mwparserfromhell
+  has no way of examining the contents of ``{{end-bold}}``. Instead, it would
+  treat the bold tag as unfinished, possibly extending further down the page.
+
+* Templates adjacent to external links, as in ``http://example.com{{foo}}``,
+  are considered part of the link. In reality, this would depend on the
+  contents of the template.
+
+* When different syntax elements cross over each other, as in
+  ``{{echo|''Hello}}, world!''``, the parser gets confused because this cannot
+  be represented by an ordinary syntax tree. Instead, the parser will treat the
+  first syntax construct as plain text. In this case, only the italic tag would
+  be properly parsed.
+
+  **Workaround:** Since this commonly occurs with text formatting and text
+  formatting is often not of interest to users, you may pass
+  *skip_style_tags=True* to ``mwparserfromhell.parse()``. This treats ``''``
+  and ``'''`` as plain text.
+
+  A future version of mwparserfromhell may include multiple parsing modes to
+  get around this restriction more sensibly.
+
+Additionally, the parser lacks awareness of certain wiki-specific settings:
+
+* `word-ending links`_ are not supported, since the linktrail rules are
+  language-specific.
+
+* Localized namespace names aren't recognized, so file links (such as
+  ``[[File:...]]``) are treated as regular wikilinks.
+
+* Anything that looks like an XML tag is treated as a tag, even if it is not a
+  recognized tag name, since the list of valid tags depends on loaded MediaWiki
+  extensions.
+
+.. _word-ending links:      https://www.mediawiki.org/wiki/Help:Links#linktrail

From f01bdc51eef11412a7b50b687aea8e655e7fffe5 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@gmail.com>
Date: Mon, 12 Jun 2017 23:47:48 -0400
Subject: [PATCH 13/24] Capitalization [ci skip]

---
 README.rst           | 4 ++--
 docs/limitations.rst | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.rst b/README.rst
index 6316ed9..ab1bef9 100644
--- a/README.rst
+++ b/README.rst
@@ -147,7 +147,7 @@ the source code only. This has several implications:
 
 Additionally, the parser lacks awareness of certain wiki-specific settings:
 
-* `word-ending links`_ are not supported, since the linktrail rules are
+* `Word-ending links`_ are not supported, since the linktrail rules are
   language-specific.
 
 * Localized namespace names aren't recognized, so file links (such as
@@ -200,7 +200,7 @@ Python 3 code (via the API_)::
 .. _GitHub:                 https://github.com/earwig/mwparserfromhell
 .. _Python Package Index:   http://pypi.python.org
 .. _get pip:                http://pypi.python.org/pypi/pip
-.. _word-ending links:      https://www.mediawiki.org/wiki/Help:Links#linktrail
+.. _Word-ending links:      https://www.mediawiki.org/wiki/Help:Links#linktrail
 .. _EarwigBot:              https://github.com/earwig/earwigbot
 .. _Pywikibot:              https://www.mediawiki.org/wiki/Manual:Pywikibot
 .. _API:                    http://mediawiki.org/wiki/API
diff --git a/docs/limitations.rst b/docs/limitations.rst
index 7d5f7e7..294f4c5 100644
--- a/docs/limitations.rst
+++ b/docs/limitations.rst
@@ -32,7 +32,7 @@ the source code only. This has several implications:
 
 Additionally, the parser lacks awareness of certain wiki-specific settings:
 
-* `word-ending links`_ are not supported, since the linktrail rules are
+* `Word-ending links`_ are not supported, since the linktrail rules are
   language-specific.
 
 * Localized namespace names aren't recognized, so file links (such as
@@ -42,4 +42,4 @@ Additionally, the parser lacks awareness of certain wiki-specific settings:
   recognized tag name, since the list of valid tags depends on loaded MediaWiki
   extensions.
 
-.. _word-ending links:      https://www.mediawiki.org/wiki/Help:Links#linktrail
+.. _Word-ending links:      https://www.mediawiki.org/wiki/Help:Links#linktrail

From 08e5f7e1a5a3f67d1be0a339f4d3596f57f71f9b Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@gmail.com>
Date: Mon, 12 Jun 2017 23:51:11 -0400
Subject: [PATCH 14/24] Forgot version bump, sigh.

---
 appveyor.yml                 | 2 +-
 docs/conf.py                 | 2 +-
 mwparserfromhell/__init__.py | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/appveyor.yml b/appveyor.yml
index daec144..d60b14b 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,6 +1,6 @@
 # This config file is used by appveyor.com to build Windows release binaries
 
-version: 0.4.4-b{build}
+version: 0.5.dev0-b{build}
 
 branches:
   only:
diff --git a/docs/conf.py b/docs/conf.py
index 8d48dff..3739429 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -42,7 +42,7 @@ master_doc = 'index'
 
 # General information about the project.
 project = u'mwparserfromhell'
-copyright = u'2012, 2013, 2014, 2015, 2016 Ben Kurtovic'
+copyright = u'2012, 2013, 2014, 2015, 2016, 2017 Ben Kurtovic'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py
index 1d3c7d7..64f3681 100644
--- a/mwparserfromhell/__init__.py
+++ b/mwparserfromhell/__init__.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8  -*-
 #
-# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
+# Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -29,7 +29,7 @@ outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode.
 __author__ = "Ben Kurtovic"
 __copyright__ = "Copyright (C) 2012, 2013, 2014, 2015, 2016 Ben Kurtovic"
 __license__ = "MIT License"
-__version__ = "0.4.4"
+__version__ = "0.5.dev0"
 __email__ = "ben.kurtovic@gmail.com"
 
 from . import (compat, definitions, nodes, parser, smart_list, string_mixin,

From 8a9c9224be6cb2020ed4ad67a401081096dd21d1 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@gmail.com>
Date: Fri, 23 Jun 2017 01:08:19 -0400
Subject: [PATCH 15/24] Speed up parsing deeply nested syntax by caching bad
 routes (fixes #42)

Also removed the max cycles stop-gap, allowing much more complex pages
to be parsed quickly without losing nodes at the end

Also fixes #65, fixes #102, fixes #165, fixes #183
Also fixes #81 (Rafael Nadal parsing bug)
Also fixes #53, fixes #58, fixes #88, fixes #152 (duplicate issues)
---
 CHANGELOG                                        |   4 +
 LICENSE                                          |   2 +-
 docs/changelog.rst                               |   9 +-
 mwparserfromhell/parser/contexts.py              |   6 +-
 mwparserfromhell/parser/ctokenizer/avl_tree.c    | 789 +++++++++++++++++++++++
 mwparserfromhell/parser/ctokenizer/avl_tree.h    | 358 ++++++++++
 mwparserfromhell/parser/ctokenizer/common.h      |  19 +-
 mwparserfromhell/parser/ctokenizer/contexts.h    |   4 +-
 mwparserfromhell/parser/ctokenizer/tok_parse.c   |  53 +-
 mwparserfromhell/parser/ctokenizer/tok_support.c |  58 +-
 mwparserfromhell/parser/ctokenizer/tok_support.h |  10 +-
 mwparserfromhell/parser/ctokenizer/tokenizer.c   |  14 +-
 mwparserfromhell/parser/tokenizer.py             |  55 +-
 tests/tokenizer/integration.mwtest               |   7 +
 tests/tokenizer/templates.mwtest                 |   2 +-
 15 files changed, 1337 insertions(+), 53 deletions(-)
 create mode 100644 mwparserfromhell/parser/ctokenizer/avl_tree.c
 create mode 100644 mwparserfromhell/parser/ctokenizer/avl_tree.h

diff --git a/CHANGELOG b/CHANGELOG
index 7d34015..bebacbf 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -4,6 +4,10 @@ v0.5 (unreleased):
   contained within another Wikicode object.
 - Added Wikicode.get_ancestors() and Wikicode.get_parent() to find all
   ancestors and the direct parent of a Node, respectively.
+- Fixed a long-standing performance issue with deeply nested, invalid syntax
+  (issue #42). The parser should be much faster on certain complex pages. The
+  "max cycle" restriction has also been removed, so some situations where
+  templates at the end of a page were being skipped are now resolved.
 - Made Template.remove(keep_field=True) behave more reasonably when the
   parameter is already empty.
 - Added the keep_template_params argument to Wikicode.strip_code(). If True,
diff --git a/LICENSE b/LICENSE
index 230bc5c..588e737 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
+Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 4d0d6fd..c558579 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -12,6 +12,11 @@ Unreleased
   object.
 - Added :meth:`.Wikicode.get_ancestors` and :meth:`.Wikicode.get_parent` to
   find all ancestors and the direct parent of a :class:`.Node`, respectively.
+- Fixed a long-standing performance issue with deeply nested, invalid syntax
+  (`issue #42 <https://github.com/earwig/mwparserfromhell/issues/42>`_). The
+  parser should be much faster on certain complex pages. The "max cycle"
+  restriction has also been removed, so some situations where templates at the
+  end of a page were being skipped are now resolved.
 - Made :meth:`Template.remove(keep_field=True) <.Template.remove>` behave more
   reasonably when the parameter is already empty.
 - Added the *keep_template_params* argument to :meth:`.Wikicode.strip_code`.
@@ -54,7 +59,7 @@ v0.4.3
 v0.4.2
 ------
 
-`Released July 30, 2015 <https://github.com/earwig/mwparserfromhell/tree/v0.4.2>`_
+`Released July 30, 2015 <https://github.com/earwig/mwparserfromhell/tree/v0.4.2>`__
 (`changes <https://github.com/earwig/mwparserfromhell/compare/v0.4.1...v0.4.2>`__):
 
 - Fixed setup script not including header files in releases.
@@ -63,7 +68,7 @@ v0.4.2
 v0.4.1
 ------
 
-`Released July 30, 2015 <https://github.com/earwig/mwparserfromhell/tree/v0.4.1>`_
+`Released July 30, 2015 <https://github.com/earwig/mwparserfromhell/tree/v0.4.1>`__
 (`changes <https://github.com/earwig/mwparserfromhell/compare/v0.4...v0.4.1>`__):
 
 - The process for building Windows binaries has been fixed, and these should be
diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py
index 405a027..af6dea6 100644
--- a/mwparserfromhell/parser/contexts.py
+++ b/mwparserfromhell/parser/contexts.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8  -*-
 #
-# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
+# Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -100,6 +100,8 @@ Local (stack-specific) contexts:
     * :const:`TABLE_TH_LINE`
     * :const:`TABLE_CELL_LINE_CONTEXTS`
 
+* :const:`HTML_ENTITY`
+
 Global contexts:
 
 * :const:`GL_HEADING`
@@ -176,6 +178,8 @@ TABLE_CELL_LINE_CONTEXTS = TABLE_TD_LINE + TABLE_TH_LINE + TABLE_CELL_STYLE
 TABLE = (TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE + TABLE_ROW_OPEN +
          TABLE_TD_LINE + TABLE_TH_LINE)
 
+HTML_ENTITY = 1 << 37
+
 # Global contexts:
 
 GL_HEADING = 1 << 0
diff --git a/mwparserfromhell/parser/ctokenizer/avl_tree.c b/mwparserfromhell/parser/ctokenizer/avl_tree.c
new file mode 100644
index 0000000..4fdff6f
--- /dev/null
+++ b/mwparserfromhell/parser/ctokenizer/avl_tree.c
@@ -0,0 +1,789 @@
+/*
+ * avl_tree.c - intrusive, nonrecursive AVL tree data structure (self-balancing
+ *		binary search tree), implementation file
+ *
+ * Written in 2014-2016 by Eric Biggers <ebiggers3@gmail.com>
+ *
+ * To the extent possible under law, the author(s) have dedicated all copyright
+ * and related and neighboring rights to this software to the public domain
+ * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
+ * Dedication (the "CC0").
+ *
+ * This software is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
+ *
+ * You should have received a copy of the CC0 along with this software; if not
+ * see <http://creativecommons.org/publicdomain/zero/1.0/>.
+ */
+
+#include "avl_tree.h"
+
+/* Returns the left child (sign < 0) or the right child (sign > 0) of the
+ * specified AVL tree node.
+ * Note: for all calls of this, 'sign' is constant at compilation time,
+ * so the compiler can remove the conditional.  */
+static AVL_INLINE struct avl_tree_node *
+avl_get_child(const struct avl_tree_node *parent, int sign)
+{
+	if (sign < 0)
+		return parent->left;
+	else
+		return parent->right;
+}
+
+static AVL_INLINE struct avl_tree_node *
+avl_tree_first_or_last_in_order(const struct avl_tree_node *root, int sign)
+{
+	const struct avl_tree_node *first = root;
+
+	if (first)
+		while (avl_get_child(first, +sign))
+			first = avl_get_child(first, +sign);
+	return (struct avl_tree_node *)first;
+}
+
+/* Starts an in-order traversal of the tree: returns the least-valued node, or
+ * NULL if the tree is empty.  */
+struct avl_tree_node *
+avl_tree_first_in_order(const struct avl_tree_node *root)
+{
+	return avl_tree_first_or_last_in_order(root, -1);
+}
+
+/* Starts a *reverse* in-order traversal of the tree: returns the
+ * greatest-valued node, or NULL if the tree is empty.  */
+struct avl_tree_node *
+avl_tree_last_in_order(const struct avl_tree_node *root)
+{
+	return avl_tree_first_or_last_in_order(root, 1);
+}
+
+static AVL_INLINE struct avl_tree_node *
+avl_tree_next_or_prev_in_order(const struct avl_tree_node *node, int sign)
+{
+	const struct avl_tree_node *next;
+
+	if (avl_get_child(node, +sign))
+		for (next = avl_get_child(node, +sign);
+		     avl_get_child(next, -sign);
+		     next = avl_get_child(next, -sign))
+			;
+	else
+		for (next = avl_get_parent(node);
+		     next && node == avl_get_child(next, +sign);
+		     node = next, next = avl_get_parent(next))
+			;
+	return (struct avl_tree_node *)next;
+}
+
+/* Continues an in-order traversal of the tree: returns the next-greatest-valued
+ * node, or NULL if there is none.  */
+struct avl_tree_node *
+avl_tree_next_in_order(const struct avl_tree_node *node)
+{
+	return avl_tree_next_or_prev_in_order(node, 1);
+}
+
+/* Continues a *reverse* in-order traversal of the tree: returns the
+ * previous-greatest-valued node, or NULL if there is none.  */
+struct avl_tree_node *
+avl_tree_prev_in_order(const struct avl_tree_node *node)
+{
+	return avl_tree_next_or_prev_in_order(node, -1);
+}
+
+/* Starts a postorder traversal of the tree.  */
+struct avl_tree_node *
+avl_tree_first_in_postorder(const struct avl_tree_node *root)
+{
+	const struct avl_tree_node *first = root;
+
+	if (first)
+		while (first->left || first->right)
+			first = first->left ? first->left : first->right;
+
+	return (struct avl_tree_node *)first;
+}
+
+/* Continues a postorder traversal of the tree.  @prev will not be
+ * dereferenced as it's allowed that its memory has been freed; @prev_parent
+ * must be its saved parent node.  Returns NULL if there are no more nodes
+ * (i.e. @prev was the root of the tree).  */
+struct avl_tree_node *
+avl_tree_next_in_postorder(const struct avl_tree_node *prev,
+			   const struct avl_tree_node *prev_parent)
+{
+	const struct avl_tree_node *next = prev_parent;
+
+	if (next && prev == next->left && next->right)
+		for (next = next->right;
+		     next->left || next->right;
+		     next = next->left ? next->left : next->right)
+			;
+	return (struct avl_tree_node *)next;
+}
+
+/* Sets the left child (sign < 0) or the right child (sign > 0) of the
+ * specified AVL tree node.
+ * Note: for all calls of this, 'sign' is constant at compilation time,
+ * so the compiler can remove the conditional.  */
+static AVL_INLINE void
+avl_set_child(struct avl_tree_node *parent, int sign,
+	      struct avl_tree_node *child)
+{
+	if (sign < 0)
+		parent->left = child;
+	else
+		parent->right = child;
+}
+
+/* Sets the parent and balance factor of the specified AVL tree node.  */
+static AVL_INLINE void
+avl_set_parent_balance(struct avl_tree_node *node, struct avl_tree_node *parent,
+		       int balance_factor)
+{
+	node->parent_balance = (uintptr_t)parent | (balance_factor + 1);
+}
+
+/* Sets the parent of the specified AVL tree node.  */
+static AVL_INLINE void
+avl_set_parent(struct avl_tree_node *node, struct avl_tree_node *parent)
+{
+	node->parent_balance = (uintptr_t)parent | (node->parent_balance & 3);
+}
+
+/* Returns the balance factor of the specified AVL tree node --- that is, the
+ * height of its right subtree minus the height of its left subtree.  */
+static AVL_INLINE int
+avl_get_balance_factor(const struct avl_tree_node *node)
+{
+	return (int)(node->parent_balance & 3) - 1;
+}
+
+/* Adds @amount to the balance factor of the specified AVL tree node.
+ * The caller must ensure this still results in a valid balance factor
+ * (-1, 0, or 1).  */
+static AVL_INLINE void
+avl_adjust_balance_factor(struct avl_tree_node *node, int amount)
+{
+	node->parent_balance += amount;
+}
+
+static AVL_INLINE void
+avl_replace_child(struct avl_tree_node **root_ptr,
+		  struct avl_tree_node *parent,
+		  struct avl_tree_node *old_child,
+		  struct avl_tree_node *new_child)
+{
+	if (parent) {
+		if (old_child == parent->left)
+			parent->left = new_child;
+		else
+			parent->right = new_child;
+	} else {
+		*root_ptr = new_child;
+	}
+}
+
+/*
+ * Template for performing a single rotation ---
+ *
+ * sign > 0:  Rotate clockwise (right) rooted at A:
+ *
+ *           P?            P?
+ *           |             |
+ *           A             B
+ *          / \           / \
+ *         B   C?  =>    D?  A
+ *        / \               / \
+ *       D?  E?            E?  C?
+ *
+ * (nodes marked with ? may not exist)
+ *
+ * sign < 0:  Rotate counterclockwise (left) rooted at A:
+ *
+ *           P?            P?
+ *           |             |
+ *           A             B
+ *          / \           / \
+ *         C?  B   =>    A   D?
+ *            / \       / \
+ *           E?  D?    C?  E?
+ *
+ * This updates pointers but not balance factors!
+ */
+static AVL_INLINE void
+avl_rotate(struct avl_tree_node ** const root_ptr,
+	   struct avl_tree_node * const A, const int sign)
+{
+	struct avl_tree_node * const B = avl_get_child(A, -sign);
+	struct avl_tree_node * const E = avl_get_child(B, +sign);
+	struct avl_tree_node * const P = avl_get_parent(A);
+
+	avl_set_child(A, -sign, E);
+	avl_set_parent(A, B);
+
+	avl_set_child(B, +sign, A);
+	avl_set_parent(B, P);
+
+	if (E)
+		avl_set_parent(E, A);
+
+	avl_replace_child(root_ptr, P, A, B);
+}
+
+/*
+ * Template for performing a double rotation ---
+ *
+ * sign > 0:  Rotate counterclockwise (left) rooted at B, then
+ *		     clockwise (right) rooted at A:
+ *
+ *           P?            P?          P?
+ *           |             |           |
+ *           A             A           E
+ *          / \           / \        /   \
+ *         B   C?  =>    E   C? =>  B     A
+ *        / \           / \        / \   / \
+ *       D?  E         B   G?     D?  F?G?  C?
+ *          / \       / \
+ *         F?  G?    D?  F?
+ *
+ * (nodes marked with ? may not exist)
+ *
+ * sign < 0:  Rotate clockwise (right) rooted at B, then
+ *		     counterclockwise (left) rooted at A:
+ *
+ *         P?          P?              P?
+ *         |           |               |
+ *         A           A               E
+ *        / \         / \            /   \
+ *       C?  B   =>  C?  E    =>    A     B
+ *          / \         / \        / \   / \
+ *         E   D?      G?  B      C?  G?F?  D?
+ *        / \             / \
+ *       G?  F?          F?  D?
+ *
+ * Returns a pointer to E and updates balance factors.  Except for those
+ * two things, this function is equivalent to:
+ *	avl_rotate(root_ptr, B, -sign);
+ *	avl_rotate(root_ptr, A, +sign);
+ *
+ * See comment in avl_handle_subtree_growth() for explanation of balance
+ * factor updates.
+ */
+static AVL_INLINE struct avl_tree_node *
+avl_do_double_rotate(struct avl_tree_node ** const root_ptr,
+		     struct avl_tree_node * const B,
+		     struct avl_tree_node * const A, const int sign)
+{
+	struct avl_tree_node * const E = avl_get_child(B, +sign);
+	struct avl_tree_node * const F = avl_get_child(E, -sign);
+	struct avl_tree_node * const G = avl_get_child(E, +sign);
+	struct avl_tree_node * const P = avl_get_parent(A);
+	const int e = avl_get_balance_factor(E);
+
+	avl_set_child(A, -sign, G);
+	avl_set_parent_balance(A, E, ((sign * e >= 0) ? 0 : -e));
+
+	avl_set_child(B, +sign, F);
+	avl_set_parent_balance(B, E, ((sign * e <= 0) ? 0 : -e));
+
+	avl_set_child(E, +sign, A);
+	avl_set_child(E, -sign, B);
+	avl_set_parent_balance(E, P, 0);
+
+	if (G)
+		avl_set_parent(G, A);
+
+	if (F)
+		avl_set_parent(F, B);
+
+	avl_replace_child(root_ptr, P, A, E);
+
+	return E;
+}
+
+/*
+ * This function handles the growth of a subtree due to an insertion.
+ *
+ * @root_ptr
+ *	Location of the tree's root pointer.
+ *
+ * @node
+ *	A subtree that has increased in height by 1 due to an insertion.
+ *
+ * @parent
+ *	Parent of @node; must not be NULL.
+ *
+ * @sign
+ *	-1 if @node is the left child of @parent;
+ *	+1 if @node is the right child of @parent.
+ *
+ * This function will adjust @parent's balance factor, then do a (single
+ * or double) rotation if necessary.  The return value will be %true if
+ * the full AVL tree is now adequately balanced, or %false if the subtree
+ * rooted at @parent is now adequately balanced but has increased in
+ * height by 1, so the caller should continue up the tree.
+ *
+ * Note that if %false is returned, no rotation will have been done.
+ * Indeed, a single node insertion cannot require that more than one
+ * (single or double) rotation be done.
+ */
+static AVL_INLINE bool
+avl_handle_subtree_growth(struct avl_tree_node ** const root_ptr,
+			  struct avl_tree_node * const node,
+			  struct avl_tree_node * const parent,
+			  const int sign)
+{
+	int old_balance_factor, new_balance_factor;
+
+	old_balance_factor = avl_get_balance_factor(parent);
+
+	if (old_balance_factor == 0) {
+		avl_adjust_balance_factor(parent, sign);
+		/* @parent is still sufficiently balanced (-1 or +1
+		 * balance factor), but must have increased in height.
+		 * Continue up the tree.  */
+		return false;
+	}
+
+	new_balance_factor = old_balance_factor + sign;
+
+	if (new_balance_factor == 0) {
+		avl_adjust_balance_factor(parent, sign);
+		/* @parent is now perfectly balanced (0 balance factor).
+		 * It cannot have increased in height, so there is
+		 * nothing more to do.  */
+		return true;
+	}
+
+	/* @parent is too left-heavy (new_balance_factor == -2) or
+	 * too right-heavy (new_balance_factor == +2).  */
+
+	/* Test whether @node is left-heavy (-1 balance factor) or
+	 * right-heavy (+1 balance factor).
+	 * Note that it cannot be perfectly balanced (0 balance factor)
+	 * because here we are under the invariant that @node has
+	 * increased in height due to the insertion.  */
+	if (sign * avl_get_balance_factor(node) > 0) {
+
+		/* @node (B below) is heavy in the same direction @parent
+		 * (A below) is heavy.
+		 *
+		 * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+		 * The comment, diagram, and equations below assume sign < 0.
+		 * The other case is symmetric!
+		 * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+		 *
+		 * Do a clockwise rotation rooted at @parent (A below):
+		 *
+		 *           A              B
+		 *          / \           /   \
+		 *         B   C?  =>    D     A
+		 *        / \           / \   / \
+		 *       D   E?        F?  G?E?  C?
+		 *      / \
+		 *     F?  G?
+		 *
+		 * Before the rotation:
+		 *	balance(A) = -2
+		 *	balance(B) = -1
+		 * Let x = height(C).  Then:
+		 *	height(B) = x + 2
+		 *	height(D) = x + 1
+		 *	height(E) = x
+		 *	max(height(F), height(G)) = x.
+		 *
+		 * After the rotation:
+		 *	height(D) = max(height(F), height(G)) + 1
+		 *		  = x + 1
+		 *	height(A) = max(height(E), height(C)) + 1
+		 *		  = max(x, x) + 1 = x + 1
+		 *	balance(B) = 0
+		 *	balance(A) = 0
+		 */
+		avl_rotate(root_ptr, parent, -sign);
+
+		/* Equivalent to setting @parent's balance factor to 0.  */
+		avl_adjust_balance_factor(parent, -sign); /* A */
+
+		/* Equivalent to setting @node's balance factor to 0.  */
+		avl_adjust_balance_factor(node, -sign);   /* B */
+	} else {
+		/* @node (B below) is heavy in the direction opposite
+		 * from the direction @parent (A below) is heavy.
+		 *
+		 * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+		 * The comment, diagram, and equations below assume sign < 0.
+		 * The other case is symmetric!
+		 * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+		 *
+		 * Do a counterclockwise rotation rooted at @node (B below),
+		 * then a clockwise rotation rooted at @parent (A below):
+		 *
+		 *           A             A           E
+		 *          / \           / \        /   \
+		 *         B   C?  =>    E   C? =>  B     A
+		 *        / \           / \        / \   / \
+		 *       D?  E         B   G?     D?  F?G?  C?
+		 *          / \       / \
+		 *         F?  G?    D?  F?
+		 *
+		 * Before the rotation:
+		 *	balance(A) = -2
+		 *	balance(B) = +1
+		 * Let x = height(C).  Then:
+		 *	height(B) = x + 2
+		 *	height(E) = x + 1
+		 *	height(D) = x
+		 *	max(height(F), height(G)) = x
+		 *
+		 * After both rotations:
+		 *	height(A) = max(height(G), height(C)) + 1
+		 *		  = x + 1
+		 *	balance(A) = balance(E{orig}) >= 0 ? 0 : -balance(E{orig})
+		 *	height(B) = max(height(D), height(F)) + 1
+		 *		  = x + 1
+		 *	balance(B) = balance(E{orig}) <= 0 ? 0 : -balance(E{orig})
+		 *
+		 *	height(E) = x + 2
+		 *	balance(E) = 0
+		 */
+		avl_do_double_rotate(root_ptr, node, parent, -sign);
+	}
+
+	/* Height after rotation is unchanged; nothing more to do.  */
+	return true;
+}
+
+/* Rebalance the tree after insertion of the specified node.  */
+void
+avl_tree_rebalance_after_insert(struct avl_tree_node **root_ptr,
+				struct avl_tree_node *inserted)
+{
+	struct avl_tree_node *node, *parent;
+	bool done;
+
+	inserted->left = NULL;
+	inserted->right = NULL;
+
+	node = inserted;
+
+	/* Adjust balance factor of new node's parent.
+	 * No rotation will need to be done at this level.  */
+
+	parent = avl_get_parent(node);
+	if (!parent)
+		return;
+
+	if (node == parent->left)
+		avl_adjust_balance_factor(parent, -1);
+	else
+		avl_adjust_balance_factor(parent, +1);
+
+	if (avl_get_balance_factor(parent) == 0)
+		/* @parent did not change in height.  Nothing more to do.  */
+		return;
+
+	/* The subtree rooted at @parent increased in height by 1.  */
+
+	do {
+		/* Adjust balance factor of next ancestor.  */
+
+		node = parent;
+		parent = avl_get_parent(node);
+		if (!parent)
+			return;
+
+		/* The subtree rooted at @node has increased in height by 1.  */
+		if (node == parent->left)
+			done = avl_handle_subtree_growth(root_ptr, node,
+							 parent, -1);
+		else
+			done = avl_handle_subtree_growth(root_ptr, node,
+							 parent, +1);
+	} while (!done);
+}
+
+/*
+ * This function handles the shrinkage of a subtree due to a deletion.
+ *
+ * @root_ptr
+ *	Location of the tree's root pointer.
+ *
+ * @parent
+ *	A node in the tree, exactly one of whose subtrees has decreased
+ *	in height by 1 due to a deletion.  (This includes the case where
+ *	one of the child pointers has become NULL, since we can consider
+ *	the "NULL" subtree to have a height of 0.)
+ *
+ * @sign
+ *	+1 if the left subtree of @parent has decreased in height by 1;
+ *	-1 if the right subtree of @parent has decreased in height by 1.
+ *
+ * @left_deleted_ret
+ *	If the return value is not NULL, this will be set to %true if the
+ *	left subtree of the returned node has decreased in height by 1,
+ *	or %false if the right subtree of the returned node has decreased
+ *	in height by 1.
+ *
+ * This function will adjust @parent's balance factor, then do a (single
+ * or double) rotation if necessary.  The return value will be NULL if
+ * the full AVL tree is now adequately balanced, or a pointer to the
+ * parent of @parent if @parent is now adequately balanced but has
+ * decreased in height by 1.  Also in the latter case, *left_deleted_ret
+ * will be set.
+ */
+static AVL_INLINE struct avl_tree_node *
+avl_handle_subtree_shrink(struct avl_tree_node ** const root_ptr,
+			  struct avl_tree_node *parent,
+			  const int sign,
+			  bool * const left_deleted_ret)
+{
+	struct avl_tree_node *node;
+	int old_balance_factor, new_balance_factor;
+
+	old_balance_factor = avl_get_balance_factor(parent);
+
+	if (old_balance_factor == 0) {
+		/* Prior to the deletion, the subtree rooted at
+		 * @parent was perfectly balanced.  It's now
+		 * unbalanced by 1, but that's okay and its height
+		 * hasn't changed.  Nothing more to do.  */
+		avl_adjust_balance_factor(parent, sign);
+		return NULL;
+	}
+
+	new_balance_factor = old_balance_factor + sign;
+
+	if (new_balance_factor == 0) {
+		/* The subtree rooted at @parent is now perfectly
+		 * balanced, whereas before the deletion it was
+		 * unbalanced by 1.  Its height must have decreased
+		 * by 1.  No rotation is needed at this location,
+		 * but continue up the tree.  */
+		avl_adjust_balance_factor(parent, sign);
+		node = parent;
+	} else {
+		/* @parent is too left-heavy (new_balance_factor == -2) or
+		 * too right-heavy (new_balance_factor == +2).  */
+
+		node = avl_get_child(parent, sign);
+
+		/* The rotations below are similar to those done during
+		 * insertion (see avl_handle_subtree_growth()), so full
+		 * comments are not provided.  The only new case is the
+		 * one where @node has a balance factor of 0, and that is
+		 * commented.  */
+
+		if (sign * avl_get_balance_factor(node) >= 0) {
+
+			avl_rotate(root_ptr, parent, -sign);
+
+			if (avl_get_balance_factor(node) == 0) {
+				/*
+				 * @node (B below) is perfectly balanced.
+				 *
+				 * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+				 * The comment, diagram, and equations
+				 * below assume sign < 0.  The other case
+				 * is symmetric!
+				 * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+				 *
+				 * Do a clockwise rotation rooted at
+				 * @parent (A below):
+				 *
+				 *           A              B
+				 *          / \           /   \
+				 *         B   C?  =>    D     A
+				 *        / \           / \   / \
+				 *       D   E         F?  G?E   C?
+				 *      / \
+				 *     F?  G?
+				 *
+				 * Before the rotation:
+				 *	balance(A) = -2
+				 *	balance(B) =  0
+				 * Let x = height(C).  Then:
+				 *	height(B) = x + 2
+				 *	height(D) = x + 1
+				 *	height(E) = x + 1
+				 *	max(height(F), height(G)) = x.
+				 *
+				 * After the rotation:
+				 *	height(D) = max(height(F), height(G)) + 1
+				 *		  = x + 1
+				 *	height(A) = max(height(E), height(C)) + 1
+				 *		  = max(x + 1, x) + 1 = x + 2
+				 *	balance(A) = -1
+				 *	balance(B) = +1
+				 */
+
+				/* A: -2 => -1 (sign < 0)
+				 * or +2 => +1 (sign > 0)
+				 * No change needed --- that's the same as
+				 * old_balance_factor.  */
+
+				/* B: 0 => +1 (sign < 0)
+				 * or 0 => -1 (sign > 0)  */
+				avl_adjust_balance_factor(node, -sign);
+
+				/* Height is unchanged; nothing more to do.  */
+				return NULL;
+			} else {
+				avl_adjust_balance_factor(parent, -sign);
+				avl_adjust_balance_factor(node, -sign);
+			}
+		} else {
+			node = avl_do_double_rotate(root_ptr, node,
+						    parent, -sign);
+		}
+	}
+	parent = avl_get_parent(node);
+	if (parent)
+		*left_deleted_ret = (node == parent->left);
+	return parent;
+}
+
+/* Swaps node X, which must have 2 children, with its in-order successor, then
+ * unlinks node X.  Returns the parent of X just before unlinking, without its
+ * balance factor having been updated to account for the unlink.  */
+static AVL_INLINE struct avl_tree_node *
+avl_tree_swap_with_successor(struct avl_tree_node **root_ptr,
+			     struct avl_tree_node *X,
+			     bool *left_deleted_ret)
+{
+	struct avl_tree_node *Y, *ret;
+
+	Y = X->right;
+	if (!Y->left) {
+		/*
+		 *     P?           P?           P?
+		 *     |            |            |
+		 *     X            Y            Y
+		 *    / \          / \          / \
+		 *   A   Y    =>  A   X    =>  A   B?
+		 *      / \          / \
+		 *    (0)  B?      (0)  B?
+		 *
+		 * [ X unlinked, Y returned ]
+		 */
+		ret = Y;
+		*left_deleted_ret = false;
+	} else {
+		struct avl_tree_node *Q;
+
+		do {
+			Q = Y;
+			Y = Y->left;
+		} while (Y->left);
+
+		/*
+		 *     P?           P?           P?
+		 *     |            |            |
+		 *     X            Y            Y
+		 *    / \          / \          / \
+		 *   A   ...  =>  A  ...   =>  A  ...
+		 *       |            |            |
+		 *       Q            Q            Q
+		 *      /            /            /
+		 *     Y            X            B?
+		 *    / \          / \
+		 *  (0)  B?      (0)  B?
+		 *
+		 *
+		 * [ X unlinked, Q returned ]
+		 */
+
+		Q->left = Y->right;
+		if (Q->left)
+			avl_set_parent(Q->left, Q);
+		Y->right = X->right;
+		avl_set_parent(X->right, Y);
+		ret = Q;
+		*left_deleted_ret = true;
+	}
+
+	Y->left = X->left;
+	avl_set_parent(X->left, Y);
+
+	Y->parent_balance = X->parent_balance;
+	avl_replace_child(root_ptr, avl_get_parent(X), X, Y);
+
+	return ret;
+}
+
+/*
+ * Removes an item from the specified AVL tree.
+ *
+ * @root_ptr
+ *	Location of the AVL tree's root pointer.  Indirection is needed
+ *	because the root node may change if the tree needed to be rebalanced
+ *	because of the deletion or if @node was the root node.
+ *
+ * @node
+ *	Pointer to the `struct avl_tree_node' embedded in the item to
+ *	remove from the tree.
+ *
+ * Note: This function *only* removes the node and rebalances the tree.
+ * It does not free any memory, nor does it do the equivalent of
+ * avl_tree_node_set_unlinked().
+ */
+void
+avl_tree_remove(struct avl_tree_node **root_ptr, struct avl_tree_node *node)
+{
+	struct avl_tree_node *parent;
+	bool left_deleted = false;
+
+	if (node->left && node->right) {
+		/* @node is fully internal, with two children.  Swap it
+		 * with its in-order successor (which must exist in the
+		 * right subtree of @node and can have, at most, a right
+		 * child), then unlink @node.  */
+		parent = avl_tree_swap_with_successor(root_ptr, node,
+						      &left_deleted);
+		/* @parent is now the parent of what was @node's in-order
+		 * successor.  It cannot be NULL, since @node itself was
+		 * an ancestor of its in-order successor.
+		 * @left_deleted has been set to %true if @node's
+		 * in-order successor was the left child of @parent,
+		 * otherwise %false.  */
+	} else {
+		struct avl_tree_node *child;
+
+		/* @node is missing at least one child.  Unlink it.  Set
+		 * @parent to @node's parent, and set @left_deleted to
+		 * reflect which child of @parent @node was.  Or, if
+		 * @node was the root node, simply update the root node
+		 * and return.  */
+		child = node->left ? node->left : node->right;
+		parent = avl_get_parent(node);
+		if (parent) {
+			if (node == parent->left) {
+				parent->left = child;
+				left_deleted = true;
+			} else {
+				parent->right = child;
+				left_deleted = false;
+			}
+			if (child)
+				avl_set_parent(child, parent);
+		} else {
+			if (child)
+				avl_set_parent(child, parent);
+			*root_ptr = child;
+			return;
+		}
+	}
+
+	/* Rebalance the tree.  */
+	do {
+		if (left_deleted)
+			parent = avl_handle_subtree_shrink(root_ptr, parent,
+							   +1, &left_deleted);
+		else
+			parent = avl_handle_subtree_shrink(root_ptr, parent,
+							   -1, &left_deleted);
+	} while (parent);
+}
diff --git a/mwparserfromhell/parser/ctokenizer/avl_tree.h b/mwparserfromhell/parser/ctokenizer/avl_tree.h
new file mode 100644
index 0000000..86ade3f
--- /dev/null
+++ b/mwparserfromhell/parser/ctokenizer/avl_tree.h
@@ -0,0 +1,358 @@
+/*
+ * avl_tree.h - intrusive, nonrecursive AVL tree data structure (self-balancing
+ *		binary search tree), header file
+ *
+ * Written in 2014-2016 by Eric Biggers <ebiggers3@gmail.com>
+ *
+ * To the extent possible under law, the author(s) have dedicated all copyright
+ * and related and neighboring rights to this software to the public domain
+ * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
+ * Dedication (the "CC0").
+ *
+ * This software is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
+ *
+ * You should have received a copy of the CC0 along with this software; if not
+ * see <http://creativecommons.org/publicdomain/zero/1.0/>.
+ */
+
+#ifndef _AVL_TREE_H_
+#define _AVL_TREE_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <inttypes.h> /* for uintptr_t */
+
+#ifdef __GNUC__
+#  define AVL_INLINE inline __attribute__((always_inline))
+#else
+#  define AVL_INLINE inline
+#endif
+
+/* Node in an AVL tree.  Embed this in some other data structure.  */
+struct avl_tree_node {
+
+	/* Pointer to left child or NULL  */
+	struct avl_tree_node *left;
+
+	/* Pointer to right child or NULL  */
+	struct avl_tree_node *right;
+
+	/* Pointer to parent combined with the balance factor.  This saves 4 or
+	 * 8 bytes of memory depending on the CPU architecture.
+	 *
+	 * Low 2 bits:  One greater than the balance factor of this subtree,
+	 * which is equal to height(right) - height(left).  The mapping is:
+	 *
+	 * 00 => -1
+	 * 01 =>  0
+	 * 10 => +1
+	 * 11 => undefined
+	 *
+	 * The rest of the bits are the pointer to the parent node.  It must be
+	 * 4-byte aligned, and it will be NULL if this is the root node and
+	 * therefore has no parent.  */
+	uintptr_t parent_balance;
+};
+
+/* Cast an AVL tree node to the containing data structure.  */
+#define avl_tree_entry(entry, type, member) \
+	((type*) ((char *)(entry) - offsetof(type, member)))
+
+/* Returns a pointer to the parent of the specified AVL tree node, or NULL if it
+ * is already the root of the tree.  */
+static AVL_INLINE struct avl_tree_node *
+avl_get_parent(const struct avl_tree_node *node)
+{
+	return (struct avl_tree_node *)(node->parent_balance & ~3);
+}
+
+/* Marks the specified AVL tree node as unlinked from any tree.  */
+static AVL_INLINE void
+avl_tree_node_set_unlinked(struct avl_tree_node *node)
+{
+	node->parent_balance = (uintptr_t)node;
+}
+
+/* Returns true iff the specified AVL tree node has been marked with
+ * avl_tree_node_set_unlinked() and has not subsequently been inserted into a
+ * tree.  */
+static AVL_INLINE bool
+avl_tree_node_is_unlinked(const struct avl_tree_node *node)
+{
+	return node->parent_balance == (uintptr_t)node;
+}
+
+/* (Internal use only)  */
+extern void
+avl_tree_rebalance_after_insert(struct avl_tree_node **root_ptr,
+				struct avl_tree_node *inserted);
+
+/*
+ * Looks up an item in the specified AVL tree.
+ *
+ * @root
+ *	Pointer to the root of the AVL tree.  (This can be NULL --- that just
+ *	means the tree is empty.)
+ *
+ * @cmp_ctx
+ *	First argument to pass to the comparison callback.  This generally
+ *	should be a pointer to an object equal to the one being searched for.
+ *
+ * @cmp
+ *	Comparison callback.  Must return < 0, 0, or > 0 if the first argument
+ *	is less than, equal to, or greater than the second argument,
+ *	respectively.  The first argument will be @cmp_ctx and the second
+ *	argument will be a pointer to the AVL tree node of an item in the tree.
+ *
+ * Returns a pointer to the AVL tree node of the resulting item, or NULL if the
+ * item was not found.
+ *
+ * Example:
+ *
+ * struct int_wrapper {
+ *	int data;
+ *	struct avl_tree_node index_node;
+ * };
+ *
+ * static int _avl_cmp_int_to_node(const void *intptr,
+ *				   const struct avl_tree_node *nodeptr)
+ * {
+ *	int n1 = *(const int *)intptr;
+ *	int n2 = avl_tree_entry(nodeptr, struct int_wrapper, index_node)->data;
+ *	if (n1 < n2)
+ *		return -1;
+ *	else if (n1 > n2)
+ *		return 1;
+ *	else
+ *		return 0;
+ * }
+ *
+ * bool contains_int(struct avl_tree_node *root, int n)
+ * {
+ *	struct avl_tree_node *result;
+ *
+ *	result = avl_tree_lookup(root, &n, _avl_cmp_int_to_node);
+ *	return result ? true : false;
+ * }
+ */
+static AVL_INLINE struct avl_tree_node *
+avl_tree_lookup(const struct avl_tree_node *root,
+		const void *cmp_ctx,
+		int (*cmp)(const void *, const struct avl_tree_node *))
+{
+	const struct avl_tree_node *cur = root;
+
+	while (cur) {
+		int res = (*cmp)(cmp_ctx, cur);
+		if (res < 0)
+			cur = cur->left;
+		else if (res > 0)
+			cur = cur->right;
+		else
+			break;
+	}
+	return (struct avl_tree_node*)cur;
+}
+
+/* Same as avl_tree_lookup(), but uses a more specific type for the comparison
+ * function.  Specifically, with this function the item being searched for is
+ * expected to be in the same format as those already in the tree, with an
+ * embedded 'struct avl_tree_node'.  */
+static AVL_INLINE struct avl_tree_node *
+avl_tree_lookup_node(const struct avl_tree_node *root,
+		     const struct avl_tree_node *node,
+		     int (*cmp)(const struct avl_tree_node *,
+				const struct avl_tree_node *))
+{
+	const struct avl_tree_node *cur = root;
+
+	while (cur) {
+		int res = (*cmp)(node, cur);
+		if (res < 0)
+			cur = cur->left;
+		else if (res > 0)
+			cur = cur->right;
+		else
+			break;
+	}
+	return (struct avl_tree_node*)cur;
+}
+
+/*
+ * Inserts an item into the specified AVL tree.
+ *
+ * @root_ptr
+ *	Location of the AVL tree's root pointer.  Indirection is needed because
+ *	the root node may change as a result of rotations caused by the
+ *	insertion.  Initialize *root_ptr to NULL for an empty tree.
+ *
+ * @item
+ *	Pointer to the `struct avl_tree_node' embedded in the item to insert.
+ *	No members in it need be pre-initialized, although members in the
+ *	containing structure should be pre-initialized so that @cmp can use them
+ *	in comparisons.
+ *
+ * @cmp
+ *	Comparison callback.  Must return < 0, 0, or > 0 if the first argument
+ *	is less than, equal to, or greater than the second argument,
+ *	respectively.  The first argument will be @item and the second
+ *	argument will be a pointer to an AVL tree node embedded in some
+ *	previously-inserted item to which @item is being compared.
+ *
+ * If no item in the tree is comparatively equal (via @cmp) to @item, inserts
+ * @item and returns NULL.  Otherwise does nothing and returns a pointer to the
+ * AVL tree node embedded in the previously-inserted item which compared equal
+ * to @item.
+ *
+ * Example:
+ *
+ * struct int_wrapper {
+ *	int data;
+ *	struct avl_tree_node index_node;
+ * };
+ *
+ * #define GET_DATA(i) avl_tree_entry((i), struct int_wrapper, index_node)->data
+ *
+ * static int _avl_cmp_ints(const struct avl_tree_node *node1,
+ *			    const struct avl_tree_node *node2)
+ * {
+ *	int n1 = GET_DATA(node1);
+ *	int n2 = GET_DATA(node2);
+ *	if (n1 < n2)
+ *		return -1;
+ *	else if (n1 > n2)
+ *		return 1;
+ *	else
+ *		return 0;
+ * }
+ *
+ * bool insert_int(struct avl_tree_node **root_ptr, int data)
+ * {
+ *	struct int_wrapper *i = malloc(sizeof(struct int_wrapper));
+ *	i->data = data;
+ *	if (avl_tree_insert(root_ptr, &i->index_node, _avl_cmp_ints)) {
+ *		// Duplicate.
+ *		free(i);
+ *		return false;
+ *	}
+ *	return true;
+ * }
+ */
+static AVL_INLINE struct avl_tree_node *
+avl_tree_insert(struct avl_tree_node **root_ptr,
+		struct avl_tree_node *item,
+		int (*cmp)(const struct avl_tree_node *,
+			   const struct avl_tree_node *))
+{
+	struct avl_tree_node **cur_ptr = root_ptr, *cur = NULL;
+	int res;
+
+	while (*cur_ptr) {
+		cur = *cur_ptr;
+		res = (*cmp)(item, cur);
+		if (res < 0)
+			cur_ptr = &cur->left;
+		else if (res > 0)
+			cur_ptr = &cur->right;
+		else
+			return cur;
+	}
+	*cur_ptr = item;
+	item->parent_balance = (uintptr_t)cur | 1;
+	avl_tree_rebalance_after_insert(root_ptr, item);
+	return NULL;
+}
+
+/* Removes an item from the specified AVL tree.
+ * See implementation for details.  */
+extern void
+avl_tree_remove(struct avl_tree_node **root_ptr, struct avl_tree_node *node);
+
+/* Nonrecursive AVL tree traversal functions  */
+
+extern struct avl_tree_node *
+avl_tree_first_in_order(const struct avl_tree_node *root);
+
+extern struct avl_tree_node *
+avl_tree_last_in_order(const struct avl_tree_node *root);
+
+extern struct avl_tree_node *
+avl_tree_next_in_order(const struct avl_tree_node *node);
+
+extern struct avl_tree_node *
+avl_tree_prev_in_order(const struct avl_tree_node *node);
+
+extern struct avl_tree_node *
+avl_tree_first_in_postorder(const struct avl_tree_node *root);
+
+extern struct avl_tree_node *
+avl_tree_next_in_postorder(const struct avl_tree_node *prev,
+			   const struct avl_tree_node *prev_parent);
+
+/*
+ * Iterate through the nodes in an AVL tree in sorted order.
+ * You may not modify the tree during the iteration.
+ *
+ * @child_struct
+ *	Variable that will receive a pointer to each struct inserted into the
+ *	tree.
+ * @root
+ *	Root of the AVL tree.
+ * @struct_name
+ *	Type of *child_struct.
+ * @struct_member
+ *	Member of @struct_name type that is the AVL tree node.
+ *
+ * Example:
+ *
+ * struct int_wrapper {
+ *	int data;
+ *	struct avl_tree_node index_node;
+ * };
+ *
+ * void print_ints(struct avl_tree_node *root)
+ * {
+ *	struct int_wrapper *i;
+ *
+ *	avl_tree_for_each_in_order(i, root, struct int_wrapper, index_node)
+ *		printf("%d\n", i->data);
+ * }
+ */
+#define avl_tree_for_each_in_order(child_struct, root,			\
+				   struct_name, struct_member)		\
+	for (struct avl_tree_node *_cur =				\
+		avl_tree_first_in_order(root);				\
+	     _cur && ((child_struct) =					\
+		      avl_tree_entry(_cur, struct_name,			\
+				     struct_member), 1);		\
+	     _cur = avl_tree_next_in_order(_cur))
+
+/*
+ * Like avl_tree_for_each_in_order(), but uses the reverse order.
+ */
+#define avl_tree_for_each_in_reverse_order(child_struct, root,		\
+					   struct_name, struct_member)	\
+	for (struct avl_tree_node *_cur =				\
+		avl_tree_last_in_order(root);				\
+	     _cur && ((child_struct) =					\
+		      avl_tree_entry(_cur, struct_name,			\
+				     struct_member), 1);		\
+	     _cur = avl_tree_prev_in_order(_cur))
+
+/*
+ * Like avl_tree_for_each_in_order(), but iterates through the nodes in
+ * postorder, so the current node may be deleted or freed.
+ */
+#define avl_tree_for_each_in_postorder(child_struct, root,		\
+				       struct_name, struct_member)	\
+	for (struct avl_tree_node *_cur =				\
+		avl_tree_first_in_postorder(root), *_parent;		\
+	     _cur && ((child_struct) =					\
+		      avl_tree_entry(_cur, struct_name,			\
+				     struct_member), 1)			\
+	          && (_parent = avl_get_parent(_cur), 1);		\
+	     _cur = avl_tree_next_in_postorder(_cur, _parent))
+
+#endif /* _AVL_TREE_H_ */
diff --git a/mwparserfromhell/parser/ctokenizer/common.h b/mwparserfromhell/parser/ctokenizer/common.h
index 3bd22af..f3d51f4 100644
--- a/mwparserfromhell/parser/ctokenizer/common.h
+++ b/mwparserfromhell/parser/ctokenizer/common.h
@@ -1,5 +1,5 @@
 /*
-Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
+Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
@@ -30,6 +30,8 @@ SOFTWARE.
 #include <structmember.h>
 #include <bytesobject.h>
 
+#include "avl_tree.h"
+
 /* Compatibility macros */
 
 #if PY_MAJOR_VERSION >= 3
@@ -92,10 +94,16 @@ typedef struct {
 #endif
 } Textbuffer;
 
+typedef struct {
+    Py_ssize_t head;
+    uint64_t context;
+} StackIdent;
+
 struct Stack {
     PyObject* stack;
     uint64_t context;
     Textbuffer* textbuffer;
+    StackIdent ident;
     struct Stack* next;
 };
 typedef struct Stack Stack;
@@ -111,6 +119,13 @@ typedef struct {
 #endif
 } TokenizerInput;
 
+typedef struct avl_tree_node avl_tree;
+
+typedef struct {
+    StackIdent id;
+    struct avl_tree_node node;
+} route_tree_node;
+
 typedef struct {
     PyObject_HEAD
     TokenizerInput text;     /* text to tokenize */
@@ -118,8 +133,8 @@ typedef struct {
     Py_ssize_t head;         /* current position in text */
     int global;              /* global context */
     int depth;               /* stack recursion depth */
-    int cycles;              /* total number of stack recursions */
     int route_state;         /* whether a BadRoute has been triggered */
     uint64_t route_context;  /* context when the last BadRoute was triggered */
+    avl_tree* bad_routes;    /* stack idents for routes known to fail */
     int skip_style_tags;     /* temp fix for the sometimes broken tag parser */
 } Tokenizer;
diff --git a/mwparserfromhell/parser/ctokenizer/contexts.h b/mwparserfromhell/parser/ctokenizer/contexts.h
index 96afd6c..2696925 100644
--- a/mwparserfromhell/parser/ctokenizer/contexts.h
+++ b/mwparserfromhell/parser/ctokenizer/contexts.h
@@ -1,5 +1,5 @@
 /*
-Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
+Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
@@ -81,6 +81,8 @@ SOFTWARE.
 #define LC_TABLE_TD_LINE            0x0000000800000000
 #define LC_TABLE_TH_LINE            0x0000001000000000
 
+#define LC_HTML_ENTITY              0x0000002000000000
+
 /* Global contexts */
 
 #define GL_HEADING 0x1
diff --git a/mwparserfromhell/parser/ctokenizer/tok_parse.c b/mwparserfromhell/parser/ctokenizer/tok_parse.c
index f4e9606..27eed67 100644
--- a/mwparserfromhell/parser/ctokenizer/tok_parse.c
+++ b/mwparserfromhell/parser/ctokenizer/tok_parse.c
@@ -1,5 +1,5 @@
 /*
-Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
+Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
@@ -445,6 +445,8 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self)
     Unicode this;
     int slashes, i;
 
+    if (Tokenizer_check_route(self, LC_EXT_LINK_URI) < 0)
+        return 0;
     if (Tokenizer_push(self, LC_EXT_LINK_URI))
         return -1;
     if (Tokenizer_read(self, 0) == '/' && Tokenizer_read(self, 1) == '/') {
@@ -461,7 +463,7 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self)
             while (1) {
                 if (!valid[i])
                     goto end_of_loop;
-                if (this == valid[i])
+                if (this == (Unicode) valid[i])
                     break;
                 i++;
             }
@@ -533,7 +535,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self)
                 FAIL_ROUTE(0);
                 return 0;
             }
-        } while (chunk != valid[j++]);
+        } while (chunk != (Unicode) valid[j++]);
         Textbuffer_write(scheme_buffer, chunk);
     }
     end_of_loop:
@@ -552,7 +554,12 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self)
         return 0;
     }
     Py_DECREF(scheme);
-    if (Tokenizer_push(self, self->topstack->context | LC_EXT_LINK_URI)) {
+    uint64_t new_context = self->topstack->context | LC_EXT_LINK_URI;
+    if (Tokenizer_check_route(self, new_context) < 0) {
+        Textbuffer_dealloc(scheme_buffer);
+        return 0;
+    }
+    if (Tokenizer_push(self, new_context)) {
         Textbuffer_dealloc(scheme_buffer);
         return -1;
     }
@@ -1000,7 +1007,7 @@ static int Tokenizer_really_parse_entity(Tokenizer* self)
         while (1) {
             if (!valid[j])
                 FAIL_ROUTE_AND_EXIT()
-            if (this == valid[j])
+            if (this == (Unicode) valid[j])
                 break;
             j++;
         }
@@ -1065,11 +1072,14 @@ static int Tokenizer_parse_entity(Tokenizer* self)
     Py_ssize_t reset = self->head;
     PyObject *tokenlist;
 
-    if (Tokenizer_push(self, 0))
+    if (Tokenizer_check_route(self, LC_HTML_ENTITY) < 0)
+        goto on_bad_route;
+    if (Tokenizer_push(self, LC_HTML_ENTITY))
         return -1;
     if (Tokenizer_really_parse_entity(self))
         return -1;
     if (BAD_ROUTE) {
+        on_bad_route:
         RESET_ROUTE();
         self->head = reset;
         if (Tokenizer_emit_char(self, '&'))
@@ -1574,6 +1584,11 @@ static PyObject* Tokenizer_really_parse_tag(Tokenizer* self)
 
     if (!data)
         return NULL;
+    if (Tokenizer_check_route(self, LC_TAG_OPEN) < 0) {
+        /* Don't leak data; mirrors the Tokenizer_push failure path. */
+        TagData_dealloc(data);
+        return NULL;
+    }
     if (Tokenizer_push(self, LC_TAG_OPEN)) {
         TagData_dealloc(data);
         return NULL;
@@ -2191,14 +2203,17 @@ static PyObject* Tokenizer_handle_table_style(Tokenizer* self, Unicode end_token
 static int Tokenizer_parse_table(Tokenizer* self)
 {
     Py_ssize_t reset = self->head;
-    PyObject *style, *padding;
+    PyObject *style, *padding, *trash;
     PyObject *table = NULL;
     self->head += 2;
 
-    if(Tokenizer_push(self, LC_TABLE_OPEN))
+    if (Tokenizer_check_route(self, LC_TABLE_OPEN) < 0)
+        goto on_bad_route;
+    if (Tokenizer_push(self, LC_TABLE_OPEN))
         return -1;
     padding = Tokenizer_handle_table_style(self, '\n');
     if (BAD_ROUTE) {
+        on_bad_route:
         RESET_ROUTE();
         self->head = reset;
         if (Tokenizer_emit_char(self, '{'))
@@ -2214,11 +2229,16 @@ static int Tokenizer_parse_table(Tokenizer* self)
     }
 
     self->head++;
+    StackIdent restore_point = self->topstack->ident;
     table = Tokenizer_parse(self, LC_TABLE_OPEN, 1);
     if (BAD_ROUTE) {
         RESET_ROUTE();
         Py_DECREF(padding);
         Py_DECREF(style);
+        while (!Tokenizer_IS_CURRENT_STACK(self, restore_point)) {
+            trash = Tokenizer_pop(self);
+            Py_XDECREF(trash);
+        }
         self->head = reset;
         if (Tokenizer_emit_char(self, '{'))
             return -1;
@@ -2243,7 +2263,7 @@ static int Tokenizer_parse_table(Tokenizer* self)
 */
 static int Tokenizer_handle_table_row(Tokenizer* self)
 {
-    PyObject *padding, *style, *row, *trash;
+    PyObject *padding, *style, *row;
     self->head += 2;
 
     if (!Tokenizer_CAN_RECURSE(self)) {
@@ -2253,14 +2273,13 @@ static int Tokenizer_handle_table_row(Tokenizer* self)
         return 0;
     }
 
-    if(Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_ROW_OPEN))
+    if (Tokenizer_check_route(self, LC_TABLE_OPEN | LC_TABLE_ROW_OPEN) < 0)
+        return 0;
+    if (Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_ROW_OPEN))
         return -1;
     padding = Tokenizer_handle_table_style(self, '\n');
-    if (BAD_ROUTE) {
-        trash = Tokenizer_pop(self);
-        Py_XDECREF(trash);
+    if (BAD_ROUTE)
         return 0;
-    }
     if (!padding)
         return -1;
     style = Tokenizer_pop(self);
@@ -2319,8 +2338,8 @@ Tokenizer_handle_table_cell(Tokenizer* self, const char *markup,
     if (cell_context & LC_TABLE_CELL_STYLE) {
         Py_DECREF(cell);
         self->head = reset;
-        if(Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN |
-                          line_context))
+        if (Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN |
+                           line_context))
             return -1;
         padding = Tokenizer_handle_table_style(self, '|');
         if (!padding)
@@ -2541,6 +2560,8 @@ PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push)
     PyObject* temp;
 
     if (push) {
+        if (Tokenizer_check_route(self, context) < 0)
+            return NULL;
         if (Tokenizer_push(self, context))
             return NULL;
     }
diff --git a/mwparserfromhell/parser/ctokenizer/tok_support.c b/mwparserfromhell/parser/ctokenizer/tok_support.c
index 31c6bb9..08bfe9c 100644
--- a/mwparserfromhell/parser/ctokenizer/tok_support.c
+++ b/mwparserfromhell/parser/ctokenizer/tok_support.c
@@ -1,5 +1,5 @@
 /*
-Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
+Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
@@ -40,10 +40,11 @@ int Tokenizer_push(Tokenizer* self, uint64_t context)
     top->textbuffer = Textbuffer_new(&self->text);
     if (!top->textbuffer)
         return -1;
+    top->ident.head = self->head;
+    top->ident.context = context;
     top->next = self->topstack;
     self->topstack = top;
     self->depth++;
-    self->cycles++;
     return 0;
 }
 
@@ -130,12 +131,38 @@ PyObject* Tokenizer_pop_keeping_context(Tokenizer* self)
 }
 
 /*
+    Compare two route_tree_nodes that are in their avl_tree_node forms.
+*/
+static int compare_nodes(
+    const struct avl_tree_node* na, const struct avl_tree_node* nb)
+{
+    route_tree_node *a = avl_tree_entry(na, route_tree_node, node);
+    route_tree_node *b = avl_tree_entry(nb, route_tree_node, node);
+
+    if (a->id.head < b->id.head)
+        return -1;
+    if (a->id.head > b->id.head)
+        return 1;
+    return (a->id.context > b->id.context) - (a->id.context < b->id.context);
+}
+
+/*
     Fail the current tokenization route. Discards the current
-    stack/context/textbuffer and sets the BAD_ROUTE flag.
+    stack/context/textbuffer and sets the BAD_ROUTE flag. Also records the
+    ident of the failed stack so future parsing attempts down this route can be
+    stopped early.
 */
 void* Tokenizer_fail_route(Tokenizer* self)
 {
     uint64_t context = self->topstack->context;
+
+    route_tree_node *node = malloc(sizeof(route_tree_node));
+    if (node) {
+        node->id = self->topstack->ident;
+        if (avl_tree_insert(&self->bad_routes, &node->node, compare_nodes))
+            free(node);
+    }
+
     PyObject* stack = Tokenizer_pop(self);
 
     Py_XDECREF(stack);
@@ -144,6 +171,31 @@ void* Tokenizer_fail_route(Tokenizer* self)
 }
 
 /*
+    Check if pushing a new route here with the given context would definitely
+    fail, based on a previous call to Tokenizer_fail_route() with the same
+    stack ident (head position and context).
+
+    Return 0 if safe and -1 if unsafe. The BAD_ROUTE flag will be set in the
+    latter case.
+
+    Calling this function is optional; it exists purely as an optimization.
+    (The Python tokenizer checks every route on push, but doing so here would
+    add too much overhead to the C tokenizer, since a bad route would have to
+    be checked for after every call to Tokenizer_push.)
+*/
+int Tokenizer_check_route(Tokenizer* self, uint64_t context)
+{
+    /* Real route_tree_node as the search key; compare_nodes reads only id. */
+    route_tree_node probe = {{self->head, context}};
+
+    if (avl_tree_lookup_node(self->bad_routes, &probe.node, compare_nodes)) {
+        FAIL_ROUTE(context);
+        return -1;
+    }
+    return 0;
+}
+
+/*
     Write a token to the current token stack.
 */
 int Tokenizer_emit_token(Tokenizer* self, PyObject* token, int first)
diff --git a/mwparserfromhell/parser/ctokenizer/tok_support.h b/mwparserfromhell/parser/ctokenizer/tok_support.h
index 182f9a0..ccc6af5 100644
--- a/mwparserfromhell/parser/ctokenizer/tok_support.h
+++ b/mwparserfromhell/parser/ctokenizer/tok_support.h
@@ -1,5 +1,5 @@
 /*
-Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
+Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
@@ -32,6 +32,7 @@ void Tokenizer_delete_top_of_stack(Tokenizer*);
 PyObject* Tokenizer_pop(Tokenizer*);
 PyObject* Tokenizer_pop_keeping_context(Tokenizer*);
 void* Tokenizer_fail_route(Tokenizer*);
+int Tokenizer_check_route(Tokenizer*, uint64_t);
 
 int Tokenizer_emit_token(Tokenizer*, PyObject*, int);
 int Tokenizer_emit_token_kwargs(Tokenizer*, PyObject*, PyObject*, int);
@@ -47,10 +48,11 @@ Unicode Tokenizer_read_backwards(Tokenizer*, Py_ssize_t);
 /* Macros */
 
 #define MAX_DEPTH 40
-#define MAX_CYCLES 100000
-
 #define Tokenizer_CAN_RECURSE(self)                                           \
-    (self->depth < MAX_DEPTH && self->cycles < MAX_CYCLES)
+    (self->depth < MAX_DEPTH)
+#define Tokenizer_IS_CURRENT_STACK(self, id)                                  \
+    (self->topstack->ident.head    == (id).head &&                            \
+     self->topstack->ident.context == (id).context)
 
 #define Tokenizer_emit(self, token)                                           \
     Tokenizer_emit_token(self, token, 0)
diff --git a/mwparserfromhell/parser/ctokenizer/tokenizer.c b/mwparserfromhell/parser/ctokenizer/tokenizer.c
index 47d2993..213c47b 100644
--- a/mwparserfromhell/parser/ctokenizer/tokenizer.c
+++ b/mwparserfromhell/parser/ctokenizer/tokenizer.c
@@ -1,5 +1,5 @@
 /*
-Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
+Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
@@ -103,8 +103,9 @@ static int Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds)
         return -1;
     init_tokenizer_text(&self->text);
     self->topstack = NULL;
-    self->head = self->global = self->depth = self->cycles = 0;
+    self->head = self->global = self->depth = 0;
     self->route_context = self->route_state = 0;
+    self->bad_routes = NULL;
     self->skip_style_tags = 0;
     return 0;
 }
@@ -158,10 +159,17 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
             return NULL;
     }
 
-    self->head = self->global = self->depth = self->cycles = 0;
+    self->head = self->global = self->depth = 0;
     self->skip_style_tags = skip_style_tags;
+    self->bad_routes = NULL;
+
     tokens = Tokenizer_parse(self, context, 1);
 
+    route_tree_node *n;
+    avl_tree_for_each_in_postorder(n, self->bad_routes, route_tree_node, node)
+        free(n);
+    self->bad_routes = NULL;
+
     if (!tokens || self->topstack) {
         Py_XDECREF(tokens);
         if (PyErr_Occurred())
diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py
index 309d0d3..b3e5883 100644
--- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8  -*-
 #
-# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
+# Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -65,7 +65,6 @@ class Tokenizer(object):
     MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "'", "#", "*", ";",
                ":", "/", "-", "!", "\n", START, END]
     MAX_DEPTH = 40
-    MAX_CYCLES = 100000
     regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE)
     tag_splitter = re.compile(r"([\s\"\'\\]+)")
 
@@ -75,7 +74,8 @@ class Tokenizer(object):
         self._stacks = []
         self._global = 0
         self._depth = 0
-        self._cycles = 0
+        self._bad_routes = set()
+        self._skip_style_tags = False
 
     @property
     def _stack(self):
@@ -100,11 +100,24 @@ class Tokenizer(object):
     def _textbuffer(self, value):
         self._stacks[-1][2] = value
 
+    @property
+    def _stack_ident(self):
+        """An identifier for the current stack.
+
+        This is based on the starting head position and context. Stacks with
+        the same identifier are always parsed in the same way. This can be used
+        to cache intermediate parsing info.
+        """
+        return self._stacks[-1][3]
+
     def _push(self, context=0):
         """Add a new token stack, context, and textbuffer to the list."""
-        self._stacks.append([[], context, []])
+        new_ident = (self._head, context)
+        if new_ident in self._bad_routes:
+            raise BadRoute(context)
+
+        self._stacks.append([[], context, [], new_ident])
         self._depth += 1
-        self._cycles += 1
 
     def _push_textbuffer(self):
         """Push the textbuffer onto the stack as a Text node and clear it."""
@@ -129,7 +142,7 @@ class Tokenizer(object):
 
     def _can_recurse(self):
         """Return whether or not our max recursion depth has been exceeded."""
-        return self._depth < self.MAX_DEPTH and self._cycles < self.MAX_CYCLES
+        return self._depth < self.MAX_DEPTH
 
     def _fail_route(self):
         """Fail the current tokenization route.
@@ -138,6 +151,7 @@ class Tokenizer(object):
         :exc:`.BadRoute`.
         """
         context = self._context
+        self._bad_routes.add(self._stack_ident)
         self._pop()
         raise BadRoute(context)
 
@@ -609,8 +623,8 @@ class Tokenizer(object):
     def _parse_entity(self):
         """Parse an HTML entity at the head of the wikicode string."""
         reset = self._head
-        self._push()
         try:
+            self._push(contexts.HTML_ENTITY)
             self._really_parse_entity()
         except BadRoute:
             self._head = reset
@@ -650,8 +664,9 @@ class Tokenizer(object):
             self._emit_first(tokens.TagAttrQuote(char=data.quoter))
             self._emit_all(self._pop())
         buf = data.padding_buffer
-        self._emit_first(tokens.TagAttrStart(pad_first=buf["first"],
-            pad_before_eq=buf["before_eq"], pad_after_eq=buf["after_eq"]))
+        self._emit_first(tokens.TagAttrStart(
+            pad_first=buf["first"], pad_before_eq=buf["before_eq"],
+            pad_after_eq=buf["after_eq"]))
         self._emit_all(self._pop())
         for key in data.padding_buffer:
             data.padding_buffer[key] = ""
@@ -1076,8 +1091,8 @@ class Tokenizer(object):
         """Parse a wikicode table by starting with the first line."""
         reset = self._head
         self._head += 2
-        self._push(contexts.TABLE_OPEN)
         try:
+            self._push(contexts.TABLE_OPEN)
             padding = self._handle_table_style("\n")
         except BadRoute:
             self._head = reset
@@ -1086,9 +1101,12 @@ class Tokenizer(object):
         style = self._pop()
 
         self._head += 1
+        restore_point = self._stack_ident
         try:
             table = self._parse(contexts.TABLE_OPEN)
         except BadRoute:
+            while self._stack_ident != restore_point:
+                self._pop()
             self._head = reset
             self._emit_text("{")
             return
@@ -1106,11 +1124,7 @@ class Tokenizer(object):
             return
 
         self._push(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN)
-        try:
-            padding = self._handle_table_style("\n")
-        except BadRoute:
-            self._pop()
-            raise
+        padding = self._handle_table_style("\n")
         style = self._pop()
 
         # Don't parse the style separator:
@@ -1348,7 +1362,8 @@ class Tokenizer(object):
                     # Kill potential table contexts
                     self._context &= ~contexts.TABLE_CELL_LINE_CONTEXTS
             # Start of table parsing
-            elif this == "{" and next == "|" and (self._read(-1) in ("\n", self.START) or
+            elif this == "{" and next == "|" and (
+                    self._read(-1) in ("\n", self.START) or
                     (self._read(-2) in ("\n", self.START) and self._read(-1).isspace())):
                 if self._can_recurse():
                     self._parse_table()
@@ -1374,7 +1389,7 @@ class Tokenizer(object):
                     self._context &= ~contexts.TABLE_CELL_LINE_CONTEXTS
                     self._emit_text(this)
                 elif (self._read(-1) in ("\n", self.START) or
-                    (self._read(-2) in ("\n", self.START) and self._read(-1).isspace())):
+                      (self._read(-2) in ("\n", self.START) and self._read(-1).isspace())):
                     if this == "|" and next == "}":
                         if self._context & contexts.TABLE_CELL_OPEN:
                             return self._handle_table_cell_end()
@@ -1406,10 +1421,12 @@ class Tokenizer(object):
 
     def tokenize(self, text, context=0, skip_style_tags=False):
         """Build a list of tokens from a string of wikicode and return it."""
-        self._skip_style_tags = skip_style_tags
         split = self.regex.split(text)
         self._text = [segment for segment in split if segment]
-        self._head = self._global = self._depth = self._cycles = 0
+        self._head = self._global = self._depth = 0
+        self._bad_routes = set()
+        self._skip_style_tags = skip_style_tags
+
         try:
             tokens = self._parse(context)
         except BadRoute:  # pragma: no cover (untestable/exceptional case)
diff --git a/tests/tokenizer/integration.mwtest b/tests/tokenizer/integration.mwtest
index 831f4d0..7137c50 100644
--- a/tests/tokenizer/integration.mwtest
+++ b/tests/tokenizer/integration.mwtest
@@ -346,3 +346,10 @@ name:   tables_in_templates_2
 label:  catch error handling mistakes when wikitables are inside templates
 input:  "{{hello|test\n{|\n| }}"
 output: [TemplateOpen(), Text(text="hello"), TemplateParamSeparator(), Text(text="test\n{"), TemplateParamSeparator(), Text(text="\n"), TemplateParamSeparator(), Text(text=" "), TemplateClose()]
+
+---
+
+name:   many_invalid_nested_tags
+label:  many unending nested tags that should be treated as plain text, followed by valid wikitext (see issues #42, #183)
+input:  "<b><b><b><b><b><b><b><b><b><b><b><b><b><b><b><b><b><b>[[{{x}}"
+output: [Text(text="<b><b><b><b><b><b><b><b><b><b><b><b><b><b><b><b><b><b>[["), TemplateOpen(), Text(text="x"), TemplateClose()]
diff --git a/tests/tokenizer/templates.mwtest b/tests/tokenizer/templates.mwtest
index dccee37..8d30069 100644
--- a/tests/tokenizer/templates.mwtest
+++ b/tests/tokenizer/templates.mwtest
@@ -694,4 +694,4 @@ output: [Text(text="{{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{
 name:   recursion_opens_and_closes
 label:  test potentially dangerous recursion: template openings and closings
 input:  "{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}"
-output: [Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), TemplateOpen(), Text(text="x"), TemplateParamSeparator(), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x"), TemplateParamSeparator(), Text(text="{{x"), TemplateClose(), Text(text="{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}")]
+output: [Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose()]

From 6ee61789da11a23720e743e14d856f7d5ed1c234 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@gmail.com>
Date: Fri, 23 Jun 2017 01:26:37 -0400
Subject: [PATCH 16/24] Fix compilation issue on Travis since GCC uses C90 by
 default there.

---
 mwparserfromhell/parser/ctokenizer/tok_support.c | 17 +++++++++++++++++
 mwparserfromhell/parser/ctokenizer/tok_support.h |  1 +
 mwparserfromhell/parser/ctokenizer/tokenizer.c   |  6 ++----
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/mwparserfromhell/parser/ctokenizer/tok_support.c b/mwparserfromhell/parser/ctokenizer/tok_support.c
index 08bfe9c..f3814ed 100644
--- a/mwparserfromhell/parser/ctokenizer/tok_support.c
+++ b/mwparserfromhell/parser/ctokenizer/tok_support.c
@@ -196,6 +196,23 @@ int Tokenizer_check_route(Tokenizer* self, uint64_t context)
 }
 
 /*
+    Free the tokenizer's bad route cache tree. Intended to be called by the
+    main tokenizer function after parsing is finished.
+*/
+void Tokenizer_free_bad_route_tree(Tokenizer *self)
+{
+    struct avl_tree_node *cur = avl_tree_first_in_postorder(self->bad_routes);
+    struct avl_tree_node *parent;
+    while (cur) {
+        route_tree_node *node = avl_tree_entry(cur, route_tree_node, node);
+        parent = avl_get_parent(cur);
+        free(node);
+        cur = avl_tree_next_in_postorder(cur, parent);
+    }
+    self->bad_routes = NULL;
+}
+
+/*
     Write a token to the current token stack.
 */
 int Tokenizer_emit_token(Tokenizer* self, PyObject* token, int first)
diff --git a/mwparserfromhell/parser/ctokenizer/tok_support.h b/mwparserfromhell/parser/ctokenizer/tok_support.h
index ccc6af5..57f4126 100644
--- a/mwparserfromhell/parser/ctokenizer/tok_support.h
+++ b/mwparserfromhell/parser/ctokenizer/tok_support.h
@@ -33,6 +33,7 @@ PyObject* Tokenizer_pop(Tokenizer*);
 PyObject* Tokenizer_pop_keeping_context(Tokenizer*);
 void* Tokenizer_fail_route(Tokenizer*);
 int Tokenizer_check_route(Tokenizer*, uint64_t);
+void Tokenizer_free_bad_route_tree(Tokenizer*);
 
 int Tokenizer_emit_token(Tokenizer*, PyObject*, int);
 int Tokenizer_emit_token_kwargs(Tokenizer*, PyObject*, PyObject*, int);
diff --git a/mwparserfromhell/parser/ctokenizer/tokenizer.c b/mwparserfromhell/parser/ctokenizer/tokenizer.c
index 213c47b..9017909 100644
--- a/mwparserfromhell/parser/ctokenizer/tokenizer.c
+++ b/mwparserfromhell/parser/ctokenizer/tokenizer.c
@@ -22,6 +22,7 @@ SOFTWARE.
 
 #include "tokenizer.h"
 #include "tok_parse.h"
+#include "tok_support.h"
 #include "tokens.h"
 
 /* Globals */
@@ -165,10 +166,7 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
 
     tokens = Tokenizer_parse(self, context, 1);
 
-    route_tree_node *n;
-    avl_tree_for_each_in_postorder(n, self->bad_routes, route_tree_node, node)
-        free(n);
-    self->bad_routes = NULL;
+    Tokenizer_free_bad_route_tree(self);
 
     if (!tokens || self->topstack) {
         Py_XDECREF(tokens);

From 2593675651d76abda2b03b93a1dd24910974ca16 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@gmail.com>
Date: Fri, 23 Jun 2017 01:47:08 -0400
Subject: [PATCH 17/24] Remove stdbool.h from avl_tree since MSVC doesn't like
 it.

---
 mwparserfromhell/parser/ctokenizer/avl_tree.c | 6 ++++++
 mwparserfromhell/parser/ctokenizer/avl_tree.h | 4 ++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/mwparserfromhell/parser/ctokenizer/avl_tree.c b/mwparserfromhell/parser/ctokenizer/avl_tree.c
index 4fdff6f..dd034b2 100644
--- a/mwparserfromhell/parser/ctokenizer/avl_tree.c
+++ b/mwparserfromhell/parser/ctokenizer/avl_tree.c
@@ -3,6 +3,7 @@
  *		binary search tree), implementation file
  *
  * Written in 2014-2016 by Eric Biggers <ebiggers3@gmail.com>
+ * Slight changes for compatibility by Ben Kurtovic <ben.kurtovic@gmail.com>
  *
  * To the extent possible under law, the author(s) have dedicated all copyright
  * and related and neighboring rights to this software to the public domain
@@ -17,6 +18,11 @@
  * see <http://creativecommons.org/publicdomain/zero/1.0/>.
  */
 
+#define false 0
+#define true  1
+
+typedef int bool;
+
 #include "avl_tree.h"
 
 /* Returns the left child (sign < 0) or the right child (sign > 0) of the
diff --git a/mwparserfromhell/parser/ctokenizer/avl_tree.h b/mwparserfromhell/parser/ctokenizer/avl_tree.h
index 86ade3f..86e2c75 100644
--- a/mwparserfromhell/parser/ctokenizer/avl_tree.h
+++ b/mwparserfromhell/parser/ctokenizer/avl_tree.h
@@ -3,6 +3,7 @@
  *		binary search tree), header file
  *
  * Written in 2014-2016 by Eric Biggers <ebiggers3@gmail.com>
+ * Slight changes for compatibility by Ben Kurtovic <ben.kurtovic@gmail.com>
  *
  * To the extent possible under law, the author(s) have dedicated all copyright
  * and related and neighboring rights to this software to the public domain
@@ -20,7 +21,6 @@
 #ifndef _AVL_TREE_H_
 #define _AVL_TREE_H_
 
-#include <stdbool.h>
 #include <stddef.h>
 #include <inttypes.h> /* for uintptr_t */
 
@@ -78,7 +78,7 @@ avl_tree_node_set_unlinked(struct avl_tree_node *node)
 /* Returns true iff the specified AVL tree node has been marked with
  * avl_tree_node_set_unlinked() and has not subsequently been inserted into a
  * tree.  */
-static AVL_INLINE bool
+static AVL_INLINE int
 avl_tree_node_is_unlinked(const struct avl_tree_node *node)
 {
 	return node->parent_balance == (uintptr_t)node;

From 6ad3b9fb2ab8d05bd842c079df0b351aeae45c20 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@gmail.com>
Date: Fri, 23 Jun 2017 01:55:26 -0400
Subject: [PATCH 18/24] inttypes.h doesn't exist on Windows, so try using
 stdint.h

---
 mwparserfromhell/parser/ctokenizer/avl_tree.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mwparserfromhell/parser/ctokenizer/avl_tree.h b/mwparserfromhell/parser/ctokenizer/avl_tree.h
index 86e2c75..f4869a6 100644
--- a/mwparserfromhell/parser/ctokenizer/avl_tree.h
+++ b/mwparserfromhell/parser/ctokenizer/avl_tree.h
@@ -22,7 +22,7 @@
 #define _AVL_TREE_H_
 
 #include <stddef.h>
-#include <inttypes.h> /* for uintptr_t */
+#include <stdint.h>
 
 #ifdef __GNUC__
 #  define AVL_INLINE inline __attribute__((always_inline))

From dc0b3ae44686f4d69c1043983ae8c1da720f8186 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@gmail.com>
Date: Fri, 23 Jun 2017 02:08:09 -0400
Subject: [PATCH 19/24] Enable Windows builds on Python 3.6; try to fix again.

---
 appveyor.yml                                  | 8 ++++++++
 mwparserfromhell/parser/ctokenizer/avl_tree.h | 7 +++++++
 2 files changed, 15 insertions(+)

diff --git a/appveyor.yml b/appveyor.yml
index d60b14b..afe1450 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -52,6 +52,14 @@ environment:
       PYTHON_VERSION: "3.5"
       PYTHON_ARCH:    "64"
 
+    - PYTHON:         "C:\\Python36"
+      PYTHON_VERSION: "3.6"
+      PYTHON_ARCH:    "32"
+
+    - PYTHON:         "C:\\Python36-x64"
+      PYTHON_VERSION: "3.6"
+      PYTHON_ARCH:    "64"
+
 install:
   - "%PIP% install --disable-pip-version-check --user --upgrade pip"
   - "%PIP% install wheel twine"
diff --git a/mwparserfromhell/parser/ctokenizer/avl_tree.h b/mwparserfromhell/parser/ctokenizer/avl_tree.h
index f4869a6..8508411 100644
--- a/mwparserfromhell/parser/ctokenizer/avl_tree.h
+++ b/mwparserfromhell/parser/ctokenizer/avl_tree.h
@@ -22,10 +22,17 @@
 #define _AVL_TREE_H_
 
 #include <stddef.h>
+
+#if defined(_MSC_VER) && (_MSC_VER < 1600)
+typedef unsigned long uintptr_t;
+#else
 #include <stdint.h>
+#endif
 
 #ifdef __GNUC__
 #  define AVL_INLINE inline __attribute__((always_inline))
+#elif defined(_MSC_VER) && (_MSC_VER < 1900)
+#  define AVL_INLINE __inline
 #else
 #  define AVL_INLINE inline
 #endif

From 0ef6a2ffbe78e1031b46a3ba463cd014fb9a995e Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@gmail.com>
Date: Fri, 23 Jun 2017 02:17:29 -0400
Subject: [PATCH 20/24] Fix declarations for C89 compatibility (forgot MSVC
 needed that...)

---
 mwparserfromhell/parser/ctokenizer/avl_tree.h  | 4 +---
 mwparserfromhell/parser/ctokenizer/tok_parse.c | 6 ++++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/mwparserfromhell/parser/ctokenizer/avl_tree.h b/mwparserfromhell/parser/ctokenizer/avl_tree.h
index 8508411..9caa2bc 100644
--- a/mwparserfromhell/parser/ctokenizer/avl_tree.h
+++ b/mwparserfromhell/parser/ctokenizer/avl_tree.h
@@ -23,9 +23,7 @@
 
 #include <stddef.h>
 
-#if defined(_MSC_VER) && (_MSC_VER < 1600)
-typedef unsigned long uintptr_t;
-#else
+#if !defined(_MSC_VER) || (_MSC_VER >= 1600)
 #include <stdint.h>
 #endif
 
diff --git a/mwparserfromhell/parser/ctokenizer/tok_parse.c b/mwparserfromhell/parser/ctokenizer/tok_parse.c
index 27eed67..f8e52ec 100644
--- a/mwparserfromhell/parser/ctokenizer/tok_parse.c
+++ b/mwparserfromhell/parser/ctokenizer/tok_parse.c
@@ -519,6 +519,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self)
     Unicode chunk;
     Py_ssize_t i;
     int slashes, j;
+    uint64_t new_context;
 
     if (!scheme_buffer)
         return -1;
@@ -554,7 +555,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self)
         return 0;
     }
     Py_DECREF(scheme);
-    uint64_t new_context = self->topstack->context | LC_EXT_LINK_URI;
+    new_context = self->topstack->context | LC_EXT_LINK_URI;
     if (Tokenizer_check_route(self, new_context) < 0) {
         Textbuffer_dealloc(scheme_buffer);
         return 0;
@@ -2205,6 +2206,7 @@ static int Tokenizer_parse_table(Tokenizer* self)
     Py_ssize_t reset = self->head;
     PyObject *style, *padding, *trash;
     PyObject *table = NULL;
+    StackIdent restore_point;
     self->head += 2;
 
     if (Tokenizer_check_route(self, LC_TABLE_OPEN) < 0)
@@ -2229,7 +2231,7 @@ static int Tokenizer_parse_table(Tokenizer* self)
     }
 
     self->head++;
-    StackIdent restore_point = self->topstack->ident;
+    restore_point = self->topstack->ident;
     table = Tokenizer_parse(self, LC_TABLE_OPEN, 1);
     if (BAD_ROUTE) {
         RESET_ROUTE();

From 5a99597eb3333508e504eb0debaa42b5561c8cae Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@gmail.com>
Date: Fri, 23 Jun 2017 02:19:08 -0400
Subject: [PATCH 21/24] Another C89 fix for MSVC.

---
 mwparserfromhell/parser/ctokenizer/tok_support.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mwparserfromhell/parser/ctokenizer/tok_support.c b/mwparserfromhell/parser/ctokenizer/tok_support.c
index f3814ed..062c631 100644
--- a/mwparserfromhell/parser/ctokenizer/tok_support.c
+++ b/mwparserfromhell/parser/ctokenizer/tok_support.c
@@ -155,6 +155,7 @@ static int compare_nodes(
 void* Tokenizer_fail_route(Tokenizer* self)
 {
     uint64_t context = self->topstack->context;
+    PyObject* stack;
 
     route_tree_node *node = malloc(sizeof(route_tree_node));
     if (node) {
@@ -163,8 +164,7 @@ void* Tokenizer_fail_route(Tokenizer* self)
             free(node);
     }
 
-    PyObject* stack = Tokenizer_pop(self);
-
+    stack = Tokenizer_pop(self);
     Py_XDECREF(stack);
     FAIL_ROUTE(context);
     return NULL;

From 7308c8055ec50475ccd3df146b76ee6b986f789c Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@gmail.com>
Date: Fri, 23 Jun 2017 02:46:06 -0400
Subject: [PATCH 22/24] Not perfect, but slightly better template param space
 guessing (#155)

---
 mwparserfromhell/nodes/template.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py
index 9c89fbd..58d25ae 100644
--- a/mwparserfromhell/nodes/template.py
+++ b/mwparserfromhell/nodes/template.py
@@ -136,6 +136,11 @@ class Template(Node):
                 component = str(param.value)
             match = re.search(r"^(\s*).*?(\s*)$", component, FLAGS)
             before, after = match.group(1), match.group(2)
+            if not use_names and component.isspace() and "\n" in before:
+                # If the value is empty, we expect newlines in the whitespace
+                # to be after the content, not before it:
+                before, after = before.split("\n", 1)
+                after = "\n" + after
             before_theories[before] += 1
             after_theories[after] += 1
 

From cd4f90e663fa421b836e93ddc56e4a573eefb664 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@gmail.com>
Date: Fri, 23 Jun 2017 04:14:17 -0400
Subject: [PATCH 23/24] Fix a rare parsing bug involving nested broken tags.

---
 CHANGELOG                                      | 2 ++
 docs/changelog.rst                             | 2 ++
 mwparserfromhell/parser/ctokenizer/tok_parse.c | 8 ++++++++
 mwparserfromhell/parser/tokenizer.py           | 6 ++++++
 tests/tokenizer/tags.mwtest                    | 7 +++++++
 5 files changed, 25 insertions(+)

diff --git a/CHANGELOG b/CHANGELOG
index bebacbf..b52a70f 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -16,6 +16,8 @@ v0.5 (unreleased):
   on incompletely-constructed StringMixIn subclasses).
 - Fixed Wikicode.matches()'s behavior on iterables besides lists and tuples.
 - Fixed len() sometimes raising ValueError on empty node lists.
+- Fixed a rare parsing bug involving self-closing tags inside the attributes of
+  unpaired tags.
 - Fixed release script after changes to PyPI.
 
 v0.4.4 (released December 30, 2016):
diff --git a/docs/changelog.rst b/docs/changelog.rst
index c558579..b02437f 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -27,6 +27,8 @@ Unreleased
 - Fixed :meth:`.Wikicode.matches`\ 's behavior on iterables besides lists and
   tuples.
 - Fixed ``len()`` sometimes raising ``ValueError`` on empty node lists.
+- Fixed a rare parsing bug involving self-closing tags inside the attributes of
+  unpaired tags.
 - Fixed release script after changes to PyPI.
 
 v0.4.4
diff --git a/mwparserfromhell/parser/ctokenizer/tok_parse.c b/mwparserfromhell/parser/ctokenizer/tok_parse.c
index f8e52ec..90ee19d 100644
--- a/mwparserfromhell/parser/ctokenizer/tok_parse.c
+++ b/mwparserfromhell/parser/ctokenizer/tok_parse.c
@@ -1548,6 +1548,14 @@ static PyObject* Tokenizer_handle_single_tag_end(Tokenizer* self)
             if (depth == 0)
                 break;
         }
+        is_instance = PyObject_IsInstance(token, TagCloseSelfclose);
+        if (is_instance == -1)
+            return NULL;
+        else if (is_instance == 1) {
+            depth--;
+            if (depth == 0)  // Should never happen
+                return NULL;
+        }
     }
     if (!token || depth > 0)
         return NULL;
diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py
index b3e5883..d7a0282 100644
--- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py
@@ -819,6 +819,12 @@ class Tokenizer(object):
                 depth -= 1
                 if depth == 0:
                     break
+            elif isinstance(token, tokens.TagCloseSelfclose):
+                depth -= 1
+                if depth == 0:  # pragma: no cover (untestable/exceptional)
+                    raise ParserError(
+                        "_handle_single_tag_end() got an unexpected "
+                        "TagCloseSelfclose")
         else:  # pragma: no cover (untestable/exceptional case)
             raise ParserError("_handle_single_tag_end() missed a TagCloseOpen")
         padding = stack[index].padding
diff --git a/tests/tokenizer/tags.mwtest b/tests/tokenizer/tags.mwtest
index 3c07ac9..40815a6 100644
--- a/tests/tokenizer/tags.mwtest
+++ b/tests/tokenizer/tags.mwtest
@@ -646,3 +646,10 @@ name:   non_ascii_full
 label:  an open/close tag pair containing non-ASCII characters
 input:  "<éxamplé></éxamplé>"
 output: [TagOpenOpen(), Text(text="éxamplé"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="éxamplé"), TagCloseClose()]
+
+---
+
+name:   single_nested_selfclosing
+label:  a single (unpaired) tag with a self-closing tag in the middle (see issue #147)
+input:  "<li a <br/> c>foobar"
+output: [TagOpenOpen(), Text(text="li"), TagAttrStart(pad_first=" ", pad_after_eq="", pad_before_eq=" "), Text(text="a"), TagAttrStart(pad_first="", pad_after_eq="", pad_before_eq=" "), TagOpenOpen(), Text(text="br"), TagCloseSelfclose(padding=""), TagAttrStart(pad_first="", pad_after_eq="", pad_before_eq=""), Text(text="c"), TagCloseSelfclose(padding="", implicit=True), Text(text="foobar")]

From 3ffc13bfd47edb2b96425f05e45d7c8a29cea126 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@gmail.com>
Date: Fri, 23 Jun 2017 04:31:18 -0400
Subject: [PATCH 24/24] release/0.5

---
 CHANGELOG                    | 2 +-
 appveyor.yml                 | 2 +-
 docs/changelog.rst           | 4 ++--
 mwparserfromhell/__init__.py | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index b52a70f..bdcf906 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,4 +1,4 @@
-v0.5 (unreleased):
+v0.5 (released June 23, 2017):
 
 - Added Wikicode.contains() to determine whether a Node or Wikicode object is
   contained within another Wikicode object.
diff --git a/appveyor.yml b/appveyor.yml
index afe1450..ff2ef4a 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,6 +1,6 @@
 # This config file is used by appveyor.com to build Windows release binaries
 
-version: 0.5.dev0-b{build}
+version: 0.5-b{build}
 
 branches:
   only:
diff --git a/docs/changelog.rst b/docs/changelog.rst
index b02437f..cf4e31a 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -4,8 +4,8 @@ Changelog
 v0.5
 ----
 
-Unreleased
-(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.4.4...develop>`__):
+`Released June 23, 2017 <https://github.com/earwig/mwparserfromhell/tree/v0.5>`_
+(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.4.4...v0.5>`__):
 
 - Added :meth:`.Wikicode.contains` to determine whether a :class:`.Node` or
   :class:`.Wikicode` object is contained within another :class:`.Wikicode`
diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py
index 64f3681..17f9e97 100644
--- a/mwparserfromhell/__init__.py
+++ b/mwparserfromhell/__init__.py
@@ -29,7 +29,7 @@ outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode.
 __author__ = "Ben Kurtovic"
 __copyright__ = "Copyright (C) 2012, 2013, 2014, 2015, 2016 Ben Kurtovic"
 __license__ = "MIT License"
-__version__ = "0.5.dev0"
+__version__ = "0.5"
 __email__ = "ben.kurtovic@gmail.com"
 
 from . import (compat, definitions, nodes, parser, smart_list, string_mixin,