
Merge branch 'develop'

tags/v0.4
Author: Ben Kurtovic
Commit: 94b6557d00
60 changed files with 1047 additions and 1023 deletions
  1. .travis.yml (+1, -0)
  2. CHANGELOG (+18, -0)
  3. LICENSE (+1, -1)
  4. docs/changelog.rst (+26, -0)
  5. docs/conf.py (+1, -1)
  6. mwparserfromhell/__init__.py (+4, -4)
  7. mwparserfromhell/compat.py (+5, -2)
  8. mwparserfromhell/definitions.py (+1, -1)
  9. mwparserfromhell/nodes/__init__.py (+11, -11)
  10. mwparserfromhell/nodes/argument.py (+4, -7)
  11. mwparserfromhell/nodes/comment.py (+1, -1)
  12. mwparserfromhell/nodes/external_link.py (+4, -7)
  13. mwparserfromhell/nodes/extras/__init__.py (+1, -1)
  14. mwparserfromhell/nodes/extras/attribute.py (+1, -1)
  15. mwparserfromhell/nodes/extras/parameter.py (+1, -1)
  16. mwparserfromhell/nodes/heading.py (+3, -5)
  17. mwparserfromhell/nodes/html_entity.py (+1, -1)
  18. mwparserfromhell/nodes/tag.py (+8, -14)
  19. mwparserfromhell/nodes/template.py (+8, -12)
  20. mwparserfromhell/nodes/text.py (+1, -1)
  21. mwparserfromhell/nodes/wikilink.py (+4, -7)
  22. mwparserfromhell/parser/__init__.py (+8, -4)
  23. mwparserfromhell/parser/builder.py (+1, -1)
  24. mwparserfromhell/parser/contexts.py (+30, -30)
  25. mwparserfromhell/parser/tokenizer.c (+139, -141)
  26. mwparserfromhell/parser/tokenizer.h (+40, -39)
  27. mwparserfromhell/parser/tokenizer.py (+11, -11)
  28. mwparserfromhell/parser/tokens.py (+10, -12)
  29. mwparserfromhell/smart_list.py (+18, -10)
  30. mwparserfromhell/string_mixin.py (+16, -255)
  31. mwparserfromhell/utils.py (+3, -3)
  32. mwparserfromhell/wikicode.py (+279, -210)
  33. setup.py (+13, -4)
  34. tests/_test_tokenizer.py (+1, -1)
  35. tests/_test_tree_equality.py (+8, -12)
  36. tests/compat.py (+0, -2)
  37. tests/discover.py (+24, -0)
  38. tests/test_argument.py (+16, -17)
  39. tests/test_attribute.py (+6, -2)
  40. tests/test_builder.py (+6, -2)
  41. tests/test_comment.py (+9, -6)
  42. tests/test_ctokenizer.py (+6, -2)
  43. tests/test_docs.py (+6, -2)
  44. tests/test_external_link.py (+17, -18)
  45. tests/test_heading.py (+12, -11)
  46. tests/test_html_entity.py (+9, -6)
  47. tests/test_parameter.py (+6, -2)
  48. tests/test_parser.py (+31, -4)
  49. tests/test_pytokenizer.py (+6, -2)
  50. tests/test_smart_list.py (+7, -5)
  51. tests/test_string_mixin.py (+17, -15)
  52. tests/test_tag.py (+26, -29)
  53. tests/test_template.py (+31, -30)
  54. tests/test_text.py (+9, -6)
  55. tests/test_tokens.py (+6, -2)
  56. tests/test_utils.py (+6, -2)
  57. tests/test_wikicode.py (+49, -16)
  58. tests/test_wikilink.py (+16, -17)
  59. tests/tokenizer/integration.mwtest (+28, -0)
  60. tests/tokenizer/wikilinks.mwtest (+16, -14)

.travis.yml (+1, -0)

@@ -1,5 +1,6 @@
language: python
python:
+- "2.6"
- "2.7"
- "3.2"
- "3.3"


CHANGELOG (+18, -0)

@@ -1,3 +1,21 @@
v0.3.3 (released April 22, 2014):

- Added support for Python 2.6 and 3.4.
- Template.has() is now passed 'ignore_empty=False' by default instead of True.
This fixes a bug when adding parameters to templates with empty fields, and
is a breaking change if you rely on the default behavior.
- The 'matches' argument of Wikicode's filter methods now accepts a function
(taking one argument, a Node, and returning a bool) in addition to a regex.
- Re-added 'flat' argument to Wikicode.get_sections(), fixed the order in which
it returns sections, and made it faster.
- Wikicode.matches() now accepts a tuple or list of strings/Wikicode objects
instead of just a single string or Wikicode.
- Given the frequency of issues with the (admittedly insufficient) tag parser,
there's a temporary skip_style_tags argument to parse() that ignores '' and
''' until these issues are corrected.
- Fixed a parser bug involving nested wikilinks and external links.
- C code cleanup and speed improvements.

v0.3.2 (released September 1, 2013):

- Added support for Python 3.2 (along with current support for 3.3 and 2.7).
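The ignore_empty default change above is the one breaking change in this release. A minimal sketch of the before/after behavior, using a hypothetical stand-in class (not mwparserfromhell's real Template):

```python
# Hypothetical stand-in for Template, illustrating only the
# ignore_empty default change described in the changelog.
class Template:
    def __init__(self, params):
        self.params = params  # dict of parameter name -> value string

    def has(self, name, ignore_empty=False):
        # With ignore_empty=True, a parameter whose value is blank is
        # treated as absent; the new default (False) counts it as present.
        for key, value in self.params.items():
            if key == name and (not ignore_empty or value.strip()):
                return True
        return False

tpl = Template({"date": ""})
print(tpl.has("date"))                     # new default: empty param counts
print(tpl.has("date", ignore_empty=True))  # old default behavior
```

This is why the change matters for adding parameters: under the old default, a template with an empty field looked like it lacked the parameter entirely.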


LICENSE (+1, -1)

@@ -1,4 +1,4 @@
-Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal


docs/changelog.rst (+26, -0)

@@ -1,6 +1,32 @@
Changelog
=========

v0.3.3
------

`Released April 22, 2014 <https://github.com/earwig/mwparserfromhell/tree/v0.3.3>`_
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.3.2...v0.3.3>`__):

- Added support for Python 2.6 and 3.4.
- :py:meth:`.Template.has` is now passed *ignore_empty=False* by default
instead of *True*. This fixes a bug when adding parameters to templates with
empty fields, **and is a breaking change if you rely on the default
behavior.**
- The *matches* argument of :py:class:`Wikicode's <.Wikicode>`
:py:meth:`.filter` methods now accepts a function (taking one argument, a
:py:class:`.Node`, and returning a bool) in addition to a regex.
- Re-added *flat* argument to :py:meth:`.Wikicode.get_sections`, fixed the
order in which it returns sections, and made it faster.
- :py:meth:`.Wikicode.matches` now accepts a tuple or list of
strings/:py:class:`.Wikicode` objects instead of just a single string or
:py:class:`.Wikicode`.
- Given the frequency of issues with the (admittedly insufficient) tag parser,
there's a temporary *skip_style_tags* argument to
:py:meth:`~.Parser.parse` that ignores ``''`` and ``'''`` until these issues
are corrected.
- Fixed a parser bug involving nested wikilinks and external links.
- C code cleanup and speed improvements.

v0.3.2
------



docs/conf.py (+1, -1)

@@ -42,7 +42,7 @@ master_doc = 'index'

# General information about the project.
project = u'mwparserfromhell'
-copyright = u'2012, 2013 Ben Kurtovic'
+copyright = u'2012, 2013, 2014 Ben Kurtovic'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the


mwparserfromhell/__init__.py (+4, -4)

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -29,10 +29,10 @@ outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode.
from __future__ import unicode_literals

__author__ = "Ben Kurtovic"
-__copyright__ = "Copyright (C) 2012, 2013 Ben Kurtovic"
+__copyright__ = "Copyright (C) 2012, 2013, 2014 Ben Kurtovic"
__license__ = "MIT License"
-__version__ = "0.3.2"
-__email__ = "ben.kurtovic@verizon.net"
+__version__ = "0.3.3"
+__email__ = "ben.kurtovic@gmail.com"

from . import (compat, definitions, nodes, parser, smart_list, string_mixin,
utils, wikicode)


mwparserfromhell/compat.py (+5, -2)

@@ -10,18 +10,21 @@ types are meant to be imported directly from within the parser's modules.

import sys

-py3k = sys.version_info.major == 3
-py32 = py3k and sys.version_info.minor == 2
+py26 = (sys.version_info[0] == 2) and (sys.version_info[1] == 6)
+py3k = (sys.version_info[0] == 3)
+py32 = py3k and (sys.version_info[1] == 2)

if py3k:
bytes = bytes
str = str
range = range
+maxsize = sys.maxsize
import html.entities as htmlentities

else:
bytes = str
str = unicode
range = xrange
+maxsize = sys.maxint
import htmlentitydefs as htmlentities
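The switch from `sys.version_info.major` to tuple indexing is what makes this module importable on Python 2.6, where `version_info` is a plain tuple without named attributes. A quick sketch of the same checks, runnable on any modern interpreter:

```python
import sys

# Tuple indexing works on every Python version; the named attributes
# (.major, .minor) only exist on Python 2.7 / 3.1 and later.
py26 = (sys.version_info[0] == 2) and (sys.version_info[1] == 6)
py3k = (sys.version_info[0] == 3)
py32 = py3k and (sys.version_info[1] == 2)

# Where both spellings exist, they agree:
assert sys.version_info[0] == sys.version_info.major
```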



mwparserfromhell/definitions.py (+1, -1)

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal


mwparserfromhell/nodes/__init__.py (+11, -11)

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -42,21 +42,21 @@ class Node(StringMixIn):

:py:meth:`__unicode__` must be overridden. It should return a ``unicode``
or (``str`` in py3k) representation of the node. If the node contains
-:py:class:`~.Wikicode` objects inside of it, :py:meth:`__iternodes__`
-should be overridden to yield tuples of (``wikicode``,
-``node_in_wikicode``) for each node in each wikicode, as well as the node
-itself (``None``, ``self``). If the node is printable, :py:meth:`__strip__`
-should be overridden to return the printable version of the node - it does
-not have to be a string, but something that can be converted to a string
-with ``str()``. Finally, :py:meth:`__showtree__` can be overridden to build
-a nice tree representation of the node, if desired, for
+:py:class:`~.Wikicode` objects inside of it, :py:meth:`__children__`
+should be a generator that iterates over them. If the node is printable
+(shown when the page is rendered), :py:meth:`__strip__` should return its
+printable version, stripping out any formatting marks. It does not have to
+return a string, but something that can be converted to a string with
+``str()``. Finally, :py:meth:`__showtree__` can be overridden to build a
+nice tree representation of the node, if desired, for
:py:meth:`~.Wikicode.get_tree`.
"""
def __unicode__(self):
raise NotImplementedError()

-def __iternodes__(self, getter):
-yield None, self
+def __children__(self):
+return # Funny generator-that-yields-nothing syntax
+yield

def __strip__(self, normalize, collapse):
return None
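The "funny generator-that-yields-nothing syntax" in the default `__children__` relies on a Python quirk worth spelling out with a standalone sketch:

```python
def no_children():
    # The bare "return" exits immediately and the unreachable "yield"
    # never runs -- but its mere presence in the body makes this a
    # generator function, so callers always get an iterator (here an
    # empty one) instead of None.
    return
    yield

assert list(no_children()) == []
```

This lets every subclass's `__children__` be iterated uniformly, with no special case for leaf nodes.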


mwparserfromhell/nodes/argument.py (+4, -7)

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -42,13 +42,10 @@ class Argument(Node):
return start + "|" + str(self.default) + "}}}"
return start + "}}}"

-def __iternodes__(self, getter):
-yield None, self
-for child in getter(self.name):
-yield self.name, child
+def __children__(self):
+yield self.name
if self.default is not None:
-for child in getter(self.default):
-yield self.default, child
+yield self.default

def __strip__(self, normalize, collapse):
if self.default is not None:


mwparserfromhell/nodes/comment.py (+1, -1)

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal


mwparserfromhell/nodes/external_link.py (+4, -7)

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -44,13 +44,10 @@ class ExternalLink(Node):
return "[" + str(self.url) + "]"
return str(self.url)

-def __iternodes__(self, getter):
-yield None, self
-for child in getter(self.url):
-yield self.url, child
+def __children__(self):
+yield self.url
if self.title is not None:
-for child in getter(self.title):
-yield self.title, child
+yield self.title

def __strip__(self, normalize, collapse):
if self.brackets:


mwparserfromhell/nodes/extras/__init__.py (+1, -1)

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal


mwparserfromhell/nodes/extras/attribute.py (+1, -1)

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal


mwparserfromhell/nodes/extras/parameter.py (+1, -1)

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal


mwparserfromhell/nodes/heading.py (+3, -5)

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -39,10 +39,8 @@ class Heading(Node):
def __unicode__(self):
return ("=" * self.level) + str(self.title) + ("=" * self.level)

-def __iternodes__(self, getter):
-yield None, self
-for child in getter(self.title):
-yield self.title, child
+def __children__(self):
+yield self.title

def __strip__(self, normalize, collapse):
return self.title.strip_code(normalize, collapse)


mwparserfromhell/nodes/html_entity.py (+1, -1)

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal


mwparserfromhell/nodes/tag.py (+8, -14)

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -70,23 +70,17 @@ class Tag(Node):
result += "</" + str(self.closing_tag) + ">"
return result

-def __iternodes__(self, getter):
-yield None, self
+def __children__(self):
if not self.wiki_markup:
-for child in getter(self.tag):
-yield self.tag, child
+yield self.tag
for attr in self.attributes:
-for child in getter(attr.name):
-yield attr.name, child
-if attr.value:
-for child in getter(attr.value):
-yield attr.value, child
+yield attr.name
+if attr.value is not None:
+yield attr.value
if self.contents:
-for child in getter(self.contents):
-yield self.contents, child
+yield self.contents
if not self.self_closing and not self.wiki_markup and self.closing_tag:
-for child in getter(self.closing_tag):
-yield self.closing_tag, child
+yield self.closing_tag

def __strip__(self, normalize, collapse):
if self.contents and is_visible(self.tag):
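Across all of these node classes, the new `__children__` generators let one generic recursive walker replace the per-node bookkeeping that `__iternodes__` required. A hypothetical, self-contained sketch of that pattern (simplified stand-in classes, with plain strings in place of Wikicode fields):

```python
class Node:
    def __children__(self):
        # Default: no children (empty generator).
        return
        yield

class Wikilink(Node):
    # Simplified stand-in: title/text are strings here, not Wikicode.
    def __init__(self, title, text=None):
        self.title, self.text = title, text

    def __children__(self):
        yield self.title
        if self.text is not None:
            yield self.text

def walk(node):
    # One generic traversal for every node type: yield the node itself,
    # then recurse into whatever its __children__ yields.
    yield node
    for child in node.__children__():
        if isinstance(child, Node):
            for descendant in walk(child):
                yield descendant
        else:
            yield child

link = Wikilink("Python (programming language)", "Python")
print([n for n in walk(link)][1:])  # just the two child fields
```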


mwparserfromhell/nodes/template.py (+8, -12)

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -26,7 +26,7 @@ import re

from . import HTMLEntity, Node, Text
from .extras import Parameter
-from ..compat import str
+from ..compat import range, str
from ..utils import parse_anything

__all__ = ["Template"]
@@ -51,16 +51,12 @@ class Template(Node):
else:
return "{{" + str(self.name) + "}}"

-def __iternodes__(self, getter):
-yield None, self
-for child in getter(self.name):
-yield self.name, child
+def __children__(self):
+yield self.name
for param in self.params:
if param.showkey:
-for child in getter(param.name):
-yield param.name, child
-for child in getter(param.value):
-yield param.value, child
+yield param.name
+yield param.value

def __showtree__(self, write, get, mark):
write("{{")
@@ -174,7 +170,7 @@ class Template(Node):
def name(self, value):
self._name = parse_anything(value)

-def has(self, name, ignore_empty=True):
+def has(self, name, ignore_empty=False):
"""Return ``True`` if any parameter in the template is named *name*.

With *ignore_empty*, ``False`` will be returned even if the template
@@ -190,7 +186,7 @@ class Template(Node):
return True
return False

-has_param = lambda self, name, ignore_empty=True: \
+has_param = lambda self, name, ignore_empty=False: \
self.has(name, ignore_empty)
has_param.__doc__ = "Alias for :py:meth:`has`."



mwparserfromhell/nodes/text.py (+1, -1)

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal


mwparserfromhell/nodes/wikilink.py (+4, -7)

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -41,13 +41,10 @@ class Wikilink(Node):
return "[[" + str(self.title) + "|" + str(self.text) + "]]"
return "[[" + str(self.title) + "]]"

-def __iternodes__(self, getter):
-yield None, self
-for child in getter(self.title):
-yield self.title, child
+def __children__(self):
+yield self.title
if self.text is not None:
-for child in getter(self.text):
-yield self.text, child
+yield self.text

def __strip__(self, normalize, collapse):
if self.text is not None:


mwparserfromhell/parser/__init__.py (+8, -4)

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -53,8 +53,12 @@ class Parser(object):
self._tokenizer = Tokenizer()
self._builder = Builder()

-def parse(self, text, context=0):
-"""Parse *text*, returning a :py:class:`~.Wikicode` object tree."""
-tokens = self._tokenizer.tokenize(text, context)
+def parse(self, text, context=0, skip_style_tags=False):
+"""Parse *text*, returning a :py:class:`~.Wikicode` object tree.
+
+If *skip_style_tags* is ``True``, then ``''`` and ``'''`` will not be
+parsed, but instead be treated as plain text.
+"""
+tokens = self._tokenizer.tokenize(text, context, skip_style_tags)
code = self._builder.build(tokens)
return code

mwparserfromhell/parser/builder.py (+1, -1)

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal


mwparserfromhell/parser/contexts.py (+30, -30)

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -55,7 +55,6 @@ Local (stack-specific) contexts:

* :py:const:`EXT_LINK_URI`
* :py:const:`EXT_LINK_TITLE`
-* :py:const:`EXT_LINK_BRACKETS`

* :py:const:`HEADING`

@@ -100,7 +99,8 @@ Aggregate contexts:
* :py:const:`FAIL`
* :py:const:`UNSAFE`
* :py:const:`DOUBLE`
-* :py:const:`INVALID_LINK`
+* :py:const:`NO_WIKILINKS`
+* :py:const:`NO_EXT_LINKS`

"""

@@ -121,38 +121,37 @@ WIKILINK = WIKILINK_TITLE + WIKILINK_TEXT

EXT_LINK_URI = 1 << 7
EXT_LINK_TITLE = 1 << 8
-EXT_LINK_BRACKETS = 1 << 9
-EXT_LINK = EXT_LINK_URI + EXT_LINK_TITLE + EXT_LINK_BRACKETS

-HEADING_LEVEL_1 = 1 << 10
-HEADING_LEVEL_2 = 1 << 11
-HEADING_LEVEL_3 = 1 << 12
-HEADING_LEVEL_4 = 1 << 13
-HEADING_LEVEL_5 = 1 << 14
-HEADING_LEVEL_6 = 1 << 15
+EXT_LINK = EXT_LINK_URI + EXT_LINK_TITLE

+HEADING_LEVEL_1 = 1 << 9
+HEADING_LEVEL_2 = 1 << 10
+HEADING_LEVEL_3 = 1 << 11
+HEADING_LEVEL_4 = 1 << 12
+HEADING_LEVEL_5 = 1 << 13
+HEADING_LEVEL_6 = 1 << 14
HEADING = (HEADING_LEVEL_1 + HEADING_LEVEL_2 + HEADING_LEVEL_3 +
HEADING_LEVEL_4 + HEADING_LEVEL_5 + HEADING_LEVEL_6)

-TAG_OPEN = 1 << 16
-TAG_ATTR = 1 << 17
-TAG_BODY = 1 << 18
-TAG_CLOSE = 1 << 19
+TAG_OPEN = 1 << 15
+TAG_ATTR = 1 << 16
+TAG_BODY = 1 << 17
+TAG_CLOSE = 1 << 18
TAG = TAG_OPEN + TAG_ATTR + TAG_BODY + TAG_CLOSE

-STYLE_ITALICS = 1 << 20
-STYLE_BOLD = 1 << 21
-STYLE_PASS_AGAIN = 1 << 22
-STYLE_SECOND_PASS = 1 << 23
+STYLE_ITALICS = 1 << 19
+STYLE_BOLD = 1 << 20
+STYLE_PASS_AGAIN = 1 << 21
+STYLE_SECOND_PASS = 1 << 22
STYLE = STYLE_ITALICS + STYLE_BOLD + STYLE_PASS_AGAIN + STYLE_SECOND_PASS

-DL_TERM = 1 << 24
+DL_TERM = 1 << 23

-HAS_TEXT = 1 << 25
-FAIL_ON_TEXT = 1 << 26
-FAIL_NEXT = 1 << 27
-FAIL_ON_LBRACE = 1 << 28
-FAIL_ON_RBRACE = 1 << 29
-FAIL_ON_EQUALS = 1 << 30
+HAS_TEXT = 1 << 24
+FAIL_ON_TEXT = 1 << 25
+FAIL_NEXT = 1 << 26
+FAIL_ON_LBRACE = 1 << 27
+FAIL_ON_RBRACE = 1 << 28
+FAIL_ON_EQUALS = 1 << 29
SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE +
FAIL_ON_RBRACE + FAIL_ON_EQUALS)

@@ -163,7 +162,8 @@ GL_HEADING = 1 << 0
# Aggregate contexts:

FAIL = TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK_TITLE + HEADING + TAG + STYLE
-UNSAFE = (TEMPLATE_NAME + WIKILINK + EXT_LINK_TITLE + TEMPLATE_PARAM_KEY +
-ARGUMENT_NAME + TAG_CLOSE)
+UNSAFE = (TEMPLATE_NAME + WIKILINK_TITLE + EXT_LINK_TITLE +
+TEMPLATE_PARAM_KEY + ARGUMENT_NAME + TAG_CLOSE)
DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE
-INVALID_LINK = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK + EXT_LINK
+NO_WIKILINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK_URI
+NO_EXT_LINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK
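The renumbered constants above all follow one bit-flag scheme: each context is a distinct power of two, and aggregates like NO_WIKILINKS are sums (equivalently, bitwise ORs) of member bits, so a membership test is a single AND. A small sketch with a few of the flags; EXT_LINK_URI's position is taken from the diff above, while the other positions are assumed for illustration:

```python
TEMPLATE_NAME  = 1 << 0
ARGUMENT_NAME  = 1 << 3   # assumed bit positions, for illustration
WIKILINK_TITLE = 1 << 5
EXT_LINK_URI   = 1 << 7   # matches the value in the diff

# Aggregate built by summing distinct bits -- identical to OR-ing them.
NO_WIKILINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK_URI
assert NO_WIKILINKS == (TEMPLATE_NAME | ARGUMENT_NAME
                        | WIKILINK_TITLE | EXT_LINK_URI)

# A tokenizer context is a bitmask; checking it against an aggregate
# is one AND, no matter how many flags the aggregate contains.
ctx = TEMPLATE_NAME | WIKILINK_TITLE
assert ctx & NO_WIKILINKS          # wikilinks are forbidden in this context
assert not (ctx & EXT_LINK_URI)    # this flag is not set
```

Summing works only because every flag occupies a unique bit; two aggregates can share members, as UNSAFE and NO_WIKILINKS do, without ambiguity.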

mwparserfromhell/parser/tokenizer.c (+139, -141)

@@ -1,6 +1,6 @@
/*
Tokenizer for MWParserFromHell
-Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
@@ -31,7 +31,7 @@ static int is_marker(Py_UNICODE this)
int i;

for (i = 0; i < NUM_MARKERS; i++) {
-if (*MARKERS[i] == this)
+if (MARKERS[i] == this)
return 1;
}
return 0;
@@ -440,7 +440,7 @@ static int
Tokenizer_emit_textbuffer(Tokenizer* self, Textbuffer* buffer, int reverse)
{
Textbuffer *original = buffer;
-int i;
+long i;

if (reverse) {
do {
@@ -642,7 +642,7 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self)
PyObject *tokenlist;

self->head += 2;
-while (Tokenizer_READ(self, 0) == *"{" && braces < MAX_BRACES) {
+while (Tokenizer_READ(self, 0) == '{' && braces < MAX_BRACES) {
self->head++;
braces++;
}
@@ -674,8 +674,8 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self)
if (BAD_ROUTE) {
char text[MAX_BRACES + 1];
RESET_ROUTE();
-for (i = 0; i < braces; i++) text[i] = *"{";
-text[braces] = *"";
+for (i = 0; i < braces; i++) text[i] = '{';
+text[braces] = '\0';
if (Tokenizer_emit_text_then_stack(self, text)) {
Py_XDECREF(text);
return -1;
@@ -872,7 +872,7 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self)

if (Tokenizer_push(self, LC_EXT_LINK_URI))
return -1;
-if (Tokenizer_READ(self, 0) == *"/" && Tokenizer_READ(self, 1) == *"/") {
+if (Tokenizer_READ(self, 0) == '/' && Tokenizer_READ(self, 1) == '/') {
if (Tokenizer_emit_text(self, "//"))
return -1;
self->head += 2;
@@ -881,7 +881,7 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self)
buffer = Textbuffer_new();
if (!buffer)
return -1;
-while ((this = Tokenizer_READ(self, 0)) != *"") {
+while ((this = Tokenizer_READ(self, 0))) {
i = 0;
while (1) {
if (!valid[i])
@@ -898,18 +898,18 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self)
self->head++;
}
end_of_loop:
-if (this != *":") {
+if (this != ':') {
Textbuffer_dealloc(buffer);
Tokenizer_fail_route(self);
return 0;
}
-if (Tokenizer_emit_char(self, *":")) {
+if (Tokenizer_emit_char(self, ':')) {
Textbuffer_dealloc(buffer);
return -1;
}
self->head++;
-slashes = (Tokenizer_READ(self, 0) == *"/" &&
-Tokenizer_READ(self, 1) == *"/");
+slashes = (Tokenizer_READ(self, 0) == '/' &&
+Tokenizer_READ(self, 1) == '/');
if (slashes) {
if (Tokenizer_emit_text(self, "//")) {
Textbuffer_dealloc(buffer);
@@ -940,7 +940,8 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self)
Textbuffer *scheme_buffer = Textbuffer_new(), *temp_buffer;
PyObject *scheme;
Py_UNICODE chunk;
-int slashes, i, j;
+long i;
+int slashes, j;

if (!scheme_buffer)
return -1;
@@ -973,8 +974,8 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self)
Textbuffer_dealloc(scheme_buffer);
return -1;
}
-slashes = (Tokenizer_READ(self, 0) == *"/" &&
-Tokenizer_READ(self, 1) == *"/");
+slashes = (Tokenizer_READ(self, 0) == '/' &&
+Tokenizer_READ(self, 1) == '/');
if (!IS_SCHEME(scheme, slashes, 1)) {
Py_DECREF(scheme);
Textbuffer_dealloc(scheme_buffer);
@@ -988,7 +989,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self)
}
if (Tokenizer_emit_textbuffer(self, scheme_buffer, 1))
return -1;
-if (Tokenizer_emit_char(self, *":"))
+if (Tokenizer_emit_char(self, ':'))
return -1;
if (slashes) {
if (Tokenizer_emit_text(self, "//"))
@@ -1014,13 +1015,13 @@ Tokenizer_handle_free_link_text(Tokenizer* self, int* parens,
return error; \
}

-if (this == *"(" && !(*parens)) {
+if (this == '(' && !(*parens)) {
*parens = 1;
PUSH_TAIL_BUFFER(*tail, -1)
}
-else if (this == *"," || this == *";" || this == *"\\" || this == *"." ||
-this == *":" || this == *"!" || this == *"?" ||
-(!(*parens) && this == *")"))
+else if (this == ',' || this == ';' || this == '\\' || this == '.' ||
+this == ':' || this == '!' || this == '?' ||
+(!(*parens) && this == ')'))
return Textbuffer_write(tail, this);
else
PUSH_TAIL_BUFFER(*tail, -1)
@@ -1037,12 +1038,12 @@ Tokenizer_is_free_link(Tokenizer* self, Py_UNICODE this, Py_UNICODE next)
Py_UNICODE after = Tokenizer_READ(self, 2);
int ctx = self->topstack->context;

-return (this == *"" || this == *"\n" || this == *"[" || this == *"]" ||
-this == *"<" || this == *">" || (this == *"'" && next == *"'") ||
-(this == *"|" && ctx & LC_TEMPLATE) ||
-(this == *"=" && ctx & (LC_TEMPLATE_PARAM_KEY | LC_HEADING)) ||
-(this == *"}" && next == *"}" &&
-(ctx & LC_TEMPLATE || (after == *"}" && ctx & LC_ARGUMENT))));
+return (!this || this == '\n' || this == '[' || this == ']' ||
+this == '<' || this == '>' || (this == '\'' && next == '\'') ||
+(this == '|' && ctx & LC_TEMPLATE) ||
+(this == '=' && ctx & (LC_TEMPLATE_PARAM_KEY | LC_HEADING)) ||
+(this == '}' && next == '}' &&
+(ctx & LC_TEMPLATE || (after == '}' && ctx & LC_ARGUMENT))));
}
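The rewritten condition reads more naturally as character literals. A rough Python transliteration of Tokenizer_is_free_link's logic (a hypothetical helper, not part of the codebase, with illustrative context-bit values) shows when a free external link is considered terminated:

```python
# Illustrative context bits; the real values live in contexts.py.
LC_TEMPLATE = 1 << 0
LC_TEMPLATE_PARAM_KEY = 1 << 1
LC_HEADING = 1 << 2
LC_ARGUMENT = 1 << 3

def is_free_link_end(this, nxt, after, ctx):
    # "" stands in for C's NUL terminator at the end of input.
    return (this in ("", "\n", "[", "]", "<", ">")
            or (this == "'" and nxt == "'")
            or (this == "|" and ctx & LC_TEMPLATE)
            or (this == "=" and ctx & (LC_TEMPLATE_PARAM_KEY | LC_HEADING))
            or (this == "}" and nxt == "}"
                and (ctx & LC_TEMPLATE
                     or (after == "}" and ctx & LC_ARGUMENT))))

print(bool(is_free_link_end("\n", "", "", 0)))          # newline ends a link
print(bool(is_free_link_end("|", "x", "x", LC_TEMPLATE)))  # pipe, inside a template
```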

/*
@@ -1061,21 +1062,21 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets,
if (BAD_ROUTE)
return NULL;
this = Tokenizer_READ(self, 0);
-if (this == *"" || this == *"\n" || this == *" " || this == *"]")
+if (!this || this == '\n' || this == ' ' || this == ']')
return Tokenizer_fail_route(self);
-if (!brackets && this == *"[")
+if (!brackets && this == '[')
return Tokenizer_fail_route(self);
while (1) {
this = Tokenizer_READ(self, 0);
next = Tokenizer_READ(self, 1);
-if (this == *"&") {
+if (this == '&') {
PUSH_TAIL_BUFFER(*extra, NULL)
if (Tokenizer_parse_entity(self))
return NULL;
}
-else if (this == *"<" && next == *"!"
-&& Tokenizer_READ(self, 2) == *"-"
-&& Tokenizer_READ(self, 3) == *"-") {
+else if (this == '<' && next == '!'
+&& Tokenizer_READ(self, 2) == '-'
+&& Tokenizer_READ(self, 3) == '-') {
PUSH_TAIL_BUFFER(*extra, NULL)
if (Tokenizer_parse_comment(self))
return NULL;
@@ -1084,16 +1085,16 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets,
self->head--;
return Tokenizer_pop(self);
}
-else if (this == *"" || this == *"\n")
+else if (!this || this == '\n')
return Tokenizer_fail_route(self);
-else if (this == *"{" && next == *"{" && Tokenizer_CAN_RECURSE(self)) {
+else if (this == '{' && next == '{' && Tokenizer_CAN_RECURSE(self)) {
PUSH_TAIL_BUFFER(*extra, NULL)
if (Tokenizer_parse_template_or_argument(self))
return NULL;
}
-else if (this == *"]")
+else if (this == ']')
return Tokenizer_pop(self);
-else if (this == *" ") {
+else if (this == ' ') {
if (brackets) {
if (Tokenizer_emit(self, ExternalLinkSeparator))
return NULL;
@@ -1102,7 +1103,7 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets,
self->head++;
return Tokenizer_parse(self, 0, 0);
}
-if (Textbuffer_write(extra, *" "))
+if (Textbuffer_write(extra, ' '))
return NULL;
return Tokenizer_pop(self);
}
@@ -1157,7 +1158,7 @@ Tokenizer_remove_uri_scheme_from_textbuffer(Tokenizer* self, PyObject* link)
*/
static int Tokenizer_parse_external_link(Tokenizer* self, int brackets)
{
-#define INVALID_CONTEXT self->topstack->context & AGG_INVALID_LINK
+#define INVALID_CONTEXT self->topstack->context & AGG_NO_EXT_LINKS
#define NOT_A_LINK \
if (!brackets && self->topstack->context & LC_DLTERM) \
return Tokenizer_handle_dl_term(self); \
@@ -1232,7 +1233,7 @@ static int Tokenizer_parse_heading(Tokenizer* self)

self->global |= GL_HEADING;
self->head += 1;
-while (Tokenizer_READ(self, 0) == *"=") {
+while (Tokenizer_READ(self, 0) == '=') {
best++;
self->head++;
}
@@ -1242,7 +1243,7 @@ static int Tokenizer_parse_heading(Tokenizer* self)
RESET_ROUTE();
self->head = reset + best - 1;
for (i = 0; i < best; i++) {
-if (Tokenizer_emit_char(self, *"="))
+if (Tokenizer_emit_char(self, '='))
return -1;
}
self->global ^= GL_HEADING;
@@ -1271,7 +1272,7 @@ static int Tokenizer_parse_heading(Tokenizer* self)
if (heading->level < best) {
diff = best - heading->level;
for (i = 0; i < diff; i++) {
-if (Tokenizer_emit_char(self, *"=")) {
+if (Tokenizer_emit_char(self, '=')) {
Py_DECREF(heading->title);
free(heading);
return -1;
@@ -1296,14 +1297,14 @@ static int Tokenizer_parse_heading(Tokenizer* self)
*/
static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self)
{
-Py_ssize_t reset = self->head, best;
-int i, current, level, diff;
+Py_ssize_t reset = self->head;
+int best, i, current, level, diff;
HeadingData *after, *heading;
PyObject *stack;

self->head += 1;
best = 1;
-while (Tokenizer_READ(self, 0) == *"=") {
+while (Tokenizer_READ(self, 0) == '=') {
best++;
self->head++;
}
@@ -1316,7 +1317,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self)
if (level < best) {
diff = best - level;
for (i = 0; i < diff; i++) {
-if (Tokenizer_emit_char(self, *"="))
+if (Tokenizer_emit_char(self, '='))
return NULL;
}
}
@@ -1324,7 +1325,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self)
}
else {
for (i = 0; i < best; i++) {
-if (Tokenizer_emit_char(self, *"=")) {
+if (Tokenizer_emit_char(self, '=')) {
Py_DECREF(after->title);
free(after);
return NULL;
@@ -1372,21 +1373,21 @@ static int Tokenizer_really_parse_entity(Tokenizer* self)
return -1;
self->head++;
this = Tokenizer_READ(self, 0);
if (this == *"") {
if (!this) {
Tokenizer_fail_route(self);
return 0;
}
if (this == *"#") {
if (this == '#') {
numeric = 1;
if (Tokenizer_emit(self, HTMLEntityNumeric))
return -1;
self->head++;
this = Tokenizer_READ(self, 0);
if (this == *"") {
if (!this) {
Tokenizer_fail_route(self);
return 0;
}
if (this == *"x" || this == *"X") {
if (this == 'x' || this == 'X') {
hexadecimal = 1;
kwargs = PyDict_New();
if (!kwargs)
@@ -1416,22 +1417,20 @@ static int Tokenizer_really_parse_entity(Tokenizer* self)
zeroes = 0;
while (1) {
this = Tokenizer_READ(self, 0);
if (this == *";") {
if (this == ';') {
if (i == 0)
FAIL_ROUTE_AND_EXIT()
break;
}
if (i == 0 && this == *"0") {
if (i == 0 && this == '0') {
zeroes++;
self->head++;
continue;
}
if (i >= MAX_ENTITY_SIZE)
FAIL_ROUTE_AND_EXIT()
for (j = 0; j < NUM_MARKERS; j++) {
if (this == *MARKERS[j])
FAIL_ROUTE_AND_EXIT()
}
if (is_marker(this))
FAIL_ROUTE_AND_EXIT()
j = 0;
while (1) {
if (!valid[j])
@@ -1508,7 +1507,7 @@ static int Tokenizer_parse_entity(Tokenizer* self)
if (BAD_ROUTE) {
RESET_ROUTE();
self->head = reset;
if (Tokenizer_emit_char(self, *"&"))
if (Tokenizer_emit_char(self, '&'))
return -1;
return 0;
}
@@ -1537,14 +1536,14 @@ static int Tokenizer_parse_comment(Tokenizer* self)
return -1;
while (1) {
this = Tokenizer_READ(self, 0);
if (this == *"") {
if (!this) {
comment = Tokenizer_pop(self);
Py_XDECREF(comment);
self->head = reset;
return Tokenizer_emit_text(self, "<!--");
}
if (this == *"-" && Tokenizer_READ(self, 1) == this &&
Tokenizer_READ(self, 2) == *">") {
if (this == '-' && Tokenizer_READ(self, 1) == this &&
Tokenizer_READ(self, 2) == '>') {
if (Tokenizer_emit_first(self, CommentStart))
return -1;
if (Tokenizer_emit(self, CommentEnd))
@@ -1654,11 +1653,11 @@ static int Tokenizer_handle_tag_text(Tokenizer* self, Py_UNICODE text)

if (!is_marker(text) || !Tokenizer_CAN_RECURSE(self))
return Tokenizer_emit_char(self, text);
else if (text == next && next == *"{")
else if (text == next && next == '{')
return Tokenizer_parse_template_or_argument(self);
else if (text == next && next == *"[")
else if (text == next && next == '[')
return Tokenizer_parse_wikilink(self);
else if (text == *"<")
else if (text == '<')
return Tokenizer_parse_tag(self);
return Tokenizer_emit_char(self, text);
}
@@ -1705,7 +1704,7 @@ Tokenizer_handle_tag_data(Tokenizer* self, TagData* data, Py_UNICODE chunk)
return -1;
}
else if (data->context & TAG_ATTR_NAME) {
if (chunk == *"=") {
if (chunk == '=') {
data->context = TAG_ATTR_VALUE | TAG_NOTE_QUOTE;
if (Tokenizer_emit(self, TagAttrEquals))
return -1;
@@ -1720,11 +1719,11 @@ Tokenizer_handle_tag_data(Tokenizer* self, TagData* data, Py_UNICODE chunk)
}
}
else if (data->context & TAG_ATTR_VALUE) {
escaped = (Tokenizer_READ_BACKWARDS(self, 1) == *"\\" &&
Tokenizer_READ_BACKWARDS(self, 2) != *"\\");
escaped = (Tokenizer_READ_BACKWARDS(self, 1) == '\\' &&
Tokenizer_READ_BACKWARDS(self, 2) != '\\');
if (data->context & TAG_NOTE_QUOTE) {
data->context ^= TAG_NOTE_QUOTE;
if (chunk == *"\"" && !escaped) {
if (chunk == '"' && !escaped) {
data->context |= TAG_QUOTED;
if (Tokenizer_push(self, self->topstack->context))
return -1;
@@ -1733,7 +1732,7 @@ Tokenizer_handle_tag_data(Tokenizer* self, TagData* data, Py_UNICODE chunk)
}
}
else if (data->context & TAG_QUOTED) {
if (chunk == *"\"" && !escaped) {
if (chunk == '"' && !escaped) {
data->context |= TAG_NOTE_SPACE;
return 0;
}
@@ -1844,15 +1843,15 @@ static PyObject* Tokenizer_handle_blacklisted_tag(Tokenizer* self)
while (1) {
this = Tokenizer_READ(self, 0);
next = Tokenizer_READ(self, 1);
if (this == *"")
if (!this)
return Tokenizer_fail_route(self);
else if (this == *"<" && next == *"/") {
else if (this == '<' && next == '/') {
if (Tokenizer_handle_tag_open_close(self))
return NULL;
self->head++;
return Tokenizer_parse(self, 0, 0);
}
else if (this == *"&") {
else if (this == '&') {
if (Tokenizer_parse_entity(self))
return NULL;
}
@@ -1957,7 +1956,7 @@ static PyObject* Tokenizer_really_parse_tag(Tokenizer* self)
next = Tokenizer_READ(self, 1);
can_exit = (!(data->context & (TAG_QUOTED | TAG_NAME)) ||
data->context & TAG_NOTE_SPACE);
if (this == *"") {
if (!this) {
if (self->topstack->context & LC_TAG_ATTR) {
if (data->context & TAG_QUOTED) {
// Unclosed attribute quote: reset, don't die
@@ -1973,7 +1972,7 @@ static PyObject* Tokenizer_really_parse_tag(Tokenizer* self)
TagData_dealloc(data);
return Tokenizer_fail_route(self);
}
else if (this == *">" && can_exit) {
else if (this == '>' && can_exit) {
if (Tokenizer_handle_tag_close_open(self, data, TagCloseOpen)) {
TagData_dealloc(data);
return NULL;
@@ -1995,7 +1994,7 @@ static PyObject* Tokenizer_really_parse_tag(Tokenizer* self)
Py_DECREF(text);
return Tokenizer_handle_blacklisted_tag(self);
}
else if (this == *"/" && next == *">" && can_exit) {
else if (this == '/' && next == '>' && can_exit) {
if (Tokenizer_handle_tag_close_open(self, data,
TagCloseSelfclose)) {
TagData_dealloc(data);
@@ -2078,7 +2077,7 @@ static int Tokenizer_parse_tag(Tokenizer* self)
if (BAD_ROUTE) {
RESET_ROUTE();
self->head = reset;
return Tokenizer_emit_char(self, *"<");
return Tokenizer_emit_char(self, '<');
}
if (!tag) {
return -1;
@@ -2165,12 +2164,12 @@ static int Tokenizer_parse_bold(Tokenizer* self)
RESET_ROUTE();
self->head = reset;
if (self->topstack->context & LC_STYLE_SECOND_PASS)
return Tokenizer_emit_char(self, *"'") ? -1 : 1;
return Tokenizer_emit_char(self, '\'') ? -1 : 1;
if (self->topstack->context & LC_STYLE_ITALICS) {
self->topstack->context |= LC_STYLE_PASS_AGAIN;
return Tokenizer_emit_text(self, "'''");
}
if (Tokenizer_emit_char(self, *"'"))
if (Tokenizer_emit_char(self, '\''))
return -1;
return Tokenizer_parse_italics(self);
}
@@ -2256,19 +2255,19 @@ static PyObject* Tokenizer_parse_style(Tokenizer* self)
int context = self->topstack->context, ticks = 2, i;

self->head += 2;
while (Tokenizer_READ(self, 0) == *"'") {
while (Tokenizer_READ(self, 0) == '\'') {
self->head++;
ticks++;
}
if (ticks > 5) {
for (i = 0; i < ticks - 5; i++) {
if (Tokenizer_emit_char(self, *"'"))
if (Tokenizer_emit_char(self, '\''))
return NULL;
}
ticks = 5;
}
else if (ticks == 4) {
if (Tokenizer_emit_char(self, *"'"))
if (Tokenizer_emit_char(self, '\''))
return NULL;
ticks = 3;
}
@@ -2281,7 +2280,7 @@ static PyObject* Tokenizer_parse_style(Tokenizer* self)
if (!Tokenizer_CAN_RECURSE(self)) {
if (ticks == 3) {
if (context & LC_STYLE_SECOND_PASS) {
if (Tokenizer_emit_char(self, *"'"))
if (Tokenizer_emit_char(self, '\''))
return NULL;
return Tokenizer_pop(self);
}
@@ -2289,7 +2288,7 @@ static PyObject* Tokenizer_parse_style(Tokenizer* self)
self->topstack->context |= LC_STYLE_PASS_AGAIN;
}
for (i = 0; i < ticks; i++) {
if (Tokenizer_emit_char(self, *"'"))
if (Tokenizer_emit_char(self, '\''))
return NULL;
}
}
@@ -2321,7 +2320,7 @@ static int Tokenizer_handle_list_marker(Tokenizer* self)
PyObject *markup = Tokenizer_read(self, 0), *kwargs;
Py_UNICODE code = *PyUnicode_AS_UNICODE(markup);

if (code == *";")
if (code == ';')
self->topstack->context |= LC_DLTERM;
kwargs = PyDict_New();
if (!kwargs)
@@ -2345,8 +2344,8 @@ static int Tokenizer_handle_list(Tokenizer* self)

if (Tokenizer_handle_list_marker(self))
return -1;
while (marker == *"#" || marker == *"*" || marker == *";" ||
marker == *":") {
while (marker == '#' || marker == '*' || marker == ';' ||
marker == ':') {
self->head++;
if (Tokenizer_handle_list_marker(self))
return -1;
@@ -2368,11 +2367,11 @@ static int Tokenizer_handle_hr(Tokenizer* self)
return -1;
self->head += 3;
for (i = 0; i < 4; i++) {
if (Textbuffer_write(&buffer, *"-"))
if (Textbuffer_write(&buffer, '-'))
return -1;
}
while (Tokenizer_READ(self, 1) == *"-") {
if (Textbuffer_write(&buffer, *"-"))
while (Tokenizer_READ(self, 1) == '-') {
if (Textbuffer_write(&buffer, '-'))
return -1;
self->head++;
}
@@ -2400,9 +2399,9 @@ static int Tokenizer_handle_hr(Tokenizer* self)
static int Tokenizer_handle_dl_term(Tokenizer* self)
{
self->topstack->context ^= LC_DLTERM;
if (Tokenizer_READ(self, 0) == *":")
if (Tokenizer_READ(self, 0) == ':')
return Tokenizer_handle_list_marker(self);
return Tokenizer_emit_char(self, *"\n");
return Tokenizer_emit_char(self, '\n');
}

/*
@@ -2441,28 +2440,26 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data)
{
if (context & LC_FAIL_NEXT)
return -1;
if (context & LC_WIKILINK) {
if (context & LC_WIKILINK_TEXT)
return (data == *"[" && Tokenizer_READ(self, 1) == *"[") ? -1 : 0;
else if (data == *"]" || data == *"{")
if (context & LC_WIKILINK_TITLE) {
if (data == ']' || data == '{')
self->topstack->context |= LC_FAIL_NEXT;
else if (data == *"\n" || data == *"[" || data == *"}")
else if (data == '\n' || data == '[' || data == '}')
return -1;
return 0;
}
if (context & LC_EXT_LINK_TITLE)
return (data == *"\n") ? -1 : 0;
return (data == '\n') ? -1 : 0;
if (context & LC_TAG_CLOSE)
return (data == *"<") ? -1 : 0;
return (data == '<') ? -1 : 0;
if (context & LC_TEMPLATE_NAME) {
if (data == *"{" || data == *"}" || data == *"[") {
if (data == '{' || data == '}' || data == '[') {
self->topstack->context |= LC_FAIL_NEXT;
return 0;
}
if (data == *"]") {
if (data == ']') {
return -1;
}
if (data == *"|")
if (data == '|')
return 0;
if (context & LC_HAS_TEXT) {
if (context & LC_FAIL_ON_TEXT) {
@@ -2470,7 +2467,7 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data)
return -1;
}
else {
if (data == *"\n")
if (data == '\n')
self->topstack->context |= LC_FAIL_ON_TEXT;
}
}
@@ -2479,13 +2476,13 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data)
}
else {
if (context & LC_FAIL_ON_EQUALS) {
if (data == *"=") {
if (data == '=') {
return -1;
}
}
else if (context & LC_FAIL_ON_LBRACE) {
if (data == *"{" || (Tokenizer_READ(self, -1) == *"{" &&
Tokenizer_READ(self, -2) == *"{")) {
if (data == '{' || (Tokenizer_READ(self, -1) == '{' &&
Tokenizer_READ(self, -2) == '{')) {
if (context & LC_TEMPLATE)
self->topstack->context |= LC_FAIL_ON_EQUALS;
else
@@ -2495,7 +2492,7 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data)
self->topstack->context ^= LC_FAIL_ON_LBRACE;
}
else if (context & LC_FAIL_ON_RBRACE) {
if (data == *"}") {
if (data == '}') {
if (context & LC_TEMPLATE)
self->topstack->context |= LC_FAIL_ON_EQUALS;
else
@@ -2504,9 +2501,9 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data)
}
self->topstack->context ^= LC_FAIL_ON_RBRACE;
}
else if (data == *"{")
else if (data == '{')
self->topstack->context |= LC_FAIL_ON_LBRACE;
else if (data == *"}")
else if (data == '}')
self->topstack->context |= LC_FAIL_ON_RBRACE;
}
return 0;
@@ -2544,11 +2541,11 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
self->head++;
continue;
}
if (this == *"")
if (!this)
return Tokenizer_handle_end(self, this_context);
next = Tokenizer_READ(self, 1);
last = Tokenizer_READ_BACKWARDS(self, 1);
if (this == next && next == *"{") {
if (this == next && next == '{') {
if (Tokenizer_CAN_RECURSE(self)) {
if (Tokenizer_parse_template_or_argument(self))
return NULL;
@@ -2556,84 +2553,83 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
else if (Tokenizer_emit_char(self, this))
return NULL;
}
else if (this == *"|" && this_context & LC_TEMPLATE) {
else if (this == '|' && this_context & LC_TEMPLATE) {
if (Tokenizer_handle_template_param(self))
return NULL;
}
else if (this == *"=" && this_context & LC_TEMPLATE_PARAM_KEY) {
else if (this == '=' && this_context & LC_TEMPLATE_PARAM_KEY) {
if (Tokenizer_handle_template_param_value(self))
return NULL;
}
else if (this == next && next == *"}" && this_context & LC_TEMPLATE)
else if (this == next && next == '}' && this_context & LC_TEMPLATE)
return Tokenizer_handle_template_end(self);
else if (this == *"|" && this_context & LC_ARGUMENT_NAME) {
else if (this == '|' && this_context & LC_ARGUMENT_NAME) {
if (Tokenizer_handle_argument_separator(self))
return NULL;
}
else if (this == next && next == *"}" && this_context & LC_ARGUMENT) {
if (Tokenizer_READ(self, 2) == *"}") {
else if (this == next && next == '}' && this_context & LC_ARGUMENT) {
if (Tokenizer_READ(self, 2) == '}') {
return Tokenizer_handle_argument_end(self);
}
if (Tokenizer_emit_char(self, this))
return NULL;
}
else if (this == next && next == *"[" && Tokenizer_CAN_RECURSE(self)) {
if (!(this_context & AGG_INVALID_LINK)) {
else if (this == next && next == '[' && Tokenizer_CAN_RECURSE(self)) {
if (!(this_context & AGG_NO_WIKILINKS)) {
if (Tokenizer_parse_wikilink(self))
return NULL;
}
else if (Tokenizer_emit_char(self, this))
return NULL;
}
else if (this == *"|" && this_context & LC_WIKILINK_TITLE) {
else if (this == '|' && this_context & LC_WIKILINK_TITLE) {
if (Tokenizer_handle_wikilink_separator(self))
return NULL;
}
else if (this == next && next == *"]" && this_context & LC_WIKILINK)
else if (this == next && next == ']' && this_context & LC_WIKILINK)
return Tokenizer_handle_wikilink_end(self);
else if (this == *"[") {
else if (this == '[') {
if (Tokenizer_parse_external_link(self, 1))
return NULL;
}
else if (this == *":" && !is_marker(last)) {
else if (this == ':' && !is_marker(last)) {
if (Tokenizer_parse_external_link(self, 0))
return NULL;
}
else if (this == *"]" && this_context & LC_EXT_LINK_TITLE)
else if (this == ']' && this_context & LC_EXT_LINK_TITLE)
return Tokenizer_pop(self);
else if (this == *"=" && !(self->global & GL_HEADING)) {
if (last == *"\n" || last == *"") {
else if (this == '=' && !(self->global & GL_HEADING)) {
if (!last || last == '\n') {
if (Tokenizer_parse_heading(self))
return NULL;
}
else if (Tokenizer_emit_char(self, this))
return NULL;
}
else if (this == *"=" && this_context & LC_HEADING)
else if (this == '=' && this_context & LC_HEADING)
return (PyObject*) Tokenizer_handle_heading_end(self);
else if (this == *"\n" && this_context & LC_HEADING)
else if (this == '\n' && this_context & LC_HEADING)
return Tokenizer_fail_route(self);
else if (this == *"&") {
else if (this == '&') {
if (Tokenizer_parse_entity(self))
return NULL;
}
else if (this == *"<" && next == *"!") {
else if (this == '<' && next == '!') {
next_next = Tokenizer_READ(self, 2);
if (next_next == Tokenizer_READ(self, 3) && next_next == *"-") {
if (next_next == Tokenizer_READ(self, 3) && next_next == '-') {
if (Tokenizer_parse_comment(self))
return NULL;
}
else if (Tokenizer_emit_char(self, this))
return NULL;
}
else if (this == *"<" && next == *"/" &&
Tokenizer_READ(self, 2) != *"") {
else if (this == '<' && next == '/' && Tokenizer_READ(self, 2)) {
if (this_context & LC_TAG_BODY ?
Tokenizer_handle_tag_open_close(self) :
Tokenizer_handle_invalid_tag_start(self))
return NULL;
}
else if (this == *"<" && !(this_context & LC_TAG_CLOSE)) {
else if (this == '<' && !(this_context & LC_TAG_CLOSE)) {
if (Tokenizer_CAN_RECURSE(self)) {
if (Tokenizer_parse_tag(self))
return NULL;
@@ -2641,19 +2637,19 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
else if (Tokenizer_emit_char(self, this))
return NULL;
}
else if (this == *">" && this_context & LC_TAG_CLOSE)
else if (this == '>' && this_context & LC_TAG_CLOSE)
return Tokenizer_handle_tag_close_close(self);
else if (this == next && next == *"'") {
else if (this == next && next == '\'' && !self->skip_style_tags) {
temp = Tokenizer_parse_style(self);
if (temp != Py_None)
return temp;
}
else if (last == *"\n" || last == *"") {
if (this == *"#" || this == *"*" || this == *";" || this == *":") {
else if (!last || last == '\n') {
if (this == '#' || this == '*' || this == ';' || this == ':') {
if (Tokenizer_handle_list(self))
return NULL;
}
else if (this == *"-" && this == next &&
else if (this == '-' && this == next &&
this == Tokenizer_READ(self, 2) &&
this == Tokenizer_READ(self, 3)) {
if (Tokenizer_handle_hr(self))
@@ -2662,7 +2658,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
else if (Tokenizer_emit_char(self, this))
return NULL;
}
else if ((this == *"\n" || this == *":") && this_context & LC_DLTERM) {
else if ((this == '\n' || this == ':') && this_context & LC_DLTERM) {
if (Tokenizer_handle_dl_term(self))
return NULL;
}
@@ -2678,9 +2674,9 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
{
PyObject *text, *temp;
int context = 0;
int context = 0, skip_style_tags = 0;

if (PyArg_ParseTuple(args, "U|i", &text, &context)) {
if (PyArg_ParseTuple(args, "U|ii", &text, &context, &skip_style_tags)) {
Py_XDECREF(self->text);
self->text = PySequence_Fast(text, "expected a sequence");
}
@@ -2689,7 +2685,8 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
Py_ssize_t size;
/* Failed to parse a Unicode object; try a string instead. */
PyErr_Clear();
if (!PyArg_ParseTuple(args, "s#|i", &encoded, &size, &context))
if (!PyArg_ParseTuple(args, "s#|ii", &encoded, &size, &context,
&skip_style_tags))
return NULL;
temp = PyUnicode_FromStringAndSize(encoded, size);
if (!text)
@@ -2701,6 +2698,7 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
}
self->head = self->global = self->depth = self->cycles = 0;
self->length = PyList_GET_SIZE(self->text);
self->skip_style_tags = skip_style_tags;
return Tokenizer_parse(self, context, 1);
}
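Several hunks above replace a manual scan over `MARKERS` (`for (j = 0; j < NUM_MARKERS; j++) ... this == *MARKERS[j]`) with a single `is_marker()` call, which became possible once `MARKERS` was turned into a plain char array in tokenizer.h. A minimal Python sketch of the equivalent membership test (the C helper presumably performs the same scan over the array):

```python
# Mirrors the new char-array MARKERS from tokenizer.h
MARKERS = "{}[]<>|=&'#*;:/-\n\0"

def is_marker(ch):
    """Return True if ch is one of the tokenizer's special characters."""
    return ch in MARKERS

assert is_marker("{") and is_marker("\n")
assert not is_marker("a")
```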



+40 -39 mwparserfromhell/parser/tokenizer.h

@@ -1,6 +1,6 @@
/*
Tokenizer Header File for MWParserFromHell
Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
@@ -41,9 +41,9 @@ SOFTWARE.
#define HEXDIGITS "0123456789abcdefABCDEF"
#define ALPHANUM "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

static const char* MARKERS[] = {
"{", "}", "[", "]", "<", ">", "|", "=", "&", "'", "#", "*", ";", ":", "/",
"-", "\n", ""};
static const char MARKERS[] = {
'{', '}', '[', ']', '<', '>', '|', '=', '&', '\'', '#', '*', ';', ':', '/',
'-', '\n', '\0'};

#define NUM_MARKERS 18
#define TEXTBUFFER_BLOCKSIZE 1024
@@ -121,40 +121,39 @@ static PyObject* TagCloseClose;
#define LC_WIKILINK_TITLE 0x00000020
#define LC_WIKILINK_TEXT 0x00000040

#define LC_EXT_LINK 0x00000380
#define LC_EXT_LINK 0x00000180
#define LC_EXT_LINK_URI 0x00000080
#define LC_EXT_LINK_TITLE 0x00000100
#define LC_EXT_LINK_BRACKETS 0x00000200

#define LC_HEADING 0x0000FC00
#define LC_HEADING_LEVEL_1 0x00000400
#define LC_HEADING_LEVEL_2 0x00000800
#define LC_HEADING_LEVEL_3 0x00001000
#define LC_HEADING_LEVEL_4 0x00002000
#define LC_HEADING_LEVEL_5 0x00004000
#define LC_HEADING_LEVEL_6 0x00008000

#define LC_TAG 0x000F0000
#define LC_TAG_OPEN 0x00010000
#define LC_TAG_ATTR 0x00020000
#define LC_TAG_BODY 0x00040000
#define LC_TAG_CLOSE 0x00080000

#define LC_STYLE 0x00F00000
#define LC_STYLE_ITALICS 0x00100000
#define LC_STYLE_BOLD 0x00200000
#define LC_STYLE_PASS_AGAIN 0x00400000
#define LC_STYLE_SECOND_PASS 0x00800000

#define LC_DLTERM 0x01000000

#define LC_SAFETY_CHECK 0x7E000000
#define LC_HAS_TEXT 0x02000000
#define LC_FAIL_ON_TEXT 0x04000000
#define LC_FAIL_NEXT 0x08000000
#define LC_FAIL_ON_LBRACE 0x10000000
#define LC_FAIL_ON_RBRACE 0x20000000
#define LC_FAIL_ON_EQUALS 0x40000000

#define LC_HEADING 0x00007E00
#define LC_HEADING_LEVEL_1 0x00000200
#define LC_HEADING_LEVEL_2 0x00000400
#define LC_HEADING_LEVEL_3 0x00000800
#define LC_HEADING_LEVEL_4 0x00001000
#define LC_HEADING_LEVEL_5 0x00002000
#define LC_HEADING_LEVEL_6 0x00004000

#define LC_TAG 0x00078000
#define LC_TAG_OPEN 0x00008000
#define LC_TAG_ATTR 0x00010000
#define LC_TAG_BODY 0x00020000
#define LC_TAG_CLOSE 0x00040000

#define LC_STYLE 0x00780000
#define LC_STYLE_ITALICS 0x00080000
#define LC_STYLE_BOLD 0x00100000
#define LC_STYLE_PASS_AGAIN 0x00200000
#define LC_STYLE_SECOND_PASS 0x00400000

#define LC_DLTERM 0x00800000

#define LC_SAFETY_CHECK 0x3F000000
#define LC_HAS_TEXT 0x01000000
#define LC_FAIL_ON_TEXT 0x02000000
#define LC_FAIL_NEXT 0x04000000
#define LC_FAIL_ON_LBRACE 0x08000000
#define LC_FAIL_ON_RBRACE 0x10000000
#define LC_FAIL_ON_EQUALS 0x20000000

/* Global contexts: */

@@ -163,9 +162,10 @@ static PyObject* TagCloseClose;
/* Aggregate contexts: */

#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE)
#define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)
#define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)
#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE)
#define AGG_INVALID_LINK (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK | LC_EXT_LINK)
#define AGG_NO_WIKILINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI)
#define AGG_NO_EXT_LINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK)

/* Tag contexts: */

@@ -223,6 +223,7 @@ typedef struct {
int global; /* global context */
int depth; /* stack recursion depth */
int cycles; /* total number of stack recursions */
int skip_style_tags; /* temporary fix for the sometimes broken tag parser */
} Tokenizer;


@@ -241,7 +242,7 @@ typedef struct {

/* Macros for accessing definitions: */

#define GET_HTML_TAG(markup) (markup == *":" ? "dd" : markup == *";" ? "dt" : "li")
#define GET_HTML_TAG(markup) (markup == ':' ? "dd" : markup == ';' ? "dt" : "li")
#define IS_PARSABLE(tag) (call_def_func("is_parsable", tag, NULL, NULL))
#define IS_SINGLE(tag) (call_def_func("is_single", tag, NULL, NULL))
#define IS_SINGLE_ONLY(tag) (call_def_func("is_single_only", tag, NULL, NULL))
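The renumbered `LC_*` context constants above must remain disjoint single bits, and each aggregate mask is just the OR of its members; the `LC_EXT_LINK` fix (0x380 → 0x180) reflects the removal of the old `LC_EXT_LINK_BRACKETS` bit (0x200). A quick sketch, with flag values copied from the new header, checking that the wikilink and ext-link aggregates compose as listed:

```python
# Values taken from the new tokenizer.h hunk above
LC_WIKILINK_TITLE = 0x00000020
LC_WIKILINK_TEXT = 0x00000040
LC_WIKILINK = LC_WIKILINK_TITLE | LC_WIKILINK_TEXT
LC_EXT_LINK_URI = 0x00000080
LC_EXT_LINK_TITLE = 0x00000100
LC_EXT_LINK = LC_EXT_LINK_URI | LC_EXT_LINK_TITLE

assert LC_WIKILINK == 0x60
assert LC_EXT_LINK == 0x180   # old value 0x380 included the removed brackets bit

# Each flag is a distinct power of two, so masks never collide:
flags = [LC_WIKILINK_TITLE, LC_WIKILINK_TEXT, LC_EXT_LINK_URI, LC_EXT_LINK_TITLE]
assert all(f & (f - 1) == 0 for f in flags)
assert len(set(flags)) == len(flags)
```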


+11 -11 mwparserfromhell/parser/tokenizer.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -25,7 +25,7 @@ from math import log
import re

from . import contexts, tokens
from ..compat import htmlentities
from ..compat import htmlentities, range
from ..definitions import (get_html_tag, is_parsable, is_single,
is_single_only, is_scheme)

@@ -467,7 +467,7 @@ class Tokenizer(object):
reset = self._head
self._head += 1
try:
bad_context = self._context & contexts.INVALID_LINK
bad_context = self._context & contexts.NO_EXT_LINKS
if bad_context or not self._can_recurse():
raise BadRoute()
link, extra, delta = self._really_parse_external_link(brackets)
@@ -620,7 +620,8 @@ class Tokenizer(object):
self._emit_first(tokens.TagAttrStart(pad_first=buf["first"],
pad_before_eq=buf["before_eq"], pad_after_eq=buf["after_eq"]))
self._emit_all(self._pop())
data.padding_buffer = {key: "" for key in data.padding_buffer}
for key in data.padding_buffer:
data.padding_buffer[key] = ""

def _handle_tag_space(self, data, text):
"""Handle whitespace (*text*) inside of an HTML open tag."""
@@ -989,10 +990,8 @@ class Tokenizer(object):
context = self._context
if context & contexts.FAIL_NEXT:
return False
if context & contexts.WIKILINK:
if context & contexts.WIKILINK_TEXT:
return not (this == self._read(1) == "[")
elif this == "]" or this == "{":
if context & contexts.WIKILINK_TITLE:
if this == "]" or this == "{":
self._context |= contexts.FAIL_NEXT
elif this == "\n" or this == "[" or this == "}":
return False
@@ -1082,7 +1081,7 @@ class Tokenizer(object):
else:
self._emit_text("}")
elif this == next == "[" and self._can_recurse():
if not self._context & contexts.INVALID_LINK:
if not self._context & contexts.NO_WIKILINKS:
self._parse_wikilink()
else:
self._emit_text("[")
@@ -1124,7 +1123,7 @@ class Tokenizer(object):
self._emit_text("<")
elif this == ">" and self._context & contexts.TAG_CLOSE:
return self._handle_tag_close_close()
elif this == next == "'":
elif this == next == "'" and not self._skip_style_tags:
result = self._parse_style()
if result is not None:
return result
@@ -1141,8 +1140,9 @@ class Tokenizer(object):
self._emit_text(this)
self._head += 1

def tokenize(self, text, context=0):
def tokenize(self, text, context=0, skip_style_tags=False):
"""Build a list of tokens from a string of wikicode and return it."""
self._skip_style_tags = skip_style_tags
split = self.regex.split(text)
self._text = [segment for segment in split if segment]
self._head = self._global = self._depth = self._cycles = 0
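The `padding_buffer` hunk above swaps a dict comprehension for an in-place loop: dict comprehensions are unavailable on Python 2.6, and mutating the existing dict (rather than rebinding the attribute to a new one) keeps any other reference to the buffer in sync. A minimal sketch of the in-place reset (dict keys here are illustrative, matching the pads used by `TagAttrStart`):

```python
buf = {"first": " ", "before_eq": "", "after_eq": " "}
alias = buf                 # another reference to the same dict object

# New approach: clear values inside the existing dict
for key in buf:
    buf[key] = ""

assert buf == {"first": "", "before_eq": "", "after_eq": ""}
assert alias is buf         # in-place reset keeps aliases valid
```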


+10 -12 mwparserfromhell/parser/tokens.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -34,15 +34,12 @@ from ..compat import py3k, str

__all__ = ["Token"]

class Token(object):
class Token(dict):
"""A token stores the semantic meaning of a unit of wikicode."""

def __init__(self, **kwargs):
super(Token, self).__setattr__("_kwargs", kwargs)

def __repr__(self):
args = []
for key, value in self._kwargs.items():
for key, value in self.items():
if isinstance(value, str) and len(value) > 100:
args.append(key + "=" + repr(value[:97] + "..."))
else:
@@ -50,18 +47,19 @@ class Token(object):
return "{0}({1})".format(type(self).__name__, ", ".join(args))

def __eq__(self, other):
if isinstance(other, type(self)):
return self._kwargs == other._kwargs
return False
return isinstance(other, type(self)) and dict.__eq__(self, other)

def __ne__(self, other):
return not self.__eq__(other)

def __getattr__(self, key):
return self._kwargs.get(key)
return self.get(key)

def __setattr__(self, key, value):
self._kwargs[key] = value
self[key] = value

def __delattr__(self, key):
del self._kwargs[key]
del self[key]


def make(name):
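`Token` now subclasses `dict`, storing its fields directly as mapping entries instead of in a hidden `_kwargs` dict, with attribute access delegating to the mapping. A standalone sketch of that pattern (a simplified stand-in, not the library class itself):

```python
class Token(dict):
    """A token stores its fields as dict items, exposed as attributes."""

    def __getattr__(self, key):
        return self.get(key)        # missing attributes read as None

    def __setattr__(self, key, value):
        self[key] = value

    def __delattr__(self, key):
        del self[key]

    def __eq__(self, other):
        return isinstance(other, type(self)) and dict.__eq__(self, other)

    __hash__ = None                  # mutable mapping, so unhashable like dict

t = Token(text="foo")
t.extra = "bar"                      # routed into the dict by __setattr__
assert t.text == "foo" and t["extra"] == "bar"
assert t.missing is None             # __getattr__ falls back to .get()
assert t == Token(text="foo", extra="bar")
```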


+18 -10 mwparserfromhell/smart_list.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -79,6 +79,11 @@ class SmartList(_SliceNormalizerMixIn, list):
[2, 3, 4]
>>> parent
[0, 1, 2, 3, 4]

The parent needs to keep a list of its children in order to update them,
which prevents them from being garbage-collected. If you are keeping the
parent around for a while but creating many children, it is advisable to
call :py:meth:`~._ListProxy.destroy` when you're finished with them.
"""

def __init__(self, iterable=None):
@@ -146,6 +151,11 @@ class SmartList(_SliceNormalizerMixIn, list):
self.extend(other)
return self

def _release_children(self):
copy = list(self)
for child in self._children:
child._parent = copy

@inheritdoc
def append(self, item):
head = len(self)
@@ -174,17 +184,13 @@ class SmartList(_SliceNormalizerMixIn, list):

@inheritdoc
def reverse(self):
copy = list(self)
for child in self._children:
child._parent = copy
self._release_children()
super(SmartList, self).reverse()

if py3k:
@inheritdoc
def sort(self, key=None, reverse=None):
copy = list(self)
for child in self._children:
child._parent = copy
self._release_children()
kwargs = {}
if key is not None:
kwargs["key"] = key
@@ -194,9 +200,7 @@ class SmartList(_SliceNormalizerMixIn, list):
else:
@inheritdoc
def sort(self, cmp=None, key=None, reverse=None):
copy = list(self)
for child in self._children:
child._parent = copy
self._release_children()
kwargs = {}
if cmp is not None:
kwargs["cmp"] = cmp
@@ -448,5 +452,9 @@ class _ListProxy(_SliceNormalizerMixIn, list):
item.sort(**kwargs)
self._parent[self._start:self._stop:self._step] = item

def destroy(self):
"""Make the parent forget this child. The child will no longer work."""
self._parent._children.pop(id(self))


del inheritdoc
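The new `destroy()` above works because the parent list tracks its children in a dict keyed by `id(child)` (which is also what keeps children from being garbage-collected); forgetting a child is a single `pop`. A condensed sketch of that bookkeeping, simplified from the real `SmartList`/`_ListProxy` pair:

```python
class Parent(list):
    def __init__(self, iterable=()):
        super().__init__(iterable)
        self._children = {}            # id(child) -> child; keeps children alive

    def child(self):
        c = Child(self)
        self._children[id(c)] = c
        return c

class Child:
    def __init__(self, parent):
        self._parent = parent

    def destroy(self):
        """Make the parent forget this child. The child will no longer work."""
        self._parent._children.pop(id(self))

p = Parent([1, 2, 3])
c = p.child()
assert id(c) in p._children
c.destroy()
assert id(c) not in p._children        # now eligible for garbage collection
```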

+16 -255 mwparserfromhell/string_mixin.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -26,8 +26,9 @@ interface for the ``unicode`` type (``str`` on py3k) in a dynamic manner.
"""

from __future__ import unicode_literals
from sys import getdefaultencoding

from .compat import py3k, py32, str
from .compat import bytes, py26, py3k, str

__all__ = ["StringMixIn"]

@@ -55,10 +56,10 @@ class StringMixIn(object):
return self.__unicode__()

def __bytes__(self):
return self.__unicode__().encode("utf8")
return bytes(self.__unicode__(), getdefaultencoding())
else:
def __str__(self):
return self.__unicode__().encode("utf8")
return bytes(self.__unicode__())

def __unicode__(self):
raise NotImplementedError()
@@ -67,33 +68,21 @@ class StringMixIn(object):
return repr(self.__unicode__())

def __lt__(self, other):
if isinstance(other, StringMixIn):
return self.__unicode__() < other.__unicode__()
return self.__unicode__() < other

def __le__(self, other):
if isinstance(other, StringMixIn):
return self.__unicode__() <= other.__unicode__()
return self.__unicode__() <= other

def __eq__(self, other):
if isinstance(other, StringMixIn):
return self.__unicode__() == other.__unicode__()
return self.__unicode__() == other

def __ne__(self, other):
if isinstance(other, StringMixIn):
return self.__unicode__() != other.__unicode__()
return self.__unicode__() != other

def __gt__(self, other):
if isinstance(other, StringMixIn):
return self.__unicode__() > other.__unicode__()
return self.__unicode__() > other

def __ge__(self, other):
if isinstance(other, StringMixIn):
return self.__unicode__() >= other.__unicode__()
return self.__unicode__() >= other

if py3k:
@@ -117,250 +106,22 @@ class StringMixIn(object):
return reversed(self.__unicode__())

def __contains__(self, item):
if isinstance(item, StringMixIn):
return str(item) in self.__unicode__()
return item in self.__unicode__()
return str(item) in self.__unicode__()

@inheritdoc
def capitalize(self):
return self.__unicode__().capitalize()

if py3k and not py32:
@inheritdoc
def casefold(self):
return self.__unicode__().casefold()

@inheritdoc
def center(self, width, fillchar=None):
if fillchar is None:
return self.__unicode__().center(width)
return self.__unicode__().center(width, fillchar)

@inheritdoc
def count(self, sub, start=None, end=None):
return self.__unicode__().count(sub, start, end)

if not py3k:
@inheritdoc
def decode(self, encoding=None, errors=None):
kwargs = {}
if encoding is not None:
kwargs["encoding"] = encoding
if errors is not None:
kwargs["errors"] = errors
return self.__unicode__().decode(**kwargs)

@inheritdoc
def encode(self, encoding=None, errors=None):
kwargs = {}
if encoding is not None:
kwargs["encoding"] = encoding
if errors is not None:
kwargs["errors"] = errors
return self.__unicode__().encode(**kwargs)

@inheritdoc
def endswith(self, prefix, start=None, end=None):
return self.__unicode__().endswith(prefix, start, end)

@inheritdoc
def expandtabs(self, tabsize=None):
if tabsize is None:
return self.__unicode__().expandtabs()
return self.__unicode__().expandtabs(tabsize)

@inheritdoc
def find(self, sub, start=None, end=None):
return self.__unicode__().find(sub, start, end)

@inheritdoc
def format(self, *args, **kwargs):
return self.__unicode__().format(*args, **kwargs)

if py3k:
@inheritdoc
def format_map(self, mapping):
return self.__unicode__().format_map(mapping)

@inheritdoc
def index(self, sub, start=None, end=None):
return self.__unicode__().index(sub, start, end)

@inheritdoc
def isalnum(self):
return self.__unicode__().isalnum()

@inheritdoc
def isalpha(self):
return self.__unicode__().isalpha()

@inheritdoc
def isdecimal(self):
return self.__unicode__().isdecimal()

@inheritdoc
def isdigit(self):
return self.__unicode__().isdigit()

if py3k:
@inheritdoc
def isidentifier(self):
return self.__unicode__().isidentifier()

@inheritdoc
def islower(self):
return self.__unicode__().islower()

@inheritdoc
def isnumeric(self):
return self.__unicode__().isnumeric()

if py3k:
@inheritdoc
def isprintable(self):
return self.__unicode__().isprintable()

@inheritdoc
def isspace(self):
return self.__unicode__().isspace()

@inheritdoc
def istitle(self):
return self.__unicode__().istitle()

@inheritdoc
def isupper(self):
return self.__unicode__().isupper()

@inheritdoc
def join(self, iterable):
return self.__unicode__().join(iterable)

@inheritdoc
def ljust(self, width, fillchar=None):
if fillchar is None:
return self.__unicode__().ljust(width)
return self.__unicode__().ljust(width, fillchar)

@inheritdoc
def lower(self):
return self.__unicode__().lower()

@inheritdoc
def lstrip(self, chars=None):
return self.__unicode__().lstrip(chars)
def __getattr__(self, attr):
return getattr(self.__unicode__(), attr)

if py3k:
@staticmethod
@inheritdoc
def maketrans(x, y=None, z=None):
if z is None:
if y is None:
return str.maketrans(x)
return str.maketrans(x, y)
return str.maketrans(x, y, z)

@inheritdoc
def partition(self, sep):
return self.__unicode__().partition(sep)
maketrans = str.maketrans # Static method can't rely on __getattr__

@inheritdoc
def replace(self, old, new, count=None):
if count is None:
return self.__unicode__().replace(old, new)
return self.__unicode__().replace(old, new, count)

@inheritdoc
def rfind(self, sub, start=None, end=None):
return self.__unicode__().rfind(sub, start, end)

@inheritdoc
def rindex(self, sub, start=None, end=None):
return self.__unicode__().rindex(sub, start, end)

@inheritdoc
def rjust(self, width, fillchar=None):
if fillchar is None:
return self.__unicode__().rjust(width)
return self.__unicode__().rjust(width, fillchar)

@inheritdoc
def rpartition(self, sep):
return self.__unicode__().rpartition(sep)

if py3k and not py32:
@inheritdoc
def rsplit(self, sep=None, maxsplit=None):
kwargs = {}
if sep is not None:
kwargs["sep"] = sep
if maxsplit is not None:
kwargs["maxsplit"] = maxsplit
return self.__unicode__().rsplit(**kwargs)
else:
if py26:
@inheritdoc
def rsplit(self, sep=None, maxsplit=None):
if maxsplit is None:
if sep is None:
return self.__unicode__().rsplit()
return self.__unicode__().rsplit(sep)
return self.__unicode__().rsplit(sep, maxsplit)

@inheritdoc
def rstrip(self, chars=None):
return self.__unicode__().rstrip(chars)

if py3k and not py32:
@inheritdoc
def split(self, sep=None, maxsplit=None):
kwargs = {}
if sep is not None:
kwargs["sep"] = sep
if maxsplit is not None:
kwargs["maxsplit"] = maxsplit
return self.__unicode__().split(**kwargs)
else:
@inheritdoc
def split(self, sep=None, maxsplit=None):
if maxsplit is None:
if sep is None:
return self.__unicode__().split()
return self.__unicode__().split(sep)
return self.__unicode__().split(sep, maxsplit)

@inheritdoc
def splitlines(self, keepends=None):
if keepends is None:
return self.__unicode__().splitlines()
return self.__unicode__().splitlines(keepends)

@inheritdoc
def startswith(self, prefix, start=None, end=None):
return self.__unicode__().startswith(prefix, start, end)

@inheritdoc
def strip(self, chars=None):
return self.__unicode__().strip(chars)

@inheritdoc
def swapcase(self):
return self.__unicode__().swapcase()

@inheritdoc
def title(self):
return self.__unicode__().title()

@inheritdoc
def translate(self, table):
return self.__unicode__().translate(table)

@inheritdoc
def upper(self):
return self.__unicode__().upper()

@inheritdoc
def zfill(self, width):
return self.__unicode__().zfill(width)
def encode(self, encoding=None, errors=None):
if encoding is None:
encoding = getdefaultencoding()
if errors is not None:
return self.__unicode__().encode(encoding, errors)
return self.__unicode__().encode(encoding)


del inheritdoc
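The diff above collapses dozens of explicit wrapper methods (`capitalize`, `count`, `find`, …) into a single `__getattr__` that forwards unknown attribute lookups to the rendered string. A minimal standalone sketch of that delegation pattern (the `Upper` subclass is hypothetical, for illustration only):

```python
class StringMixIn(object):
    """Sketch of the new StringMixIn approach: any attribute not defined
    on the mixin itself is looked up on the rendered unicode string."""

    def __unicode__(self):  # subclasses override this
        raise NotImplementedError()

    def __getattr__(self, attr):
        # __getattr__ is only called when normal lookup fails, so
        # explicitly defined methods still take precedence.
        return getattr(self.__unicode__(), attr)


class Upper(StringMixIn):
    """Hypothetical subclass whose rendering uppercases its text."""

    def __init__(self, text):
        self._text = text

    def __unicode__(self):
        return self._text.upper()


demo = Upper("hello")
print(demo.startswith("HE"))  # delegated to str.startswith -> True
print(demo.split("L"))        # delegated to str.split
```

Because `__getattr__` fires only on failed lookups, the remaining hand-written methods (like `maketrans`, a static method that cannot rely on instance attribute lookup) still override the delegated versions.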

+ 3
- 3
mwparserfromhell/utils.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,8 +21,8 @@
# SOFTWARE.

"""
This module contains accessory functions that wrap around existing ones to
provide additional functionality.
This module contains accessory functions for other parts of the library. Parser
users generally won't need stuff from here.
"""

from __future__ import unicode_literals


+ 279
- 210
mwparserfromhell/wikicode.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,9 +21,10 @@
# SOFTWARE.

from __future__ import unicode_literals
from itertools import chain
import re

from .compat import maxsize, py3k, str
from .compat import py3k, range, str
from .nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity,
Node, Tag, Template, Text, Wikilink)
from .string_mixin import StringMixIn
@@ -51,96 +52,130 @@ class Wikicode(StringMixIn):
def __unicode__(self):
return "".join([str(node) for node in self.nodes])

def _get_children(self, node):
"""Iterate over all descendants of a given *node*, including itself.

This is implemented by the ``__iternodes__()`` generator of ``Node``
classes, which by default yields itself and nothing more.
"""
for context, child in node.__iternodes__(self._get_all_nodes):
yield child

def _get_all_nodes(self, code):
"""Iterate over all of our descendant nodes.

This is implemented by calling :py:meth:`_get_children` on every node
in our node list (:py:attr:`self.nodes <nodes>`).
@staticmethod
def _get_children(node, contexts=False, parent=None):
"""Iterate over all child :py:class:`.Node`\ s of a given *node*."""
yield (parent, node) if contexts else node
for code in node.__children__():
for child in code.nodes:
for result in Wikicode._get_children(child, contexts, code):
yield result

@staticmethod
def _slice_replace(code, index, old, new):
"""Replace the string *old* with *new* across *index* in *code*."""
nodes = [str(node) for node in code.get(index)]
substring = "".join(nodes).replace(old, new)
code.nodes[index] = parse_anything(substring).nodes
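The new `_slice_replace` helper renders the nodes covered by a slice, performs a plain string replacement, and re-parses the result back into that slice. A simplified illustration with plain strings standing in for nodes (in the real method, `parse_anything` re-tokenizes the substring):

```python
def slice_replace(nodes, index, old, new):
    """Sketch of _slice_replace: join the sliced nodes into one string,
    replace old with new, and write the result back over the slice."""
    substring = "".join(nodes[index]).replace(old, new)
    nodes[index] = [substring]  # the real code re-parses into nodes here
    return nodes


# "foo" + "bar" -> "foobar"; replacing "oba" spans the node boundary:
print(slice_replace(["foo", "bar", "baz"], slice(0, 2), "oba", "OBA"))
```

This is what makes string-based `replace`/`remove` work even when the target text straddles two adjacent nodes.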

@staticmethod
def _build_matcher(matches, flags):
"""Helper for :py:meth:`_indexed_ifilter` and others.

If *matches* is a function, return it. If it's a regex, return a
wrapper around it that can be called with a node to do a search. If
it's ``None``, return a function that always returns ``True``.
"""
for node in code.nodes:
for child in self._get_children(node):
yield child

def _is_equivalent(self, obj, node):
"""Return ``True`` if *obj* and *node* are equivalent, else ``False``.

If *obj* is a ``Node``, the function will test whether they are the
same object, otherwise it will compare them with ``==``.
if matches:
if callable(matches):
return matches
return lambda obj: re.search(matches, str(obj), flags)
return lambda obj: True

def _indexed_ifilter(self, recursive=True, matches=None, flags=FLAGS,
forcetype=None):
"""Iterate over nodes and their corresponding indices in the node list.

The arguments are interpreted as for :py:meth:`ifilter`. For each tuple
``(i, node)`` yielded by this method, ``self.index(node) == i``. Note
that if *recursive* is ``True``, ``self.nodes[i]`` might not be the
node itself, but will still contain it.
"""
return (node is obj) if isinstance(obj, Node) else (node == obj)

def _contains(self, nodes, obj):
"""Return ``True`` if *obj* is inside of *nodes*, else ``False``.

If *obj* is a ``Node``, we will only return ``True`` if *obj* is
actually in the list (and not just a node that equals it). Otherwise,
the test is simply ``obj in nodes``.
match = self._build_matcher(matches, flags)
if recursive:
def getter(i, node):
for ch in self._get_children(node):
yield (i, ch)
inodes = chain(*(getter(i, n) for i, n in enumerate(self.nodes)))
else:
inodes = enumerate(self.nodes)
for i, node in inodes:
if (not forcetype or isinstance(node, forcetype)) and match(node):
yield (i, node)
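`_build_matcher` normalizes the three accepted forms of *matches* (a predicate, a regex, or ``None``) into a single callable, which `_indexed_ifilter` then applies uniformly. A self-contained sketch of that dispatch:

```python
import re


def build_matcher(matches, flags=0):
    """Sketch of _build_matcher: return a node predicate regardless of
    whether matches is a callable, a regex pattern, or None."""
    if matches:
        if callable(matches):
            return matches          # already a predicate
        # regex: search the node's string representation
        return lambda obj: re.search(matches, str(obj), flags)
    return lambda obj: True         # no filter: accept everything


print(build_matcher(None)("anything"))            # always True
print(bool(build_matcher(r"^foo")("foobar")))     # regex match
print(build_matcher(lambda s: s == "x")("x"))     # custom predicate
```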

def _do_strong_search(self, obj, recursive=True):
"""Search for the specific element *obj* within the node list.

*obj* can be either a :py:class:`.Node` or a :py:class:`.Wikicode`
object. If found, we return a tuple (*context*, *index*) where
*context* is the :py:class:`.Wikicode` that contains *obj* and *index*
is its index there, as a :py:class:`slice`. Note that if *recursive* is
``False``, *context* will always be ``self`` (since we only look for
*obj* among immediate descendants), but if *recursive* is ``True``,
then it could be any :py:class:`.Wikicode` contained by a node within
``self``. If *obj* is not found, :py:exc:`ValueError` is raised.
"""
mkslice = lambda i: slice(i, i + 1)
if isinstance(obj, Node):
for node in nodes:
if node is obj:
return True
return False
return obj in nodes

def _do_search(self, obj, recursive, context=None, literal=None):
"""Return some info about the location of *obj* within *context*.

If *recursive* is ``True``, we'll look within *context* (``self`` by
default) and its descendants, otherwise just *context*. We raise
:py:exc:`ValueError` if *obj* isn't found. The return data is a list of
3-tuples (*type*, *context*, *data*) where *type* is *obj*\ 's best
type resolution (either ``Node``, ``Wikicode``, or ``str``), *context*
is the closest ``Wikicode`` encompassing it, and *data* is either a
``Node``, a list of ``Node``\ s, or ``None`` depending on *type*.
"""
if not context:
context = self
literal = isinstance(obj, (Node, Wikicode))
obj = parse_anything(obj)
if not obj or obj not in self:
raise ValueError(obj)
if len(obj.nodes) == 1:
obj = obj.get(0)
if not recursive:
return self, mkslice(self.index(obj))
for i, node in enumerate(self.nodes):
for context, child in self._get_children(node, contexts=True):
if obj is child:
if not context:
context = self
return context, mkslice(context.index(child))
else:
context, ind = self._do_strong_search(obj.get(0), recursive)
for i in range(1, len(obj.nodes)):
if obj.get(i) is not context.get(ind.start + i):
break
else:
return context, slice(ind.start, ind.start + len(obj.nodes))
raise ValueError(obj)

compare = lambda a, b: (a is b) if literal else (a == b)
results = []
i = 0
while i < len(context.nodes):
node = context.get(i)
if isinstance(obj, Node) and compare(obj, node):
results.append((Node, context, node))
elif isinstance(obj, Wikicode) and compare(obj.get(0), node):
for j in range(1, len(obj.nodes)):
if not compare(obj.get(j), context.get(i + j)):
break
else:
nodes = list(context.nodes[i:i + len(obj.nodes)])
results.append((Wikicode, context, nodes))
i += len(obj.nodes) - 1
elif recursive:
contexts = node.__iternodes__(self._get_all_nodes)
processed = []
for code in (ctx for ctx, child in contexts):
if code and code not in processed and obj in code:
search = self._do_search(obj, recursive, code, literal)
results.extend(search)
processed.append(code)
i += 1

if not results and not literal and recursive:
results.append((str, context, None))
if not results and context is self:
def _do_weak_search(self, obj, recursive):
"""Search for an element that looks like *obj* within the node list.

This follows the same rules as :py:meth:`_do_strong_search` with some
differences. *obj* is treated as a string that might represent any
:py:class:`.Node`, :py:class:`.Wikicode`, or combination of the two
present in the node list. Thus, matching is weak (using string
comparisons) rather than strong (using ``is``). Because multiple nodes
can match *obj*, the result is a list of tuples instead of just one
(however, :py:exc:`ValueError` is still raised if nothing is found).
Individual matches will never overlap.

The tuples contain a new first element, *exact*, which is ``True`` if
we were able to match *obj* exactly to one or more adjacent nodes, or
``False`` if we found *obj* inside a node or incompletely spanning
multiple nodes.
"""
obj = parse_anything(obj)
if not obj or obj not in self:
raise ValueError(obj)
results = []
contexts = [self]
while contexts:
context = contexts.pop()
i = len(context.nodes) - 1
while i >= 0:
node = context.get(i)
if obj.get(-1) == node:
for j in range(-len(obj.nodes), -1):
if obj.get(j) != context.get(i + j + 1):
break
else:
i -= len(obj.nodes) - 1
index = slice(i, i + len(obj.nodes))
results.append((True, context, index))
elif recursive and obj in node:
contexts.extend(node.__children__())
i -= 1
if not results:
if not recursive:
raise ValueError(obj)
results.append((False, self, slice(0, len(self.nodes))))
return results
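The key distinction between the two new search methods is the comparison they use: `_do_strong_search` matches by object identity (``is``), while `_do_weak_search` matches by string equality (``==``) and can therefore hit multiple nodes. A toy illustration of the difference, using lists as stand-in node objects:

```python
def strong_index(nodes, obj):
    """Find obj by identity, like _do_strong_search."""
    for i, node in enumerate(nodes):
        if node is obj:
            return i
    raise ValueError(obj)


def weak_index(nodes, obj):
    """Find the first node equal to obj, like _do_weak_search."""
    for i, node in enumerate(nodes):
        if node == obj:
            return i
    raise ValueError(obj)


a, b = ["stub"], ["stub"]          # equal values, distinct objects
print(weak_index([a], b))          # equality matches: index 0
try:
    strong_index([a], b)           # identity fails: b is not a
except ValueError:
    print("not found by identity")
```

This is why `insert_before`/`insert_after`/`replace`/`remove` dispatch on the type of *obj*: a `Node` or `Wikicode` is a specific instance (strong search), while a string describes any matching text (weak search).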

def _get_tree(self, code, lines, marker, indent):
@@ -245,14 +280,14 @@ class Wikicode(StringMixIn):
return the index of our direct descendant node within *our* list of
nodes. Otherwise, the lookup is done only on direct descendants.
"""
if recursive:
for i, node in enumerate(self.nodes):
if self._contains(self._get_children(node), obj):
return i
raise ValueError(obj)

strict = isinstance(obj, Node)
equivalent = (lambda o, n: o is n) if strict else (lambda o, n: o == n)
for i, node in enumerate(self.nodes):
if self._is_equivalent(obj, node):
if recursive:
for child in self._get_children(node):
if equivalent(obj, child):
return i
elif equivalent(obj, node):
return i
raise ValueError(obj)

@@ -268,66 +303,79 @@ class Wikicode(StringMixIn):
self.nodes.insert(index, node)

def insert_before(self, obj, value, recursive=True):
"""Insert *value* immediately before *obj* in the list of nodes.
"""Insert *value* immediately before *obj*.

*obj* can be either a string, a :py:class:`~.Node`, or other
*obj* can be either a string, a :py:class:`~.Node`, or another
:py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`,
for example). *value* can be anything parsable by
:py:func:`.parse_anything`. If *recursive* is ``True``, we will try to
find *obj* within our child nodes even if it is not a direct descendant
of this :py:class:`~.Wikicode` object. If *obj* is not found,
for example). If *obj* is a string, we will operate on all instances
of that string within the code, otherwise only on the specific instance
given. *value* can be anything parsable by :py:func:`.parse_anything`.
If *recursive* is ``True``, we will try to find *obj* within our child
nodes even if it is not a direct descendant of this
:py:class:`~.Wikicode` object. If *obj* is not found,
:py:exc:`ValueError` is raised.
"""
for restype, context, data in self._do_search(obj, recursive):
if restype in (Node, Wikicode):
i = context.index(data if restype is Node else data[0], False)
context.insert(i, value)
else:
obj = str(obj)
context.nodes = str(context).replace(obj, str(value) + obj)
if isinstance(obj, (Node, Wikicode)):
context, index = self._do_strong_search(obj, recursive)
context.insert(index.start, value)
else:
for exact, context, index in self._do_weak_search(obj, recursive):
if exact:
context.insert(index.start, value)
else:
obj = str(obj)
self._slice_replace(context, index, obj, str(value) + obj)

def insert_after(self, obj, value, recursive=True):
"""Insert *value* immediately after *obj* in the list of nodes.
"""Insert *value* immediately after *obj*.

*obj* can be either a string, a :py:class:`~.Node`, or other
*obj* can be either a string, a :py:class:`~.Node`, or another
:py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`,
for example). *value* can be anything parsable by
:py:func:`.parse_anything`. If *recursive* is ``True``, we will try to
find *obj* within our child nodes even if it is not a direct descendant
of this :py:class:`~.Wikicode` object. If *obj* is not found,
for example). If *obj* is a string, we will operate on all instances
of that string within the code, otherwise only on the specific instance
given. *value* can be anything parsable by :py:func:`.parse_anything`.
If *recursive* is ``True``, we will try to find *obj* within our child
nodes even if it is not a direct descendant of this
:py:class:`~.Wikicode` object. If *obj* is not found,
:py:exc:`ValueError` is raised.
"""
for restype, context, data in self._do_search(obj, recursive):
if restype in (Node, Wikicode):
i = context.index(data if restype is Node else data[-1], False)
context.insert(i + 1, value)
else:
obj = str(obj)
context.nodes = str(context).replace(obj, obj + str(value))
if isinstance(obj, (Node, Wikicode)):
context, index = self._do_strong_search(obj, recursive)
context.insert(index.stop, value)
else:
for exact, context, index in self._do_weak_search(obj, recursive):
if exact:
context.insert(index.stop, value)
else:
obj = str(obj)
self._slice_replace(context, index, obj, obj + str(value))

def replace(self, obj, value, recursive=True):
"""Replace *obj* with *value* in the list of nodes.
"""Replace *obj* with *value*.

*obj* can be either a string, a :py:class:`~.Node`, or other
*obj* can be either a string, a :py:class:`~.Node`, or another
:py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`,
for example). *value* can be anything parsable by
:py:func:`.parse_anything`. If *recursive* is ``True``, we will try to
find *obj* within our child nodes even if it is not a direct descendant
of this :py:class:`~.Wikicode` object. If *obj* is not found,
for example). If *obj* is a string, we will operate on all instances
of that string within the code, otherwise only on the specific instance
given. *value* can be anything parsable by :py:func:`.parse_anything`.
If *recursive* is ``True``, we will try to find *obj* within our child
nodes even if it is not a direct descendant of this
:py:class:`~.Wikicode` object. If *obj* is not found,
:py:exc:`ValueError` is raised.
"""
for restype, context, data in self._do_search(obj, recursive):
if restype is Node:
i = context.index(data, False)
context.nodes.pop(i)
context.insert(i, value)
elif restype is Wikicode:
i = context.index(data[0], False)
for _ in data:
context.nodes.pop(i)
context.insert(i, value)
else:
context.nodes = str(context).replace(str(obj), str(value))
if isinstance(obj, (Node, Wikicode)):
context, index = self._do_strong_search(obj, recursive)
for i in range(index.start, index.stop):
context.nodes.pop(index.start)
context.insert(index.start, value)
else:
for exact, context, index in self._do_weak_search(obj, recursive):
if exact:
for i in range(index.start, index.stop):
context.nodes.pop(index.start)
context.insert(index.start, value)
else:
self._slice_replace(context, index, str(obj), str(value))
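For an exact match, `replace` pops every node covered by the slice and inserts the new value at the slice's start; popping repeatedly at `index.start` works because each pop shifts the remaining covered nodes down into that position. A compact sketch of that loop over a plain list:

```python
def replace_slice(nodes, index, value):
    """Sketch of the exact-match branch of replace(): remove every node
    in the slice, then insert the replacement where the slice began."""
    for _ in range(index.start, index.stop):
        nodes.pop(index.start)       # each pop shifts the next node here
    nodes.insert(index.start, value)
    return nodes


print(replace_slice(["a", "b", "c", "d"], slice(1, 3), "X"))
```

`remove` is the same loop without the trailing `insert`, and `insert_before`/`insert_after` insert at `index.start`/`index.stop` without popping anything.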

def append(self, value):
"""Insert *value* at the end of the list of nodes.
@@ -341,55 +389,65 @@ class Wikicode(StringMixIn):
def remove(self, obj, recursive=True):
"""Remove *obj* from the list of nodes.

*obj* can be either a string, a :py:class:`~.Node`, or other
*obj* can be either a string, a :py:class:`~.Node`, or another
:py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`,
for example). If *recursive* is ``True``, we will try to find *obj*
within our child nodes even if it is not a direct descendant of this
for example). If *obj* is a string, we will operate on all instances
of that string within the code, otherwise only on the specific instance
given. If *recursive* is ``True``, we will try to find *obj* within our
child nodes even if it is not a direct descendant of this
:py:class:`~.Wikicode` object. If *obj* is not found,
:py:exc:`ValueError` is raised.
"""
for restype, context, data in self._do_search(obj, recursive):
if restype is Node:
context.nodes.pop(context.index(data, False))
elif restype is Wikicode:
i = context.index(data[0], False)
for _ in data:
context.nodes.pop(i)
else:
context.nodes = str(context).replace(str(obj), "")
if isinstance(obj, (Node, Wikicode)):
context, index = self._do_strong_search(obj, recursive)
for i in range(index.start, index.stop):
context.nodes.pop(index.start)
else:
for exact, context, index in self._do_weak_search(obj, recursive):
if exact:
for i in range(index.start, index.stop):
context.nodes.pop(index.start)
else:
self._slice_replace(context, index, str(obj), "")

def matches(self, other):
"""Do a loose equivalency test suitable for comparing page names.

*other* can be any string-like object, including
:py:class:`~.Wikicode`. This operation is symmetric; both sides are
adjusted. Specifically, whitespace and markup is stripped and the first
letter's case is normalized. Typical usage is
:py:class:`~.Wikicode`, or a tuple of these. This operation is
symmetric; both sides are adjusted. Specifically, whitespace and markup
are stripped and the first letter's case is normalized. Typical usage is
``if template.name.matches("stub"): ...``.
"""
cmp = lambda a, b: (a[0].upper() + a[1:] == b[0].upper() + b[1:]
if a and b else a == b)
this = self.strip_code().strip()
if isinstance(other, (tuple, list)):
for obj in other:
that = parse_anything(obj).strip_code().strip()
if cmp(this, that):
return True
return False
that = parse_anything(other).strip_code().strip()
if not this or not that:
return this == that
return this[0].upper() + this[1:] == that[0].upper() + that[1:]
return cmp(this, that)
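The updated `matches` normalizes only the first letter's case (MediaWiki page names are case-sensitive except for the first character) and now accepts a tuple or list of candidates. A sketch of that comparison on pre-stripped strings (the real method also runs `strip_code().strip()` on both sides first):

```python
def loose_matches(this, other):
    """Sketch of Wikicode.matches() on already-stripped strings:
    case-normalize the first letter and accept multiple candidates."""
    cmp = lambda a, b: (a[0].upper() + a[1:] == b[0].upper() + b[1:]
                        if a and b else a == b)
    candidates = other if isinstance(other, (tuple, list)) else (other,)
    return any(cmp(this, that) for that in candidates)


print(loose_matches("stub", "Stub"))            # True: first letter only
print(loose_matches("stub", ("Foo", "stub")))   # True: tuple of options
print(loose_matches("stub", "Stubs"))           # False
```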

def ifilter(self, recursive=True, matches=None, flags=FLAGS,
forcetype=None):
"""Iterate over nodes in our list matching certain conditions.

If *recursive* is ``True``, we will iterate over our children and all
descendants of our children, otherwise just our immediate children. If
*matches* is given, we will only yield the nodes that match the given
regular expression (with :py:func:`re.search`). The default flags used
are :py:const:`re.IGNORECASE`, :py:const:`re.DOTALL`, and
:py:const:`re.UNICODE`, but custom flags can be specified by passing
*flags*. If *forcetype* is given, only nodes that are instances of this
type are yielded.
of their descendants, otherwise just our immediate children. If
*forcetype* is given, only nodes that are instances of this type are
yielded. *matches* can be used to further restrict the nodes, either as
a function (taking a single :py:class:`.Node` and returning a boolean)
or a regular expression (matched against the node's string
representation with :py:func:`re.search`). If *matches* is a regex, the
flags passed to :py:func:`re.search` are :py:const:`re.IGNORECASE`,
:py:const:`re.DOTALL`, and :py:const:`re.UNICODE`, but custom flags can
be specified by passing *flags*.
"""
for node in (self._get_all_nodes(self) if recursive else self.nodes):
if not forcetype or isinstance(node, forcetype):
if not matches or re.search(matches, str(node), flags):
yield node
return (node for i, node in
self._indexed_ifilter(recursive, matches, flags, forcetype))

def filter(self, recursive=True, matches=None, flags=FLAGS,
forcetype=None):
@@ -399,7 +457,7 @@ class Wikicode(StringMixIn):
"""
return list(self.ifilter(recursive, matches, flags, forcetype))

def get_sections(self, levels=None, matches=None, flags=FLAGS,
def get_sections(self, levels=None, matches=None, flags=FLAGS, flat=False,
include_lead=None, include_headings=True):
"""Return a list of sections within the page.

@@ -407,13 +465,13 @@ class Wikicode(StringMixIn):
node list (implemented using :py:class:`~.SmartList`) so that changes
to sections are reflected in the parent Wikicode object.

Each section contains all of its subsections. If *levels* is given, it
should be an iterable of integers; only sections whose heading levels
are within it will be returned. If *matches* is given, it should be a
regex to be matched against the titles of section headings; only
sections whose headings match the regex will be included. *flags* can
be used to override the default regex flags (see :py:meth:`ifilter`) if
*matches* is used.
Each section contains all of its subsections, unless *flat* is
``True``. If *levels* is given, it should be an iterable of integers;
only sections whose heading levels are within it will be returned. If
*matches* is given, it should be either a function or a regex; only
sections whose headings match it (without the surrounding equal signs)
will be included. *flags* can be used to override the default regex
flags (see :py:meth:`ifilter`) if a regex *matches* is used.

If *include_lead* is ``True``, the first, lead section (without a
heading) will be included in the list; ``False`` will not include it;
@@ -422,47 +480,58 @@ class Wikicode(StringMixIn):
:py:class:`~.Heading` object will be included; otherwise, this is
skipped.
"""
if matches:
matches = r"^(=+?)\s*" + matches + r"\s*\1$"
headings = self.filter_headings()
filtered = self.filter_headings(matches=matches, flags=flags)
if levels:
filtered = [head for head in filtered if head.level in levels]

if matches or include_lead is False or (not include_lead and levels):
buffers = []
else:
buffers = [(maxsize, 0)]
sections = []
i = 0
while i < len(self.nodes):
if self.nodes[i] in headings:
this = self.nodes[i].level
for (level, start) in buffers:
if this <= level:
sections.append(Wikicode(self.nodes[start:i]))
buffers = [buf for buf in buffers if buf[0] < this]
if self.nodes[i] in filtered:
if not include_headings:
i += 1
if i >= len(self.nodes):
break
buffers.append((this, i))
i += 1
for (level, start) in buffers:
if start != i:
sections.append(Wikicode(self.nodes[start:i]))
return sections
title_matcher = self._build_matcher(matches, flags)
matcher = lambda heading: (title_matcher(heading.title) and
(not levels or heading.level in levels))
iheadings = self._indexed_ifilter(recursive=False, forcetype=Heading)
sections = [] # Tuples of (index_of_first_node, section)
open_headings = [] # Tuples of (index, heading), where index and
# heading.level are both monotonically increasing

# Add the lead section if appropriate:
if include_lead or not (include_lead is not None or matches or levels):
itr = self._indexed_ifilter(recursive=False, forcetype=Heading)
try:
first = next(itr)[0]
sections.append((0, Wikicode(self.nodes[:first])))
except StopIteration: # No headings in page
sections.append((0, Wikicode(self.nodes[:])))

# Iterate over headings, adding sections to the list as they end:
for i, heading in iheadings:
if flat: # With flat, all sections close at the next heading
newly_closed, open_headings = open_headings, []
else: # Otherwise, figure out which sections have closed, if any
closed_start_index = len(open_headings)
for j, (start, last_heading) in enumerate(open_headings):
if heading.level <= last_heading.level:
closed_start_index = j
break
newly_closed = open_headings[closed_start_index:]
del open_headings[closed_start_index:]
for start, closed_heading in newly_closed:
if matcher(closed_heading):
sections.append((start, Wikicode(self.nodes[start:i])))
start = i if include_headings else (i + 1)
open_headings.append((start, heading))

# Add any remaining open headings to the list of sections:
for start, heading in open_headings:
if matcher(heading):
sections.append((start, Wikicode(self.nodes[start:])))

# Ensure that earlier sections are earlier in the returned list:
return [section for i, section in sorted(sections)]
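The rewritten `get_sections` walks the headings once, keeping a stack of open sections; a heading closes every open section at the same or a deeper-nested (numerically higher or equal) level, since each section contains its subsections. A simplified model of that loop, working on `(position, level)` pairs instead of real nodes:

```python
def section_spans(headings):
    """Sketch of the get_sections() closing logic. headings is a list of
    (position, level) pairs in page order; returns (start, end) spans,
    where end=None means the section runs to the end of the page."""
    spans, open_heads = [], []
    for pos, level in headings:
        # A heading at level N closes every open section at level >= N.
        # open_heads is monotonically increasing in level, so these form
        # a suffix of the stack.
        closed = [h for h in open_heads if level <= h[1]]
        open_heads = [h for h in open_heads if level > h[1]]
        for start, _ in closed:
            spans.append((start, pos))
        open_heads.append((pos, level))
    for start, _ in open_heads:          # anything still open runs to EOF
        spans.append((start, None))
    return sorted(spans)


# == at node 0, === at node 5, == at node 9: the level-2 heading at 9
# closes both the level-2 section and its level-3 subsection.
print(section_spans([(0, 2), (5, 3), (9, 2)]))
```

The *flat* flag in the real method simply closes *all* open sections at every heading, so sections no longer contain their subsections.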

def strip_code(self, normalize=True, collapse=True):
"""Return a rendered string without unprintable code such as templates.

The way a node is stripped is handled by the
:py:meth:`~.Node.__showtree__` method of :py:class:`~.Node` objects,
which generally return a subset of their nodes or ``None``. For
example, templates and tags are removed completely, links are stripped
to just their display part, headings are stripped to just their title.
If *normalize* is ``True``, various things may be done to strip code
:py:meth:`~.Node.__strip__` method of :py:class:`~.Node` objects, which
generally return a subset of their nodes or ``None``. For example,
templates and tags are removed completely, links are stripped to just
their display part, headings are stripped to just their title. If
*normalize* is ``True``, various things may be done to strip code
further, such as converting HTML entities like ``&Sigma;``, ``&#931;``,
and ``&#x3a3;`` to ``Σ``. If *collapse* is ``True``, we will try to
remove excess whitespace as well (three or more newlines are converted


+ 13
- 4
setup.py

@@ -1,7 +1,7 @@
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,10 +21,16 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import sys

if (sys.version_info[0] == 2 and sys.version_info[1] < 6) or \
(sys.version_info[0] == 3 and sys.version_info[1] < 2):
raise Exception('mwparserfromhell needs Python 2.6+ or 3.2+')

from setuptools import setup, find_packages, Extension

from mwparserfromhell import __version__
from mwparserfromhell.compat import py3k
from mwparserfromhell.compat import py26, py3k

with open("README.rst") as fp:
long_docs = fp.read()
@@ -36,10 +42,11 @@ setup(
name = "mwparserfromhell",
packages = find_packages(exclude=("tests",)),
ext_modules = [tokenizer],
test_suite = "tests",
tests_require = ["unittest2"] if py26 else [],
test_suite = "tests.discover",
version = __version__,
author = "Ben Kurtovic",
author_email = "ben.kurtovic@verizon.net",
author_email = "ben.kurtovic@gmail.com",
url = "https://github.com/earwig/mwparserfromhell",
description = "MWParserFromHell is a parser for MediaWiki wikicode.",
long_description = long_docs,
@@ -52,10 +59,12 @@ setup(
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 2.6",
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.2",
"Programming Language :: Python :: 3.3",
"Programming Language :: Python :: 3.4",
"Topic :: Text Processing :: Markup"
],
)

+ 1
- 1
tests/_test_tokenizer.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal


+ 8
- 12
tests/_test_tree_equality.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,8 +21,13 @@
# SOFTWARE.

from __future__ import unicode_literals
from unittest import TestCase

try:
from unittest2 import TestCase
except ImportError:
from unittest import TestCase

from mwparserfromhell.compat import range
from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity,
Tag, Template, Text, Wikilink)
from mwparserfromhell.nodes.extras import Attribute, Parameter
@@ -32,15 +37,6 @@ from mwparserfromhell.wikicode import Wikicode
wrap = lambda L: Wikicode(SmartList(L))
wraptext = lambda *args: wrap([Text(t) for t in args])

def getnodes(code):
"""Iterate over all child nodes of a given parent node.

Imitates Wikicode._get_all_nodes().
"""
for node in code.nodes:
for context, child in node.__iternodes__(getnodes):
yield child

class TreeEqualityTestCase(TestCase):
"""A base test case with support for comparing the equality of node trees.

@@ -106,7 +102,7 @@ class TreeEqualityTestCase(TestCase):
self.assertEqual(exp_attr.pad_first, act_attr.pad_first)
self.assertEqual(exp_attr.pad_before_eq, act_attr.pad_before_eq)
self.assertEqual(exp_attr.pad_after_eq, act_attr.pad_after_eq)
self.assertIs(expected.wiki_markup, actual.wiki_markup)
self.assertEqual(expected.wiki_markup, actual.wiki_markup)
self.assertIs(expected.self_closing, actual.self_closing)
self.assertIs(expected.invalid, actual.invalid)
self.assertIs(expected.implicit, actual.implicit)
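The `try`/`except ImportError` import added at the top of this file recurs throughout the test suite: on Python 2.6 it picks up the ``unittest2`` backport (which supplies 2.7-era features such as ``setUpClass`` and the richer ``assert*`` methods), and falls back to the standard library ``unittest`` everywhere else. A minimal, self-contained sketch of the same fallback:

```python
try:
    # Python 2.6: the unittest2 backport provides 2.7-era features
    # such as setUpClass and methods like assertIn.
    import unittest2 as unittest
except ImportError:
    # Python 2.7+ / 3.x: the standard library version suffices.
    import unittest

class FallbackDemo(unittest.TestCase):
    def test_rich_asserts(self):
        # assertIn is one of the methods missing before Python 2.7.
        self.assertIn("bar", "foobar")

# Run the demo case programmatically rather than via unittest.main().
suite = unittest.defaultTestLoader.loadTestsFromTestCase(FallbackDemo)
result = unittest.TextTestRunner(verbosity=0).run(suite)
```

Whichever module wins the import, the rest of the file uses the single name ``unittest``, so test bodies stay identical across interpreter versions.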


+ 0
- 2
tests/compat.py

@@ -9,12 +9,10 @@ the main library.
from mwparserfromhell.compat import py3k

if py3k:
range = range
from io import StringIO
from urllib.parse import urlencode
from urllib.request import urlopen

else:
range = xrange
from StringIO import StringIO
from urllib import urlencode, urlopen

+ 24
- 0
tests/discover.py

@@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-

"""
Discover tests using ``unittest2`` for Python 2.6.

It appears the default distutils test suite doesn't play nice with
``setUpClass``, thereby making some tests fail. Using ``unittest2`` to load

http://stackoverflow.com/a/17004409/753501
"""

import os.path

from mwparserfromhell.compat import py26

if py26:
import unittest2 as unittest
else:
import unittest

def additional_tests():
project_root = os.path.split(os.path.dirname(__file__))[0]
return unittest.defaultTestLoader.discover(project_root)

+ 16
- 17
tests/test_argument.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,12 +21,16 @@
# SOFTWARE.

from __future__ import unicode_literals
import unittest

try:
import unittest2 as unittest
except ImportError:
import unittest

from mwparserfromhell.compat import str
from mwparserfromhell.nodes import Argument, Text

from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext

class TestArgument(TreeEqualityTestCase):
"""Test cases for the Argument node."""
@@ -38,20 +42,15 @@ class TestArgument(TreeEqualityTestCase):
node2 = Argument(wraptext("foo"), wraptext("bar"))
self.assertEqual("{{{foo|bar}}}", str(node2))

def test_iternodes(self):
"""test Argument.__iternodes__()"""
node1n1 = Text("foobar")
node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("baz")
node1 = Argument(wrap([node1n1]))
node2 = Argument(wrap([node2n1]), wrap([node2n2, node2n3]))
gen1 = node1.__iternodes__(getnodes)
gen2 = node2.__iternodes__(getnodes)
self.assertEqual((None, node1), next(gen1))
self.assertEqual((None, node2), next(gen2))
self.assertEqual((node1.name, node1n1), next(gen1))
self.assertEqual((node2.name, node2n1), next(gen2))
self.assertEqual((node2.default, node2n2), next(gen2))
self.assertEqual((node2.default, node2n3), next(gen2))
def test_children(self):
"""test Argument.__children__()"""
node1 = Argument(wraptext("foobar"))
node2 = Argument(wraptext("foo"), wrap([Text("bar"), Text("baz")]))
gen1 = node1.__children__()
gen2 = node2.__children__()
self.assertIs(node1.name, next(gen1))
self.assertIs(node2.name, next(gen2))
self.assertIs(node2.default, next(gen2))
self.assertRaises(StopIteration, next, gen1)
self.assertRaises(StopIteration, next, gen2)
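The rewritten test above exercises the new ``__children__`` protocol that replaces ``__iternodes__``: instead of yielding ``(context, node)`` pairs, a node simply yields each of its child ``Wikicode`` fields in order and then stops. A rough stand-in illustrating the shape of the protocol (the ``FakeArgument`` class here is illustrative only, not the library's ``Argument``):

```python
class FakeArgument(object):
    """Illustrative node with a name and an optional default value."""

    def __init__(self, name, default=None):
        self.name = name
        self.default = default

    def __children__(self):
        # Yield each child field in order; skip absent optional fields.
        yield self.name
        if self.default is not None:
            yield self.default

gen = FakeArgument("foo", "bar").__children__()
children = [next(gen), next(gen)]
# A third next() raises StopIteration, which the tests check
# with assertRaises(StopIteration, next, gen).
```

This flat-generator design lets callers walk a tree by recursing into each yielded field, without the callback argument that ``__iternodes__`` required.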



+ 6
- 2
tests/test_attribute.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,7 +21,11 @@
# SOFTWARE.

from __future__ import unicode_literals
import unittest

try:
import unittest2 as unittest
except ImportError:
import unittest

from mwparserfromhell.compat import str
from mwparserfromhell.nodes import Template


+ 6
- 2
tests/test_builder.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,7 +21,11 @@
# SOFTWARE.

from __future__ import unicode_literals
import unittest

try:
import unittest2 as unittest
except ImportError:
import unittest

from mwparserfromhell.nodes import (Argument, Comment, ExternalLink, Heading,
HTMLEntity, Tag, Template, Text, Wikilink)


+ 9
- 6
tests/test_comment.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,7 +21,11 @@
# SOFTWARE.

from __future__ import unicode_literals
import unittest

try:
import unittest2 as unittest
except ImportError:
import unittest

from mwparserfromhell.compat import str
from mwparserfromhell.nodes import Comment
@@ -36,11 +40,10 @@ class TestComment(TreeEqualityTestCase):
node = Comment("foobar")
self.assertEqual("<!--foobar-->", str(node))

def test_iternodes(self):
"""test Comment.__iternodes__()"""
def test_children(self):
"""test Comment.__children__()"""
node = Comment("foobar")
gen = node.__iternodes__(None)
self.assertEqual((None, node), next(gen))
gen = node.__children__()
self.assertRaises(StopIteration, next, gen)

def test_strip(self):


+ 6
- 2
tests/test_ctokenizer.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,7 +21,11 @@
# SOFTWARE.

from __future__ import unicode_literals
import unittest

try:
import unittest2 as unittest
except ImportError:
import unittest

try:
from mwparserfromhell.parser._tokenizer import CTokenizer


+ 6
- 2
tests/test_docs.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -22,7 +22,11 @@

from __future__ import print_function, unicode_literals
import json
import unittest

try:
import unittest2 as unittest
except ImportError:
import unittest

import mwparserfromhell
from mwparserfromhell.compat import py3k, str


+ 17
- 18
tests/test_external_link.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,12 +21,16 @@
# SOFTWARE.

from __future__ import unicode_literals
import unittest

try:
import unittest2 as unittest
except ImportError:
import unittest

from mwparserfromhell.compat import str
from mwparserfromhell.nodes import ExternalLink, Text

from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext

class TestExternalLink(TreeEqualityTestCase):
"""Test cases for the ExternalLink node."""
@@ -43,21 +47,16 @@ class TestExternalLink(TreeEqualityTestCase):
wraptext("Example Web Page"))
self.assertEqual("[http://example.com/ Example Web Page]", str(node4))

def test_iternodes(self):
"""test ExternalLink.__iternodes__()"""
node1n1 = Text("http://example.com/")
node2n1 = Text("http://example.com/")
node2n2, node2n3 = Text("Example"), Text("Page")
node1 = ExternalLink(wrap([node1n1]), brackets=False)
node2 = ExternalLink(wrap([node2n1]), wrap([node2n2, node2n3]))
gen1 = node1.__iternodes__(getnodes)
gen2 = node2.__iternodes__(getnodes)
self.assertEqual((None, node1), next(gen1))
self.assertEqual((None, node2), next(gen2))
self.assertEqual((node1.url, node1n1), next(gen1))
self.assertEqual((node2.url, node2n1), next(gen2))
self.assertEqual((node2.title, node2n2), next(gen2))
self.assertEqual((node2.title, node2n3), next(gen2))
def test_children(self):
"""test ExternalLink.__children__()"""
node1 = ExternalLink(wraptext("http://example.com/"), brackets=False)
node2 = ExternalLink(wraptext("http://example.com/"),
wrap([Text("Example"), Text("Page")]))
gen1 = node1.__children__()
gen2 = node2.__children__()
self.assertEqual(node1.url, next(gen1))
self.assertEqual(node2.url, next(gen2))
self.assertEqual(node2.title, next(gen2))
self.assertRaises(StopIteration, next, gen1)
self.assertRaises(StopIteration, next, gen2)



+ 12
- 11
tests/test_heading.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,12 +21,16 @@
# SOFTWARE.

from __future__ import unicode_literals
import unittest

try:
import unittest2 as unittest
except ImportError:
import unittest

from mwparserfromhell.compat import str
from mwparserfromhell.nodes import Heading, Text

from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext

class TestHeading(TreeEqualityTestCase):
"""Test cases for the Heading node."""
@@ -38,14 +42,11 @@ class TestHeading(TreeEqualityTestCase):
node2 = Heading(wraptext(" zzz "), 5)
self.assertEqual("===== zzz =====", str(node2))

def test_iternodes(self):
"""test Heading.__iternodes__()"""
text1, text2 = Text("foo"), Text("bar")
node = Heading(wrap([text1, text2]), 3)
gen = node.__iternodes__(getnodes)
self.assertEqual((None, node), next(gen))
self.assertEqual((node.title, text1), next(gen))
self.assertEqual((node.title, text2), next(gen))
def test_children(self):
"""test Heading.__children__()"""
node = Heading(wrap([Text("foo"), Text("bar")]), 3)
gen = node.__children__()
self.assertEqual(node.title, next(gen))
self.assertRaises(StopIteration, next, gen)

def test_strip(self):


+ 9
- 6
tests/test_html_entity.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,7 +21,11 @@
# SOFTWARE.

from __future__ import unicode_literals
import unittest

try:
import unittest2 as unittest
except ImportError:
import unittest

from mwparserfromhell.compat import str
from mwparserfromhell.nodes import HTMLEntity
@@ -42,11 +46,10 @@ class TestHTMLEntity(TreeEqualityTestCase):
self.assertEqual("&#x6b;", str(node3))
self.assertEqual("&#X6C;", str(node4))

def test_iternodes(self):
"""test HTMLEntity.__iternodes__()"""
def test_children(self):
"""test HTMLEntity.__children__()"""
node = HTMLEntity("nbsp", named=True, hexadecimal=False)
gen = node.__iternodes__(None)
self.assertEqual((None, node), next(gen))
gen = node.__children__()
self.assertRaises(StopIteration, next, gen)

def test_strip(self):


+ 6
- 2
tests/test_parameter.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,7 +21,11 @@
# SOFTWARE.

from __future__ import unicode_literals
import unittest

try:
import unittest2 as unittest
except ImportError:
import unittest

from mwparserfromhell.compat import str
from mwparserfromhell.nodes import Text


+ 31
- 4
tests/test_parser.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,24 +21,30 @@
# SOFTWARE.

from __future__ import unicode_literals
import unittest

try:
import unittest2 as unittest
except ImportError:
import unittest

from mwparserfromhell import parser
from mwparserfromhell.nodes import Template, Text, Wikilink
from mwparserfromhell.compat import range
from mwparserfromhell.nodes import Tag, Template, Text, Wikilink
from mwparserfromhell.nodes.extras import Parameter

from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext
from .compat import range

class TestParser(TreeEqualityTestCase):
"""Tests for the Parser class itself, which tokenizes and builds nodes."""

def test_use_c(self):
"""make sure the correct tokenizer is used"""
restore = parser.use_c
if parser.use_c:
self.assertTrue(parser.Parser()._tokenizer.USES_C)
parser.use_c = False
self.assertFalse(parser.Parser()._tokenizer.USES_C)
parser.use_c = restore

def test_parsing(self):
"""integration test for parsing overall"""
@@ -62,5 +68,26 @@ class TestParser(TreeEqualityTestCase):
actual = parser.Parser().parse(text)
self.assertWikicodeEqual(expected, actual)

def test_skip_style_tags(self):
"""test Parser.parse(skip_style_tags=True)"""
def test():
with_style = parser.Parser().parse(text, skip_style_tags=False)
without_style = parser.Parser().parse(text, skip_style_tags=True)
self.assertWikicodeEqual(a, with_style)
self.assertWikicodeEqual(b, without_style)

text = "This is an example with ''italics''!"
a = wrap([Text("This is an example with "),
Tag(wraptext("i"), wraptext("italics"), wiki_markup="''"),
Text("!")])
b = wraptext("This is an example with ''italics''!")

restore = parser.use_c
if parser.use_c:
test()
parser.use_c = False
test()
parser.use_c = restore

if __name__ == "__main__":
unittest.main(verbosity=2)

+ 6
- 2
tests/test_pytokenizer.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,7 +21,11 @@
# SOFTWARE.

from __future__ import unicode_literals
import unittest

try:
import unittest2 as unittest
except ImportError:
import unittest

from mwparserfromhell.parser.tokenizer import Tokenizer



+ 7
- 5
tests/test_smart_list.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,12 +21,14 @@
# SOFTWARE.

from __future__ import unicode_literals
import unittest

from mwparserfromhell.compat import py3k
from mwparserfromhell.smart_list import SmartList, _ListProxy
try:
import unittest2 as unittest
except ImportError:
import unittest

from .compat import range
from mwparserfromhell.compat import py3k, range
from mwparserfromhell.smart_list import SmartList, _ListProxy

class TestSmartList(unittest.TestCase):
"""Test cases for the SmartList class and its child, _ListProxy."""


+ 17
- 15
tests/test_string_mixin.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -23,12 +23,14 @@
from __future__ import unicode_literals
from sys import getdefaultencoding
from types import GeneratorType
import unittest

from mwparserfromhell.compat import bytes, py3k, py32, str
from mwparserfromhell.string_mixin import StringMixIn
try:
import unittest2 as unittest
except ImportError:
import unittest

from .compat import range
from mwparserfromhell.compat import bytes, py3k, py32, range, str
from mwparserfromhell.string_mixin import StringMixIn

class _FakeString(StringMixIn):
def __init__(self, data):
@@ -59,8 +61,8 @@ class TestStringMixIn(unittest.TestCase):
else:
methods.append("decode")
for meth in methods:
expected = getattr(str, meth).__doc__
actual = getattr(StringMixIn, meth).__doc__
expected = getattr("foo", meth).__doc__
actual = getattr(_FakeString("foo"), meth).__doc__
self.assertEqual(expected, actual)

def test_types(self):
@@ -109,12 +111,12 @@ class TestStringMixIn(unittest.TestCase):
self.assertFalse(str1 < str4)
self.assertTrue(str1 <= str4)

self.assertTrue(str1 > str5)
self.assertTrue(str1 >= str5)
self.assertFalse(str1 == str5)
self.assertTrue(str1 != str5)
self.assertFalse(str1 < str5)
self.assertFalse(str1 <= str5)
self.assertFalse(str5 > str1)
self.assertFalse(str5 >= str1)
self.assertFalse(str5 == str1)
self.assertTrue(str5 != str1)
self.assertTrue(str5 < str1)
self.assertTrue(str5 <= str1)

def test_other_magics(self):
"""test other magically implemented features, like len() and iter()"""
@@ -376,7 +378,7 @@ class TestStringMixIn(unittest.TestCase):
self.assertEqual(actual, str25.rsplit(None, 3))
actual = [" this is a sentence with", "", "whitespace", ""]
self.assertEqual(actual, str25.rsplit(" ", 3))
if py3k:
if py3k and not py32:
actual = [" this is a", "sentence", "with", "whitespace"]
self.assertEqual(actual, str25.rsplit(maxsplit=3))

@@ -394,7 +396,7 @@ class TestStringMixIn(unittest.TestCase):
self.assertEqual(actual, str25.split(None, 3))
actual = ["", "", "", "this is a sentence with whitespace "]
self.assertEqual(actual, str25.split(" ", 3))
if py3k:
if py3k and not py32:
actual = ["this", "is", "a", "sentence with whitespace "]
self.assertEqual(actual, str25.split(maxsplit=3))



+ 26
- 29
tests/test_tag.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,12 +21,16 @@
# SOFTWARE.

from __future__ import unicode_literals
import unittest

try:
import unittest2 as unittest
except ImportError:
import unittest

from mwparserfromhell.compat import str
from mwparserfromhell.nodes import Tag, Template, Text
from mwparserfromhell.nodes.extras import Attribute
from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext

agen = lambda name, value: Attribute(wraptext(name), wraptext(value))
agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), False)
@@ -64,37 +68,30 @@ class TestTag(TreeEqualityTestCase):
self.assertEqual("----", str(node8))
self.assertEqual("''italics!''", str(node9))

def test_iternodes(self):
"""test Tag.__iternodes__()"""
node1n1, node1n2 = Text("ref"), Text("foobar")
node2n1, node3n1, node3n2 = Text("bold text"), Text("img"), Text("id")
node3n3, node3n4, node3n5 = Text("foo"), Text("class"), Text("bar")

def test_children(self):
"""test Tag.__children__()"""
# <ref>foobar</ref>
node1 = Tag(wrap([node1n1]), wrap([node1n2]))
node1 = Tag(wraptext("ref"), wraptext("foobar"))
# '''bold text'''
node2 = Tag(wraptext("b"), wrap([node2n1]), wiki_markup="'''")
node2 = Tag(wraptext("b"), wraptext("bold text"), wiki_markup="'''")
# <img id="foo" class="bar" />
node3 = Tag(wrap([node3n1]),
attrs=[Attribute(wrap([node3n2]), wrap([node3n3])),
Attribute(wrap([node3n4]), wrap([node3n5]))],
node3 = Tag(wraptext("img"),
attrs=[Attribute(wraptext("id"), wraptext("foo")),
Attribute(wraptext("class"), wraptext("bar"))],
self_closing=True, padding=" ")

gen1 = node1.__iternodes__(getnodes)
gen2 = node2.__iternodes__(getnodes)
gen3 = node3.__iternodes__(getnodes)
self.assertEqual((None, node1), next(gen1))
self.assertEqual((None, node2), next(gen2))
self.assertEqual((None, node3), next(gen3))
self.assertEqual((node1.tag, node1n1), next(gen1))
self.assertEqual((node3.tag, node3n1), next(gen3))
self.assertEqual((node3.attributes[0].name, node3n2), next(gen3))
self.assertEqual((node3.attributes[0].value, node3n3), next(gen3))
self.assertEqual((node3.attributes[1].name, node3n4), next(gen3))
self.assertEqual((node3.attributes[1].value, node3n5), next(gen3))
self.assertEqual((node1.contents, node1n2), next(gen1))
self.assertEqual((node2.contents, node2n1), next(gen2))
self.assertEqual((node1.closing_tag, node1n1), next(gen1))
gen1 = node1.__children__()
gen2 = node2.__children__()
gen3 = node3.__children__()
self.assertEqual(node1.tag, next(gen1))
self.assertEqual(node3.tag, next(gen3))
self.assertEqual(node3.attributes[0].name, next(gen3))
self.assertEqual(node3.attributes[0].value, next(gen3))
self.assertEqual(node3.attributes[1].name, next(gen3))
self.assertEqual(node3.attributes[1].value, next(gen3))
self.assertEqual(node1.contents, next(gen1))
self.assertEqual(node2.contents, next(gen2))
self.assertEqual(node1.closing_tag, next(gen1))
self.assertRaises(StopIteration, next, gen1)
self.assertRaises(StopIteration, next, gen2)
self.assertRaises(StopIteration, next, gen3)


+ 31
- 30
tests/test_template.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,12 +21,16 @@
# SOFTWARE.

from __future__ import unicode_literals
import unittest

try:
import unittest2 as unittest
except ImportError:
import unittest

from mwparserfromhell.compat import str
from mwparserfromhell.nodes import HTMLEntity, Template, Text
from mwparserfromhell.nodes.extras import Parameter
from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext

pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True)
pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False)
@@ -42,27 +46,21 @@ class TestTemplate(TreeEqualityTestCase):
[pgenh("1", "bar"), pgens("abc", "def")])
self.assertEqual("{{foo|bar|abc=def}}", str(node2))

def test_iternodes(self):
"""test Template.__iternodes__()"""
node1n1 = Text("foobar")
node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("abc")
node2n4, node2n5 = Text("def"), Text("ghi")
node2p1 = Parameter(wraptext("1"), wrap([node2n2]), showkey=False)
node2p2 = Parameter(wrap([node2n3]), wrap([node2n4, node2n5]),
def test_children(self):
"""test Template.__children__()"""
node2p1 = Parameter(wraptext("1"), wraptext("bar"), showkey=False)
node2p2 = Parameter(wraptext("abc"), wrap([Text("def"), Text("ghi")]),
showkey=True)
node1 = Template(wrap([node1n1]))
node2 = Template(wrap([node2n1]), [node2p1, node2p2])
node1 = Template(wraptext("foobar"))
node2 = Template(wraptext("foo"), [node2p1, node2p2])

gen1 = node1.__iternodes__(getnodes)
gen2 = node2.__iternodes__(getnodes)
self.assertEqual((None, node1), next(gen1))
self.assertEqual((None, node2), next(gen2))
self.assertEqual((node1.name, node1n1), next(gen1))
self.assertEqual((node2.name, node2n1), next(gen2))
self.assertEqual((node2.params[0].value, node2n2), next(gen2))
self.assertEqual((node2.params[1].name, node2n3), next(gen2))
self.assertEqual((node2.params[1].value, node2n4), next(gen2))
self.assertEqual((node2.params[1].value, node2n5), next(gen2))
gen1 = node1.__children__()
gen2 = node2.__children__()
self.assertEqual(node1.name, next(gen1))
self.assertEqual(node2.name, next(gen2))
self.assertEqual(node2.params[0].value, next(gen2))
self.assertEqual(node2.params[1].name, next(gen2))
self.assertEqual(node2.params[1].value, next(gen2))
self.assertRaises(StopIteration, next, gen1)
self.assertRaises(StopIteration, next, gen2)

@@ -123,15 +121,15 @@ class TestTemplate(TreeEqualityTestCase):
node3 = Template(wraptext("foo"),
[pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")])
node4 = Template(wraptext("foo"), [pgenh("1", "a"), pgens("b", " ")])
self.assertFalse(node1.has("foobar"))
self.assertTrue(node2.has(1))
self.assertTrue(node2.has("abc"))
self.assertFalse(node2.has("def"))
self.assertTrue(node3.has("1"))
self.assertTrue(node3.has(" b "))
self.assertFalse(node4.has("b"))
self.assertTrue(node3.has("b", False))
self.assertFalse(node1.has("foobar", False))
self.assertTrue(node2.has(1, False))
self.assertTrue(node2.has("abc", False))
self.assertFalse(node2.has("def", False))
self.assertTrue(node3.has("1", False))
self.assertTrue(node3.has(" b ", False))
self.assertTrue(node4.has("b", False))
self.assertTrue(node3.has("b", True))
self.assertFalse(node4.has("b", True))

def test_get(self):
"""test Template.get()"""
@@ -223,6 +221,7 @@ class TestTemplate(TreeEqualityTestCase):
pgenh("1", "c"), pgenh("2", "d")])
node40 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"),
pgens("f", "g")])
node41 = Template(wraptext("a"), [pgenh("1", "")])

node1.add("e", "f", showkey=True)
node2.add(2, "g", showkey=False)
@@ -266,6 +265,7 @@ class TestTemplate(TreeEqualityTestCase):
node38.add("1", "e")
node39.add("1", "e")
node40.add("d", "h", before="b")
node41.add(1, "b")

self.assertEqual("{{a|b=c|d|e=f}}", node1)
self.assertEqual("{{a|b=c|d|g}}", node2)
@@ -312,6 +312,7 @@ class TestTemplate(TreeEqualityTestCase):
self.assertEqual("{{a|1=e|x=y|2=d}}", node38)
self.assertEqual("{{a|x=y|e|d}}", node39)
self.assertEqual("{{a|b=c|d=h|f=g}}", node40)
self.assertEqual("{{a|b}}", node41)

def test_remove(self):
"""test Template.remove()"""


+ 9
- 6
tests/test_text.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,7 +21,11 @@
# SOFTWARE.

from __future__ import unicode_literals
import unittest

try:
import unittest2 as unittest
except ImportError:
import unittest

from mwparserfromhell.compat import str
from mwparserfromhell.nodes import Text
@@ -36,11 +40,10 @@ class TestText(unittest.TestCase):
node2 = Text("fóóbar")
self.assertEqual("fóóbar", str(node2))

def test_iternodes(self):
"""test Text.__iternodes__()"""
def test_children(self):
"""test Text.__children__()"""
node = Text("foobar")
gen = node.__iternodes__(None)
self.assertEqual((None, node), next(gen))
gen = node.__children__()
self.assertRaises(StopIteration, next, gen)

def test_strip(self):


+ 6
- 2
tests/test_tokens.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,7 +21,11 @@
# SOFTWARE.

from __future__ import unicode_literals
import unittest

try:
import unittest2 as unittest
except ImportError:
import unittest

from mwparserfromhell.compat import py3k
from mwparserfromhell.parser import tokens


+ 6
- 2
tests/test_utils.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,7 +21,11 @@
# SOFTWARE.

from __future__ import unicode_literals
import unittest

try:
import unittest2 as unittest
except ImportError:
import unittest

from mwparserfromhell.nodes import Template, Text
from mwparserfromhell.utils import parse_anything


+ 49
- 16
tests/test_wikicode.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -24,14 +24,18 @@ from __future__ import unicode_literals
from functools import partial
import re
from types import GeneratorType
import unittest

try:
import unittest2 as unittest
except ImportError:
import unittest

from mwparserfromhell.compat import py3k, str
from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity,
Node, Tag, Template, Text, Wikilink)
from mwparserfromhell.smart_list import SmartList
from mwparserfromhell.wikicode import Wikicode
from mwparserfromhell import parse
from mwparserfromhell.compat import py3k, str

from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext

@@ -242,6 +246,7 @@ class TestWikicode(TreeEqualityTestCase):
"""test Wikicode.matches()"""
code1 = parse("Cleanup")
code2 = parse("\nstub<!-- TODO: make more specific -->")
code3 = parse("")
self.assertTrue(code1.matches("Cleanup"))
self.assertTrue(code1.matches("cleanup"))
self.assertTrue(code1.matches(" cleanup\n"))
@@ -250,13 +255,22 @@ class TestWikicode(TreeEqualityTestCase):
self.assertTrue(code2.matches("stub"))
self.assertTrue(code2.matches("Stub<!-- no, it's fine! -->"))
self.assertFalse(code2.matches("StuB"))
self.assertTrue(code1.matches(("cleanup", "stub")))
self.assertTrue(code2.matches(("cleanup", "stub")))
self.assertFalse(code2.matches(("StuB", "sTUb", "foobar")))
self.assertFalse(code2.matches(["StuB", "sTUb", "foobar"]))
self.assertTrue(code2.matches(("StuB", "sTUb", "foo", "bar", "Stub")))
self.assertTrue(code2.matches(["StuB", "sTUb", "foo", "bar", "Stub"]))
self.assertTrue(code3.matches(""))
self.assertTrue(code3.matches("<!-- nothing -->"))
self.assertTrue(code3.matches(("a", "b", "")))

def test_filter_family(self):
"""test the Wikicode.i?filter() family of functions"""
def genlist(gen):
self.assertIsInstance(gen, GeneratorType)
return list(gen)
ifilter = lambda code: (lambda **kw: genlist(code.ifilter(**kw)))
ifilter = lambda code: (lambda *a, **k: genlist(code.ifilter(*a, **k)))

code = parse("a{{b}}c[[d]]{{{e}}}{{f}}[[g]]")
for func in (code.filter, ifilter(code)):
@@ -292,21 +306,27 @@ class TestWikicode(TreeEqualityTestCase):
"{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"],
func(recursive=True, forcetype=Template))

code3 = parse("{{foobar}}{{FOO}}{{baz}}{{bz}}")
code3 = parse("{{foobar}}{{FOO}}{{baz}}{{bz}}{{barfoo}}")
for func in (code3.filter, ifilter(code3)):
self.assertEqual(["{{foobar}}", "{{FOO}}"], func(recursive=False, matches=r"foo"))
self.assertEqual(["{{foobar}}", "{{barfoo}}"],
func(False, matches=lambda node: "foo" in node))
self.assertEqual(["{{foobar}}", "{{FOO}}", "{{barfoo}}"],
func(False, matches=r"foo"))
self.assertEqual(["{{foobar}}", "{{FOO}}"],
func(recursive=False, matches=r"^{{foo.*?}}"))
func(matches=r"^{{foo.*?}}"))
self.assertEqual(["{{foobar}}"],
func(recursive=False, matches=r"^{{foo.*?}}", flags=re.UNICODE))
self.assertEqual(["{{baz}}", "{{bz}}"], func(recursive=False, matches=r"^{{b.*?z"))
self.assertEqual(["{{baz}}"], func(recursive=False, matches=r"^{{b.+?z}}"))
func(matches=r"^{{foo.*?}}", flags=re.UNICODE))
self.assertEqual(["{{baz}}", "{{bz}}"], func(matches=r"^{{b.*?z"))
self.assertEqual(["{{baz}}"], func(matches=r"^{{b.+?z}}"))

self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"],
code2.filter_templates(recursive=False))
self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}",
"{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"],
code2.filter_templates(recursive=True))

self.assertEqual(["{{foobar}}"], code3.filter_templates(
matches=lambda node: node.name.matches("Foobar")))
self.assertEqual(["{{baz}}", "{{bz}}"],
code3.filter_templates(matches=r"^{{b.*?z"))
self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z"))
@@ -335,35 +355,43 @@ class TestWikicode(TreeEqualityTestCase):
p4_III = "== Section III ==\n" + p4_IIIA
page4 = parse(p4_lead + p4_I + p4_II + p4_III)

self.assertEqual([], page1.get_sections())
self.assertEqual([""], page1.get_sections())
self.assertEqual(["", "==Heading=="], page2.get_sections())
self.assertEqual(["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n",
"====Gnidaeh====\n"], page3.get_sections())
self.assertEqual([p4_lead, p4_IA, p4_I, p4_IB, p4_IB1, p4_II,
p4_IIIA1a, p4_III, p4_IIIA, p4_IIIA2, p4_IIIA2ai1],
self.assertEqual([p4_lead, p4_I, p4_IA, p4_IB, p4_IB1, p4_II,
p4_III, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1],
page4.get_sections())

self.assertEqual(["====Gnidaeh====\n"], page3.get_sections(levels=[4]))
self.assertEqual(["===Heading===\nFoo bar baz\n====Gnidaeh====\n"],
page3.get_sections(levels=(2, 3)))
self.assertEqual(["===Heading===\nFoo bar baz\n"],
page3.get_sections(levels=(2, 3), flat=True))
self.assertEqual([], page3.get_sections(levels=[0]))
self.assertEqual(["", "====Gnidaeh====\n"],
page3.get_sections(levels=[4], include_lead=True))
self.assertEqual(["===Heading===\nFoo bar baz\n====Gnidaeh====\n",
"====Gnidaeh====\n"],
page3.get_sections(include_lead=False))
self.assertEqual(["===Heading===\nFoo bar baz\n", "====Gnidaeh====\n"],
page3.get_sections(flat=True, include_lead=False))

self.assertEqual([p4_IB1, p4_IIIA2], page4.get_sections(levels=[4]))
self.assertEqual([""], page2.get_sections(include_headings=False))
self.assertEqual([p4_IA, p4_IB, p4_IIIA], page4.get_sections(levels=[3]))
self.assertEqual([p4_IA, "=== Section I.B ===\n",
"=== Section III.A ===\nText.\n"],
page4.get_sections(levels=[3], flat=True))
self.assertEqual(["", ""], page2.get_sections(include_headings=False))
self.assertEqual(["\nSection I.B.1 body.\n\n&bull;Some content.\n\n",
"\nEven more text.\n" + p4_IIIA2ai1],
page4.get_sections(levels=[4],
include_headings=False))

self.assertEqual([], page4.get_sections(matches=r"body"))
self.assertEqual([p4_IA, p4_I, p4_IB, p4_IB1],
self.assertEqual([p4_I, p4_IA, p4_IB, p4_IB1],
page4.get_sections(matches=r"Section\sI[.\s].*?"))
self.assertEqual([p4_IA, p4_IIIA1a, p4_IIIA, p4_IIIA2, p4_IIIA2ai1],
self.assertEqual([p4_IA, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1],
page4.get_sections(matches=r".*?a.*?"))
self.assertEqual([p4_IIIA1a, p4_IIIA2ai1],
page4.get_sections(matches=r".*?a.*?", flags=re.U))
@@ -371,6 +399,11 @@ class TestWikicode(TreeEqualityTestCase):
page4.get_sections(matches=r".*?a.*?", flags=re.U,
include_headings=False))

sections = page2.get_sections(include_headings=False)
sections[0].append("Lead!\n")
sections[1].append("\nFirst section!")
self.assertEqual("Lead!\n==Heading==\nFirst section!", page2)

page5 = parse("X\n== Foo ==\nBar\n== Baz ==\nBuzz")
section = page5.get_sections(matches="Foo")[0]
section.replace("\nBar\n", "\nBarf ")


+16 -17 tests/test_wikilink.py

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,12 +21,16 @@
# SOFTWARE.

from __future__ import unicode_literals
import unittest

try:
import unittest2 as unittest
except ImportError:
import unittest

from mwparserfromhell.compat import str
from mwparserfromhell.nodes import Text, Wikilink

from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext

class TestWikilink(TreeEqualityTestCase):
"""Test cases for the Wikilink node."""
@@ -38,20 +42,15 @@ class TestWikilink(TreeEqualityTestCase):
node2 = Wikilink(wraptext("foo"), wraptext("bar"))
self.assertEqual("[[foo|bar]]", str(node2))

def test_iternodes(self):
"""test Wikilink.__iternodes__()"""
node1n1 = Text("foobar")
node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("baz")
node1 = Wikilink(wrap([node1n1]))
node2 = Wikilink(wrap([node2n1]), wrap([node2n2, node2n3]))
gen1 = node1.__iternodes__(getnodes)
gen2 = node2.__iternodes__(getnodes)
self.assertEqual((None, node1), next(gen1))
self.assertEqual((None, node2), next(gen2))
self.assertEqual((node1.title, node1n1), next(gen1))
self.assertEqual((node2.title, node2n1), next(gen2))
self.assertEqual((node2.text, node2n2), next(gen2))
self.assertEqual((node2.text, node2n3), next(gen2))
def test_children(self):
"""test Wikilink.__children__()"""
node1 = Wikilink(wraptext("foobar"))
node2 = Wikilink(wraptext("foo"), wrap([Text("bar"), Text("baz")]))
gen1 = node1.__children__()
gen2 = node2.__children__()
self.assertEqual(node1.title, next(gen1))
self.assertEqual(node2.title, next(gen2))
self.assertEqual(node2.text, next(gen2))
self.assertRaises(StopIteration, next, gen1)
self.assertRaises(StopIteration, next, gen2)



+28 -0 tests/tokenizer/integration.mwtest

@@ -150,3 +150,31 @@ name: comment_inside_bracketed_link
label: an HTML comment inside a bracketed external link
input: "[http://example.com/foo<!--comment-->bar]"
output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com/foo"), CommentStart(), Text(text="comment"), CommentEnd(), Text(text="bar"), ExternalLinkClose()]

---

name: wikilink_inside_external_link
label: a wikilink inside an external link, which the parser considers valid (see issue #61)
input: "[http://example.com/foo Foo [[Bar]]]"
output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com/foo"), ExternalLinkSeparator(), Text(text="Foo "), WikilinkOpen(), Text(text="Bar"), WikilinkClose(), ExternalLinkClose()]

---

name: external_link_inside_wikilink
label: an external link inside a wikilink, valid in the case of images (see issue #62)
input: "[[File:Example.png|thumb|http://example.com]]"
output: [WikilinkOpen(), Text(text="File:Example.png"), WikilinkSeparator(), Text(text="thumb|"), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), WikilinkClose()]

---

name: external_link_inside_wikilink_brackets
label: an external link with brackets inside a wikilink
input: "[[File:Example.png|thumb|[http://example.com Example]]]"
output: [WikilinkOpen(), Text(text="File:Example.png"), WikilinkSeparator(), Text(text="thumb|"), ExternalLinkOpen(brackets=True), Text(text="http://example.com"), ExternalLinkSeparator(), Text(text="Example"), ExternalLinkClose(), WikilinkClose()]

---

name: external_link_inside_wikilink_title
label: an external link inside a wikilink title, which is invalid
input: "[[File:Example.png http://example.com]]"
output: [WikilinkOpen(), Text(text="File:Example.png http://example.com"), WikilinkClose()]

+16 -14 tests/tokenizer/wikilinks.mwtest

@@ -54,6 +54,20 @@ output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="bar[b

---

name: nested
label: a wikilink nested within another
input: "[[foo|[[bar]]]]"
output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), WikilinkOpen(), Text(text="bar"), WikilinkClose(), WikilinkClose()]

---

name: nested_padding
label: a wikilink nested within another, separated by other data
input: "[[foo|a[[b]]c]]"
output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="a"), WikilinkOpen(), Text(text="b"), WikilinkClose(), Text(text="c"), WikilinkClose()]

---

name: invalid_newline
label: invalid wikilink: newline as only content
input: "[[\n]]"
@@ -103,27 +117,13 @@ output: [Text(text="[[foo"), WikilinkOpen(), Text(text="bar"), WikilinkClose(),

---

name: invalid_nested_text
label: invalid wikilink: a wikilink nested within the value of another
name: invalid_nested_no_close
label: invalid wikilink: a wikilink nested within the value of another, missing a pair of closing brackets
input: "[[foo|[[bar]]"
output: [Text(text="[[foo|"), WikilinkOpen(), Text(text="bar"), WikilinkClose()]

---

name: invalid_nested_text_2
label: invalid wikilink: a wikilink nested within the value of another, two pairs of closing brackets
input: "[[foo|[[bar]]]]"
output: [Text(text="[[foo|"), WikilinkOpen(), Text(text="bar"), WikilinkClose(), Text(text="]]")]

---

name: invalid_nested_text_padding
label: invalid wikilink: a wikilink nested within the value of another, separated by other data
input: "[[foo|a[[b]]c]]"
output: [Text(text="[[foo|a"), WikilinkOpen(), Text(text="b"), WikilinkClose(), Text(text="c]]")]

---

name: incomplete_open_only
label: incomplete wikilinks: just an open
input: "[["

