Merge branch 'feature/external_links' into develop (closes #39)

11 år sedan · 71e9822ab0
--- a/+ 6
+++ b/+ 6
@@ -1,8 +1,10 @@
 v0.3 (unreleased):

 - Added complete support for HTML Tags, along with appropriate unit tests. This
  includes forms like <ref>foo</ref>, <ref name="bar"/>, and wiki-markup tags
  like bold ('''), italics (''), and lists (*, #, ; and :).
 - Added complete support for HTML Tags, including forms like <ref>foo</ref>,
  <ref name="bar"/>, and wiki-markup tags like bold ('''), italics (''), and
  lists (*, #, ; and :).
 - Added support for ExternalLinks (http://example.com/ and
  [http://example.com/ Example]).
 - Wikicode's filter methods are now passed 'recursive=True' by default instead
  of False. This is a breaking change if you rely on any filter() methods being
  non-recursive by default.
@@ -14,7 +16,7 @@ v0.3 (unreleased):
 - Renamed Template.has_param() to has() for consistency with Template's other
  methods; has_param() is now an alias.
 - The C tokenizer extension now works on Python 3 in addition to Python 2.7.
 - Various fixes and cleanup.
 - Various bugfixes, internal changes, and cleanup.

 v0.2 (released June 20, 2013):

--- a/docs/api/mwparserfromhell.nodes.rst
+++ b/docs/api/mwparserfromhell.nodes.rst
@@ -25,6 +25,14 @@ nodes Package
    :undoc-members:
    :show-inheritance:

 :mod:`external_link` Module
 ---------------------------

 .. automodule:: mwparserfromhell.nodes.external_link
    :members:
    :undoc-members:
    :show-inheritance:

 :mod:`heading` Module
 ---------------------

--- a/docs/api/mwparserfromhell.rst
+++ b/docs/api/mwparserfromhell.rst
@@ -30,10 +30,10 @@ mwparserfromhell Package
    :members:
    :undoc-members:

 :mod:`tag_defs` Module
 :mod:`definitions` Module
 -------------------------

 .. automodule:: mwparserfromhell.tag_defs
 .. automodule:: mwparserfromhell.definitions
    :members:

 :mod:`utils` Module
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -7,10 +7,11 @@ v0.3
 Unreleased
 (`changes <https://github.com/earwig/mwparserfromhell/compare/v0.2...develop>`__):

 - Added complete support for HTML :py:class:`Tags <.Tag>`, along with
  appropriate unit tests. This includes forms like ``<ref>foo</ref>``,
  ``<ref name="bar"/>``, and wiki-markup tags like bold (``'''``), italics
  (``''``), and lists (``*``, ``#``, ``;`` and ``:``).
 - Added complete support for HTML :py:class:`Tags <.Tag>`, including forms like
  ``<ref>foo</ref>``, ``<ref name="bar"/>``, and wiki-markup tags like bold
  (``'''``), italics (``''``), and lists (``*``, ``#``, ``;`` and ``:``).
 - Added support for :py:class:`.ExternalLink`\ s (``http://example.com/`` and
  ``[http://example.com/ Example]``).
 - :py:class:`Wikicode's <.Wikicode>` :py:meth:`.filter` methods are now passed
  *recursive=True* by default instead of *False*. **This is a breaking change
  if you rely on any filter() methods being non-recursive by default.**
@@ -25,7 +26,7 @@ Unreleased
  :py:meth:`~.Template.has` for consistency with :py:class:`~.Template`\ 's
  other methods; :py:meth:`~.has_param` is now an alias.
 - The C tokenizer extension now works on Python 3 in addition to Python 2.7.
 - Various fixes and cleanup.
 - Various bugfixes, internal changes, and cleanup.

 v0.2
 ----
--- a/mwparserfromhell/init.py
+++ b/mwparserfromhell/init.py
@@ -34,6 +34,7 @@ __license__ = "MIT License"
 __version__ = "0.3.dev"
 __email__ = "ben.kurtovic@verizon.net"

 from . import compat, nodes, parser, smart_list, string_mixin, utils, wikicode
 from . import (compat, definitions, nodes, parser, smart_list, string_mixin,
               utils, wikicode)

 parse = utils.parse_anything
--- a/mwparserfromhell/definitions.py
+++ b/mwparserfromhell/definitions.py
@@ -20,12 +20,22 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.

 """Contains data regarding certain HTML tags."""
 """Contains data about certain markup, like HTML tags and external links."""

 from __future__ import unicode_literals

 __all__ = ["get_html_tag", "is_parsable", "is_visible", "is_single",
           "is_single_only"]
           "is_single_only", "is_scheme"]

 URI_SCHEMES = {
    # [mediawiki/core.git]/includes/DefaultSettings.php @ 374a0ad943
    "http": True, "https": True, "ftp": True, "ftps": True, "ssh": True,
    "sftp": True, "irc": True, "ircs": True, "xmpp": False, "sip": False,
    "sips": False, "gopher": True, "telnet": True, "nntp": True,
    "worldwind": True, "mailto": False, "tel": False, "sms": False,
    "news": False, "svn": True, "git": True, "mms": True, "bitcoin": False,
    "magnet": False, "urn": False, "geo": False
 }

 PARSER_BLACKLIST = [
    # enwiki extensions @ 2013-06-28
@@ -70,3 +80,12 @@ def is_single(tag):
 def is_single_only(tag):
    """Return whether or not the given *tag* must exist without a close tag."""
    return tag.lower() in SINGLE_ONLY

 def is_scheme(scheme, slashes=True, reverse=False):
    """Return whether *scheme* is a valid URI scheme for external links.

    The lookup is case-insensitive. If *reverse* is true, *scheme* is flipped
    back to front before the lookup. If *slashes* is true, any scheme listed
    in ``URI_SCHEMES`` is accepted; otherwise only those listed schemes whose
    ``URI_SCHEMES`` entry is false are accepted.
    """
    if reverse:  # Convenience for C
        scheme = scheme[::-1]
    try:
        flag = URI_SCHEMES[scheme.lower()]
    except KeyError:
        # Unknown schemes are never valid, with or without slashes.
        return False
    return True if slashes else not flag
--- a/mwparserfromhell/nodes/init.py
+++ b/mwparserfromhell/nodes/init.py
@@ -69,6 +69,7 @@ from . import extras
 from .text import Text
 from .argument import Argument
 from .comment import Comment
 from .external_link import ExternalLink
 from .heading import Heading
 from .html_entity import HTMLEntity
 from .tag import Tag
--- a/mwparserfromhell/nodes/external_link.py
+++ b/mwparserfromhell/nodes/external_link.py
@@ -0,0 +1,97 @@
 # -*- coding: utf-8  -*-
 #
 # Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
 #
 # The above copyright notice and this permission notice shall be included in
 # all copies or substantial portions of the Software.
 #
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.

 from __future__ import unicode_literals

 from . import Node
 from ..compat import str
 from ..utils import parse_anything

 __all__ = ["ExternalLink"]

 class ExternalLink(Node):
    """An external link node, such as ``[http://example.com/ Example]``.

    A link has a URL, an optional title, and a flag saying whether it is
    written inside square brackets or as a bare URL.
    """

    def __init__(self, url, title=None, brackets=True):
        super(ExternalLink, self).__init__()
        # Values are stored as given; only the property setters parse input.
        self._url = url
        self._title = title
        self._brackets = brackets

    def __unicode__(self):
        # A bare link is just its URL; the title is only rendered in brackets.
        if not self.brackets:
            return str(self.url)
        if self.title is None:
            return "[" + str(self.url) + "]"
        return "[" + str(self.url) + " " + str(self.title) + "]"

    def __iternodes__(self, getter):
        # The node itself comes first, then the URL's children, then the
        # title's children (when there is a title).
        yield None, self
        for node in getter(self.url):
            yield self.url, node
        if self.title is None:
            return
        for node in getter(self.title):
            yield self.title, node

    def __strip__(self, normalize, collapse):
        if not self.brackets:
            return self.url.strip_code(normalize, collapse)
        if self.title:
            return self.title.strip_code(normalize, collapse)
        # Bracketed link without a (non-empty) title: nothing to show.
        return None

    def __showtree__(self, write, get, mark):
        if self.brackets:
            write("[")
        get(self.url)
        if self.title is not None:
            get(self.title)
        if self.brackets:
            write("]")

    @property
    def url(self):
        """The URL of the link target, as a :py:class:`~.Wikicode` object."""
        return self._url

    @url.setter
    def url(self, value):
        # Imported here rather than at module level, as in the original code.
        from ..parser import contexts
        self._url = parse_anything(value, contexts.EXT_LINK_URI)

    @property
    def title(self):
        """The link title (if given), as a :py:class:`~.Wikicode` object."""
        return self._title

    @title.setter
    def title(self, value):
        if value is None:
            self._title = None
        else:
            self._title = parse_anything(value)

    @property
    def brackets(self):
        """Whether to enclose the URL in brackets or display it straight."""
        return self._brackets

    @brackets.setter
    def brackets(self, value):
        self._brackets = bool(value)
--- a/mwparserfromhell/nodes/tag.py
+++ b/mwparserfromhell/nodes/tag.py
@@ -25,7 +25,7 @@ from __future__ import unicode_literals
 from . import Node, Text
 from .extras import Attribute
 from ..compat import str
 from ..tag_defs import is_visible
 from ..definitions import is_visible
 from ..utils import parse_anything

 __all__ = ["Tag"]
@@ -152,7 +152,7 @@ class Tag(Node):
        This makes the tag look like a lone close tag. It is technically
        invalid and is only parsable Wikicode when the tag itself is
        single-only, like ``<br>`` and ``<img>``. See
        :py:func:`.tag_defs.is_single_only`.
        :py:func:`.definitions.is_single_only`.
        """
        return self._invalid

@@ -161,7 +161,7 @@ class Tag(Node):
        """Whether the tag is implicitly self-closing, with no ending slash.

        This is only possible for specific "single" tags like ``<br>`` and
        ``<li>``. See :py:func:`.tag_defs.is_single`. This field only has an
        ``<li>``. See :py:func:`.definitions.is_single`. This field only has an
        effect if :py:attr:`self_closing` is also ``True``.
        """
        return self._implicit
--- a/mwparserfromhell/parser/init.py
+++ b/mwparserfromhell/parser/init.py
@@ -46,16 +46,15 @@ class Parser(object):
    :py:class:`~.Node`\ s by the :py:class:`~.Builder`.
    """

    def __init__(self, text):
        self.text = text
    def __init__(self):
        if use_c and CTokenizer:
            self._tokenizer = CTokenizer()
        else:
            self._tokenizer = Tokenizer()
        self._builder = Builder()

    def parse(self):
        """Return a string as a parsed :py:class:`~.Wikicode` object tree."""
        tokens = self._tokenizer.tokenize(self.text)
    def parse(self, text, context=0):
        """Parse *text*, returning a :py:class:`~.Wikicode` object tree."""
        tokens = self._tokenizer.tokenize(text, context)
        code = self._builder.build(tokens)
        return code
--- a/mwparserfromhell/parser/builder.py
+++ b/mwparserfromhell/parser/builder.py
@@ -24,8 +24,8 @@ from __future__ import unicode_literals

 from . import tokens
 from ..compat import str
 from ..nodes import (Argument, Comment, Heading, HTMLEntity, Tag, Template,
                     Text, Wikilink)
 from ..nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Tag,
                     Template, Text, Wikilink)
 from ..nodes.extras import Attribute, Parameter
 from ..smart_list import SmartList
 from ..wikicode import Wikicode
@@ -142,6 +142,22 @@ class Builder(object):
            else:
                self._write(self._handle_token(token))

    def _handle_external_link(self, token):
        """Build an ExternalLink from the tokens following its open token.

        *token* is the open token; its ``brackets`` attribute is passed on to
        the resulting node. Tokens are consumed until the close token is
        found. Returns ``None`` implicitly if the tokens run out first.
        """
        brackets = token.brackets
        url = None
        self._push()
        while self._tokens:
            token = self._tokens.pop()
            if isinstance(token, tokens.ExternalLinkSeparator):
                # Everything gathered so far is the URL; start on the title.
                url = self._pop()
                self._push()
                continue
            if isinstance(token, tokens.ExternalLinkClose):
                if url is None:
                    # No separator was seen, so the whole stack is the URL.
                    return ExternalLink(self._pop(), brackets=brackets)
                return ExternalLink(url, self._pop(), brackets)
            self._write(self._handle_token(token))

    def _handle_entity(self):
        """Handle a case where an HTML entity is at the head of the tokens."""
        token = self._tokens.pop()
@@ -244,6 +260,8 @@ class Builder(object):
            return self._handle_argument()
        elif isinstance(token, tokens.WikilinkOpen):
            return self._handle_wikilink()
        elif isinstance(token, tokens.ExternalLinkOpen):
            return self._handle_external_link(token)
        elif isinstance(token, tokens.HTMLEntityStart):
            return self._handle_entity()
        elif isinstance(token, tokens.HeadingStart):
--- a/mwparserfromhell/parser/contexts.py
+++ b/mwparserfromhell/parser/contexts.py
@@ -51,6 +51,12 @@ Local (stack-specific) contexts:
    * :py:const:`WIKILINK_TITLE`
    * :py:const:`WIKILINK_TEXT`

 * :py:const:`EXT_LINK`

    * :py:const:`EXT_LINK_URI`
    * :py:const:`EXT_LINK_TITLE`
    * :py:const:`EXT_LINK_BRACKETS`

 * :py:const:`HEADING`

    * :py:const:`HEADING_LEVEL_1`
@@ -94,6 +100,7 @@ Aggregate contexts:
 * :py:const:`FAIL`
 * :py:const:`UNSAFE`
 * :py:const:`DOUBLE`
 * :py:const:`INVALID_LINK`

 """

@@ -112,35 +119,40 @@ WIKILINK_TITLE = 1 << 5
 WIKILINK_TEXT =  1 << 6
 WIKILINK = WIKILINK_TITLE + WIKILINK_TEXT

 HEADING_LEVEL_1 = 1 << 7
 HEADING_LEVEL_2 = 1 << 8
 HEADING_LEVEL_3 = 1 << 9
 HEADING_LEVEL_4 = 1 << 10
 HEADING_LEVEL_5 = 1 << 11
 HEADING_LEVEL_6 = 1 << 12
 EXT_LINK_URI      = 1 << 7
 EXT_LINK_TITLE    = 1 << 8
 EXT_LINK_BRACKETS = 1 << 9
 EXT_LINK = EXT_LINK_URI + EXT_LINK_TITLE + EXT_LINK_BRACKETS

 HEADING_LEVEL_1 = 1 << 10
 HEADING_LEVEL_2 = 1 << 11
 HEADING_LEVEL_3 = 1 << 12
 HEADING_LEVEL_4 = 1 << 13
 HEADING_LEVEL_5 = 1 << 14
 HEADING_LEVEL_6 = 1 << 15
 HEADING = (HEADING_LEVEL_1 + HEADING_LEVEL_2 + HEADING_LEVEL_3 +
           HEADING_LEVEL_4 + HEADING_LEVEL_5 + HEADING_LEVEL_6)

 TAG_OPEN =  1 << 13
 TAG_ATTR =  1 << 14
 TAG_BODY =  1 << 15
 TAG_CLOSE = 1 << 16
 TAG_OPEN =  1 << 16
 TAG_ATTR =  1 << 17
 TAG_BODY =  1 << 18
 TAG_CLOSE = 1 << 19
 TAG = TAG_OPEN + TAG_ATTR + TAG_BODY + TAG_CLOSE

 STYLE_ITALICS =      1 << 17
 STYLE_BOLD =         1 << 18
 STYLE_PASS_AGAIN =   1 << 19
 STYLE_SECOND_PASS =  1 << 20
 STYLE_ITALICS =      1 << 20
 STYLE_BOLD =         1 << 21
 STYLE_PASS_AGAIN =   1 << 22
 STYLE_SECOND_PASS =  1 << 23
 STYLE = STYLE_ITALICS + STYLE_BOLD + STYLE_PASS_AGAIN + STYLE_SECOND_PASS

 DL_TERM = 1 << 21
 DL_TERM = 1 << 24

 HAS_TEXT =       1 << 22
 FAIL_ON_TEXT =   1 << 23
 FAIL_NEXT  =     1 << 24
 FAIL_ON_LBRACE = 1 << 25
 FAIL_ON_RBRACE = 1 << 26
 FAIL_ON_EQUALS = 1 << 27
 HAS_TEXT =       1 << 25
 FAIL_ON_TEXT =   1 << 26
 FAIL_NEXT  =     1 << 27
 FAIL_ON_LBRACE = 1 << 28
 FAIL_ON_RBRACE = 1 << 29
 FAIL_ON_EQUALS = 1 << 30
 SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE +
                FAIL_ON_RBRACE + FAIL_ON_EQUALS)

@@ -150,7 +162,8 @@ GL_HEADING = 1 << 0

 # Aggregate contexts:

 FAIL = TEMPLATE + ARGUMENT + WIKILINK + HEADING + TAG + STYLE
 UNSAFE = (TEMPLATE_NAME + WIKILINK_TITLE + TEMPLATE_PARAM_KEY + ARGUMENT_NAME +
          TAG_CLOSE)
 FAIL = TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK_TITLE + HEADING + TAG + STYLE
 UNSAFE = (TEMPLATE_NAME + WIKILINK + EXT_LINK_TITLE + TEMPLATE_PARAM_KEY +
          ARGUMENT_NAME + TAG_CLOSE)
 DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE
 INVALID_LINK = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK + EXT_LINK
--- a/mwparserfromhell/parser/tokenizer.c
+++ b/mwparserfromhell/parser/tokenizer.c
@@ -24,6 +24,20 @@ SOFTWARE.
 #include "tokenizer.h"

 /*
    Return 1 if the given Py_UNICODE equals the first character of any entry
    in MARKERS, and 0 otherwise.
 */
 static int is_marker(Py_UNICODE this)
 {
    int index = 0;

    while (index < NUM_MARKERS) {
        if (this == *MARKERS[index])
            return 1;
        index++;
    }
    return 0;
 }

 /*
    Given a context, return the heading level encoded within it.
 */
 static int heading_level_from_context(int n)
@@ -37,13 +51,14 @@ static int heading_level_from_context(int n)
 }

 /*
    Call the given function in tag_defs, using 'tag' as a parameter, and return
    its output as a bool.
    Call the given function in definitions.py, using 'in1', 'in2', and 'in3' as
    parameters, and return its output as a bool.
 */
 static int call_tag_def_func(const char* funcname, PyObject* tag)
 static int call_def_func(const char* funcname, PyObject* in1, PyObject* in2,
                         PyObject* in3)
 {
    PyObject* func = PyObject_GetAttrString(tag_defs, funcname);
    PyObject* result = PyObject_CallFunctionObjArgs(func, tag, NULL);
    PyObject* func = PyObject_GetAttrString(definitions, funcname);
    PyObject* result = PyObject_CallFunctionObjArgs(func, in1, in2, in3, NULL);
    int ans = (result == Py_True) ? 1 : 0;

    Py_DECREF(func);
@@ -65,7 +80,7 @@ static PyObject* strip_tag_name(PyObject* token)
    Py_DECREF(text);
    if (!rstripped)
        return NULL;
    lowered = PyObject_CallMethod(rstripped, "rstrip", NULL);
    lowered = PyObject_CallMethod(rstripped, "lower", NULL);
    Py_DECREF(rstripped);
    return lowered;
 }
@@ -85,7 +100,7 @@ static Textbuffer* Textbuffer_new(void)
        PyErr_NoMemory();
        return NULL;
    }
    buffer->next = NULL;
    buffer->prev = buffer->next = NULL;
    return buffer;
 }

@@ -113,10 +128,10 @@ static int Textbuffer_write(Textbuffer** this, Py_UNICODE code)
        if (!new)
            return -1;
        new->next = self;
        self->prev = new;
        *this = self = new;
    }
    self->data[self->size] = code;
    self->size++;
    self->data[self->size++] = code;
    return 0;
 }

@@ -345,7 +360,7 @@ static void* Tokenizer_fail_route(Tokenizer* self)
 }

 /*
    Write a token to the end of the current token stack.
    Write a token to the current token stack.
 */
 static int Tokenizer_emit_token(Tokenizer* self, PyObject* token, int first)
 {
@@ -366,7 +381,8 @@ static int Tokenizer_emit_token(Tokenizer* self, PyObject* token, int first)
 }

 /*
    Write a token to the end of the current token stack.
    Write a token to the current token stack, with kwargs. Steals a reference
    to kwargs.
 */
 static int Tokenizer_emit_token_kwargs(Tokenizer* self, PyObject* token,
                                       PyObject* kwargs, int first)
@@ -417,6 +433,42 @@ static int Tokenizer_emit_text(Tokenizer* self, const char* text)
 }

 /*
    Emit every character held in another textbuffer to the current textbuffer.

    The chain starting at 'buffer' is always deallocated before returning,
    whether or not emitting succeeds. If 'reverse' is nonzero, chunks are
    visited from 'buffer' along 'next' and each chunk's data is emitted from
    its last index to its first. Otherwise the walk starts at the far end of
    the 'next' chain, moves back along 'prev', and emits each chunk's data
    from its first index to its last. Returns 0 on success, -1 on failure.
 */
 static int
 Tokenizer_emit_textbuffer(Tokenizer* self, Textbuffer* buffer, int reverse)
 {
    Textbuffer *chunk = buffer;
    int idx, result = -1;

    if (reverse) {
        do {
            idx = chunk->size;
            while (idx-- > 0) {
                if (Tokenizer_emit_char(self, chunk->data[idx]))
                    goto cleanup;
            }
            chunk = chunk->next;
        } while (chunk);
    }
    else {
        while (chunk->next)
            chunk = chunk->next;
        do {
            for (idx = 0; idx < chunk->size; idx++) {
                if (Tokenizer_emit_char(self, chunk->data[idx]))
                    goto cleanup;
            }
            chunk = chunk->prev;
        } while (chunk);
    }
    result = 0;
    cleanup:
    // Ownership of 'buffer' was handed to us, so it is released on all paths.
    Textbuffer_dealloc(buffer);
    return result;
 }

 /*
    Write a series of tokens to the current stack at once.
 */
 static int Tokenizer_emit_all(Tokenizer* self, PyObject* tokenlist)
@@ -808,6 +860,353 @@ static PyObject* Tokenizer_handle_wikilink_end(Tokenizer* self)
 }

 /*
    Parse the URI scheme of a bracket-enclosed external link.

    Pushes a new LC_EXT_LINK_URI stack. A link that begins with "//" has no
    scheme and the slashes are emitted as-is. Otherwise characters from the
    'valid' set are read up to a ":", emitted, and also collected so that the
    scheme can be checked with IS_SCHEME.

    Returns -1 if an internal call reports failure. Returns 0 otherwise, which
    includes the case where the text is not a valid scheme: then
    Tokenizer_fail_route() has been called and the caller must check BAD_ROUTE.
 */
 static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self)
 {
    static const char* valid = "abcdefghijklmnopqrstuvwxyz0123456789+.-";
    Textbuffer* buffer;
    PyObject* scheme;
    Py_UNICODE this;
    int slashes, i;

    if (Tokenizer_push(self, LC_EXT_LINK_URI))
        return -1;
    if (Tokenizer_READ(self, 0) == *"/" && Tokenizer_READ(self, 1) == *"/") {
        if (Tokenizer_emit_text(self, "//"))
            return -1;
        self->head += 2;
    }
    else {
        buffer = Textbuffer_new();
        if (!buffer)
            return -1;
        while ((this = Tokenizer_READ(self, 0)) != *"") {
            // Stop at the first character that cannot be part of a scheme:
            i = 0;
            while (1) {
                if (!valid[i])
                    goto end_of_loop;
                if (this == valid[i])
                    break;
                i++;
            }
            // A failed write must not be ignored, or the scheme checked
            // below would silently be missing characters:
            if (Textbuffer_write(&buffer, this)) {
                Textbuffer_dealloc(buffer);
                return -1;
            }
            if (Tokenizer_emit_char(self, this)) {
                Textbuffer_dealloc(buffer);
                return -1;
            }
            self->head++;
        }
        end_of_loop:
        if (this != *":") {
            Textbuffer_dealloc(buffer);
            Tokenizer_fail_route(self);
            return 0;
        }
        if (Tokenizer_emit_char(self, *":")) {
            Textbuffer_dealloc(buffer);
            return -1;
        }
        self->head++;
        slashes = (Tokenizer_READ(self, 0) == *"/" &&
                   Tokenizer_READ(self, 1) == *"/");
        if (slashes) {
            if (Tokenizer_emit_text(self, "//")) {
                Textbuffer_dealloc(buffer);
                return -1;
            }
            self->head += 2;
        }
        scheme = Textbuffer_render(buffer);
        Textbuffer_dealloc(buffer);
        if (!scheme)
            return -1;
        if (!IS_SCHEME(scheme, slashes, 0)) {
            Py_DECREF(scheme);
            Tokenizer_fail_route(self);
            return 0;
        }
        Py_DECREF(scheme);
    }
    return 0;
 }

 /*
    Parse the URI scheme of a free (no brackets) external link.

    The scheme has already been written to the current textbuffer as plain
    text, so it is recovered by walking that textbuffer backwards until a
    space or marker is found. The collected scheme is therefore reversed,
    which is why IS_SCHEME is called with its last argument set and why the
    scheme is re-emitted with Tokenizer_emit_textbuffer(..., 1).

    Returns -1 if an internal call reports failure and 0 otherwise. A return
    of 0 does not by itself mean that a link was found: the FAIL_ROUTE paths
    also return 0, so the caller checks BAD_ROUTE.
 */
 static int Tokenizer_parse_free_uri_scheme(Tokenizer* self)
 {
    static const char* valid = "abcdefghijklmnopqrstuvwxyz0123456789+.-";
    Textbuffer *scheme_buffer = Textbuffer_new(), *temp_buffer;
    PyObject *scheme;
    Py_UNICODE chunk;
    int slashes, i, j;

    if (!scheme_buffer)
        return -1;
    // We have to backtrack through the textbuffer looking for our scheme since
    // it was just parsed as text:
    temp_buffer = self->topstack->textbuffer;
    while (temp_buffer) {
        for (i = temp_buffer->size - 1; i >= 0; i--) {
            chunk = temp_buffer->data[i];
            if (Py_UNICODE_ISSPACE(chunk) || is_marker(chunk))
                goto end_of_loop;
            j = 0;
            while (1) {
                if (!valid[j]) {
                    Textbuffer_dealloc(scheme_buffer);
                    FAIL_ROUTE(0);
                    return 0;
                }
                if (chunk == valid[j])
                    break;
                j++;
            }
            // A failed write must not be ignored, or a truncated scheme
            // would be validated and re-emitted below:
            if (Textbuffer_write(&scheme_buffer, chunk)) {
                Textbuffer_dealloc(scheme_buffer);
                return -1;
            }
        }
        temp_buffer = temp_buffer->next;
    }
    end_of_loop:
    scheme = Textbuffer_render(scheme_buffer);
    if (!scheme) {
        Textbuffer_dealloc(scheme_buffer);
        return -1;
    }
    slashes = (Tokenizer_READ(self, 0) == *"/" &&
               Tokenizer_READ(self, 1) == *"/");
    if (!IS_SCHEME(scheme, slashes, 1)) {
        Py_DECREF(scheme);
        Textbuffer_dealloc(scheme_buffer);
        FAIL_ROUTE(0);
        return 0;
    }
    Py_DECREF(scheme);
    if (Tokenizer_push(self, LC_EXT_LINK_URI)) {
        Textbuffer_dealloc(scheme_buffer);
        return -1;
    }
    // Tokenizer_emit_textbuffer() deallocates scheme_buffer on every path:
    if (Tokenizer_emit_textbuffer(self, scheme_buffer, 1))
        return -1;
    if (Tokenizer_emit_char(self, *":"))
        return -1;
    if (slashes) {
        if (Tokenizer_emit_text(self, "//"))
            return -1;
        self->head += 2;
    }
    return 0;
 }

 /*
    Handle text in a free external link, including trailing punctuation.

    Punctuation characters (and a ")" seen before any "(") are held back in
    the 'tail' buffer instead of being emitted. Any other character first
    flushes the held-back tail into the link and is then emitted itself. The
    first "(" sets *parens and is handled the same way.

    Returns 0 on success and -1 on failure.
 */
 static int
 Tokenizer_handle_free_link_text(Tokenizer* self, int* parens,
                                 Textbuffer** tail, Py_UNICODE this)
 {
    // Flush a non-empty tail buffer into the link and replace it with a fresh,
    // empty one. Tokenizer_emit_textbuffer() frees the buffer it is given even
    // when it fails, so the replacement is installed *before* emitting: on
    // every exit 'tail' refers to a live buffer that the caller may safely
    // deallocate, never to memory that has already been freed.
    #define PUSH_TAIL_BUFFER(tail, error)                       \
        if ((tail)->size || (tail)->next) {                     \
            Textbuffer* full_tail_ = (tail);                    \
            Textbuffer* empty_tail_ = Textbuffer_new();         \
            if (!empty_tail_)                                   \
                return error;                                   \
            (tail) = empty_tail_;                               \
            if (Tokenizer_emit_textbuffer(self, full_tail_, 0)) \
                return error;                                   \
        }

    if (this == *"(" && !(*parens)) {
        *parens = 1;
        PUSH_TAIL_BUFFER(*tail, -1)
    }
    else if (this == *"," || this == *";" || this == *"\\" || this == *"." ||
             this == *":" || this == *"!" || this == *"?" ||
             (!(*parens) && this == *")"))
        return Textbuffer_write(tail, this);
    else
        PUSH_TAIL_BUFFER(*tail, -1)
    return Tokenizer_emit_char(self, this);
 }

 /*
    Really parse an external link.

    Parses the URI scheme (bracketed or free, depending on 'brackets') and then
    consumes characters until the link ends. Returns the result of
    Tokenizer_pop() or Tokenizer_parse() when the link ends, the result of
    Tokenizer_fail_route() when the text cannot be a link, or NULL when an
    internal call reports failure or the scheme parser failed the route. For
    free links, held-back trailing characters are collected through 'extra'.
 */
 static PyObject*
 Tokenizer_really_parse_external_link(Tokenizer* self, int brackets,
                                      Textbuffer** extra)
 {
    Py_UNICODE this, next;
    int parens = 0;

    if (brackets ? Tokenizer_parse_bracketed_uri_scheme(self) :
                   Tokenizer_parse_free_uri_scheme(self))
        return NULL;
    // The scheme parsers can fail the route while still returning 0:
    if (BAD_ROUTE)
        return NULL;
    this = Tokenizer_READ(self, 0);
    // Fail if the scheme is directly followed by the end of the input, a
    // newline, a space, or a closing bracket:
    if (this == *"" || this == *"\n" || this == *" " || this == *"]")
        return Tokenizer_fail_route(self);
    if (!brackets && this == *"[")
        return Tokenizer_fail_route(self);
    while (1) {
        this = Tokenizer_READ(self, 0);
        next = Tokenizer_READ(self, 1);
        if (this == *"" || this == *"\n") {
            // A bracketed link must be closed before the line or input ends;
            // a free link simply stops here.
            if (brackets)
                return Tokenizer_fail_route(self);
            // NOTE(review): presumably steps back so that the caller
            // re-reads the terminating character -- confirm against caller.
            self->head--;
            return Tokenizer_pop(self);
        }
        if (this == *"{" && next == *"{" && Tokenizer_CAN_RECURSE(self)) {
            // Flush held-back trailing characters before the nested node:
            PUSH_TAIL_BUFFER(*extra, NULL)
            if (Tokenizer_parse_template_or_argument(self))
                return NULL;
        }
        else if (this == *"[") {
            // "[" ends a free link but is ordinary text in a bracketed one.
            if (!brackets) {
                self->head--;
                return Tokenizer_pop(self);
            }
            if (Tokenizer_emit_char(self, *"["))
                return NULL;
        }
        else if (this == *"]") {
            // "]" ends the link in both modes; only free links step back.
            if (!brackets)
                self->head--;
            return Tokenizer_pop(self);
        }
        else if (this == *"&") {
            PUSH_TAIL_BUFFER(*extra, NULL)
            if (Tokenizer_parse_entity(self))
                return NULL;
        }
        else if (this == *" ") {
            if (brackets) {
                // The first space separates the URL from the title: switch
                // the context from URI to TITLE and parse the rest normally.
                if (Tokenizer_emit(self, ExternalLinkSeparator))
                    return NULL;
                self->topstack->context ^= LC_EXT_LINK_URI;
                self->topstack->context |= LC_EXT_LINK_TITLE;
                self->head++;
                return Tokenizer_parse(self, 0, 0);
            }
            // A space ends a free link; it is stored in 'extra' rather than
            // emitted as part of the link.
            if (Textbuffer_write(extra, *" "))
                return NULL;
            return Tokenizer_pop(self);
        }
        else if (!brackets) {
            if (Tokenizer_handle_free_link_text(self, &parens, extra, this))
                return NULL;
        }
        else {
            if (Tokenizer_emit_char(self, this))
                return NULL;
        }
        self->head++;
    }
 }

 /*
    Remove the URI scheme of a new external link from the textbuffer.

    The scheme is taken to be everything before the first ":" in the "text"
    attribute of the first item of 'link'. That many characters are dropped
    from the current textbuffer, starting with the chunk at its head and
    freeing chunks that are used up entirely.

    Returns 0 on success and -1 on failure.
 */
 static int
 Tokenizer_remove_uri_scheme_from_textbuffer(Tokenizer* self, PyObject* link)
 {
    PyObject *text = PyObject_GetAttrString(PyList_GET_ITEM(link, 0), "text"),
             *split, *scheme;
    Py_ssize_t length;
    Textbuffer* temp;

    if (!text)
        return -1;
    split = PyObject_CallMethod(text, "split", "si", ":", 1);
    Py_DECREF(text);
    if (!split)
        return -1;
    scheme = PyList_GET_ITEM(split, 0);
    length = PyUnicode_GET_SIZE(scheme);
    while (length) {
        temp = self->topstack->textbuffer;
        if (length <= temp->size) {
            temp->size -= length;
            break;
        }
        if (!temp->next) {
            // Never free the last chunk: doing so would leave the stack
            // without a textbuffer and the next iteration would dereference
            // NULL. Emptying it is the most that can be removed.
            temp->size = 0;
            break;
        }
        length -= temp->size;
        self->topstack->textbuffer = temp->next;
        // The new head must not keep pointing back at the chunk being freed:
        temp->next->prev = NULL;
        free(temp->data);
        free(temp);
    }
    Py_DECREF(split);
    return 0;
 }

 /*
    Parse an external link at the head of the wikicode string.

    If the current context does not allow a link, or the link route fails,
    the character at the head is handled as ordinary input instead (see
    NOT_A_LINK). Otherwise an ExternalLinkOpen token carrying a "brackets"
    keyword, the link's own tokens, and an ExternalLinkClose token are
    emitted, followed by any characters left over in the 'extra' buffer.

    Returns 0 on success and -1 on failure.
 */
 static int Tokenizer_parse_external_link(Tokenizer* self, int brackets)
 {
    // Parenthesized so that the macro stays a single operand wherever it is
    // used in a larger expression:
    #define INVALID_CONTEXT (self->topstack->context & AGG_INVALID_LINK)
    #define NOT_A_LINK                                        \
        if (!brackets && self->topstack->context & LC_DLTERM) \
            return Tokenizer_handle_dl_term(self);            \
        return Tokenizer_emit_char(self, Tokenizer_READ(self, 0))

    Py_ssize_t reset = self->head;
    PyObject *link, *kwargs;
    Textbuffer *extra = 0;

    if (INVALID_CONTEXT || !(Tokenizer_CAN_RECURSE(self))) {
        NOT_A_LINK;
    }
    extra = Textbuffer_new();
    if (!extra)
        return -1;
    self->head++;
    link = Tokenizer_really_parse_external_link(self, brackets, &extra);
    if (BAD_ROUTE) {
        // Not a link after all: rewind and treat the character as text.
        RESET_ROUTE();
        self->head = reset;
        Textbuffer_dealloc(extra);
        NOT_A_LINK;
    }
    if (!link) {
        Textbuffer_dealloc(extra);
        return -1;
    }
    if (!brackets) {
        // The scheme of a free link was already written out as plain text
        // before the link was recognized, so take it back out:
        if (Tokenizer_remove_uri_scheme_from_textbuffer(self, link)) {
            Textbuffer_dealloc(extra);
            Py_DECREF(link);
            return -1;
        }
    }
    kwargs = PyDict_New();
    if (!kwargs) {
        Textbuffer_dealloc(extra);
        Py_DECREF(link);
        return -1;
    }
    // Do not emit the open token without its "brackets" keyword:
    if (PyDict_SetItemString(kwargs, "brackets",
                             brackets ? Py_True : Py_False)) {
        Py_DECREF(kwargs);
        Textbuffer_dealloc(extra);
        Py_DECREF(link);
        return -1;
    }
    if (Tokenizer_emit_kwargs(self, ExternalLinkOpen, kwargs)) {
        Textbuffer_dealloc(extra);
        Py_DECREF(link);
        return -1;
    }
    if (Tokenizer_emit_all(self, link)) {
        Textbuffer_dealloc(extra);
        Py_DECREF(link);
        return -1;
    }
    Py_DECREF(link);
    if (Tokenizer_emit(self, ExternalLinkClose)) {
        Textbuffer_dealloc(extra);
        return -1;
    }
    // Tokenizer_emit_textbuffer() deallocates 'extra' itself:
    if (extra->size || extra->next)
        return Tokenizer_emit_textbuffer(self, extra, 0);
    Textbuffer_dealloc(extra);
    return 0;
 }

 /*
    Parse a section heading at the head of the wikicode string.
 */
 static int Tokenizer_parse_heading(Tokenizer* self)
@@ -1238,15 +1637,8 @@ Tokenizer_handle_tag_space(Tokenizer* self, TagData* data, Py_UNICODE text)
 static int Tokenizer_handle_tag_text(Tokenizer* self, Py_UNICODE text)
 {
    Py_UNICODE next = Tokenizer_READ(self, 1);
    int i, is_marker = 0;

    for (i = 0; i < NUM_MARKERS; i++) {
        if (*MARKERS[i] == text) {
            is_marker = 1;
            break;
        }
    }
    if (!is_marker || !Tokenizer_CAN_RECURSE(self))
    if (!is_marker(text) || !Tokenizer_CAN_RECURSE(self))
        return Tokenizer_emit_char(self, text);
    else if (text == next && next == *"{")
        return Tokenizer_parse_template_or_argument(self);
@@ -1264,17 +1656,11 @@ static int
 Tokenizer_handle_tag_data(Tokenizer* self, TagData* data, Py_UNICODE chunk)
 {
    PyObject *trash;
    int first_time, i, is_marker = 0, escaped;
    int first_time, escaped;

    if (data->context & TAG_NAME) {
        first_time = !(data->context & TAG_NOTE_SPACE);
        for (i = 0; i < NUM_MARKERS; i++) {
            if (*MARKERS[i] == chunk) {
                is_marker = 1;
                break;
            }
        }
        if (is_marker || (Py_UNICODE_ISSPACE(chunk) && first_time)) {
        if (is_marker(chunk) || (Py_UNICODE_ISSPACE(chunk) && first_time)) {
            // Tags must start with text, not spaces
            Tokenizer_fail_route(self);
            return 0;
@@ -1623,7 +2009,6 @@ static int Tokenizer_handle_invalid_tag_start(Tokenizer* self)
    Textbuffer* buf;
    PyObject *name, *tag;
    Py_UNICODE this;
    int is_marker, i;

    self->head += 2;
    buf = Textbuffer_new();
@@ -1631,14 +2016,7 @@ static int Tokenizer_handle_invalid_tag_start(Tokenizer* self)
        return -1;
    while (1) {
        this = Tokenizer_READ(self, pos);
        is_marker = 0;
        for (i = 0; i < NUM_MARKERS; i++) {
            if (*MARKERS[i] == this) {
                is_marker = 1;
                break;
            }
        }
        if (is_marker) {
        if (is_marker(this)) {
            name = Textbuffer_render(buf);
            if (!name) {
                Textbuffer_dealloc(buf);
@@ -1985,9 +2363,9 @@ static int Tokenizer_handle_hr(Tokenizer* self)
        self->head++;
    }
    markup = Textbuffer_render(buffer);
    Textbuffer_dealloc(buffer);
    if (!markup)
        return -1;
    Textbuffer_dealloc(buffer);
    kwargs = PyDict_New();
    if (!kwargs)
        return -1;
@@ -2047,21 +2425,21 @@ static PyObject* Tokenizer_handle_end(Tokenizer* self, int context)
 */
 static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data)
 {
    if (context & LC_FAIL_NEXT) {
    if (context & LC_FAIL_NEXT)
        return -1;
    }
    if (context & LC_WIKILINK_TITLE) {
        if (data == *"]" || data == *"{")
    if (context & LC_WIKILINK) {
        if (context & LC_WIKILINK_TEXT)
            return (data == *"[" && Tokenizer_READ(self, 1) == *"[") ? -1 : 0;
        else if (data == *"]" || data == *"{")
            self->topstack->context |= LC_FAIL_NEXT;
        else if (data == *"\n" || data == *"[" || data == *"}")
            return -1;
        return 0;
    }
    if (context & LC_TAG_CLOSE) {
        if (data == *"<")
            return -1;
        return 0;
    }
    if (context & LC_EXT_LINK_TITLE)
        return (data == *"\n") ? -1 : 0;
    if (context & LC_TAG_CLOSE)
        return (data == *"<") ? -1 : 0;
    if (context & LC_TEMPLATE_NAME) {
        if (data == *"{" || data == *"}" || data == *"[") {
            self->topstack->context |= LC_FAIL_NEXT;
@@ -2126,7 +2504,7 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data)
 */
 static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
 {
    int this_context, is_marker, i;
    int this_context;
    Py_UNICODE this, next, next_next, last;
    PyObject* temp;

@@ -2146,14 +2524,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
                return Tokenizer_fail_route(self);
            }
        }
        is_marker = 0;
        for (i = 0; i < NUM_MARKERS; i++) {
            if (*MARKERS[i] == this) {
                is_marker = 1;
                break;
            }
        }
        if (!is_marker) {
        if (!is_marker(this)) {
            if (Tokenizer_emit_char(self, this))
                return NULL;
            self->head++;
@@ -2192,9 +2563,8 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
            if (Tokenizer_emit_char(self, this))
                return NULL;
        }
        else if (this == next && next == *"[") {
            if (!(this_context & LC_WIKILINK_TITLE) &&
                                                Tokenizer_CAN_RECURSE(self)) {
        else if (this == next && next == *"[" && Tokenizer_CAN_RECURSE(self)) {
            if (!(this_context & AGG_INVALID_LINK)) {
                if (Tokenizer_parse_wikilink(self))
                    return NULL;
            }
@@ -2207,6 +2577,16 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
        }
        else if (this == next && next == *"]" && this_context & LC_WIKILINK)
            return Tokenizer_handle_wikilink_end(self);
        else if (this == *"[") {
            if (Tokenizer_parse_external_link(self, 1))
                return NULL;
        }
        else if (this == *":" && !is_marker(last)) {
            if (Tokenizer_parse_external_link(self, 0))
                return NULL;
        }
        else if (this == *"]" && this_context & LC_EXT_LINK_TITLE)
            return Tokenizer_pop(self);
        else if (this == *"=" && !(self->global & GL_HEADING)) {
            if (last == *"\n" || last == *"") {
                if (Tokenizer_parse_heading(self))
@@ -2243,9 +2623,8 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
                    return NULL;
            }
        }
        else if (this == *"<") {
            if (!(this_context & LC_TAG_CLOSE) &&
                                                Tokenizer_CAN_RECURSE(self)) {
        else if (this == *"<" && !(this_context & LC_TAG_CLOSE)) {
            if (Tokenizer_CAN_RECURSE(self)) {
                if (Tokenizer_parse_tag(self))
                    return NULL;
            }
@@ -2289,8 +2668,9 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
 static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
 {
    PyObject *text, *temp;
    int context = 0;

    if (PyArg_ParseTuple(args, "U", &text)) {
    if (PyArg_ParseTuple(args, "U|i", &text, &context)) {
        Py_XDECREF(self->text);
        self->text = PySequence_Fast(text, "expected a sequence");
    }
@@ -2299,7 +2679,7 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
        Py_ssize_t size;
        /* Failed to parse a Unicode object; try a string instead. */
        PyErr_Clear();
        if (!PyArg_ParseTuple(args, "s#", &encoded, &size))
        if (!PyArg_ParseTuple(args, "s#|i", &encoded, &size, &context))
            return NULL;
        temp = PyUnicode_FromStringAndSize(encoded, size);
        if (!text)
@@ -2311,7 +2691,7 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
    }
    self->head = self->global = self->depth = self->cycles = 0;
    self->length = PyList_GET_SIZE(self->text);
    return Tokenizer_parse(self, 0, 1);
    return Tokenizer_parse(self, context, 1);
 }

 static int load_entitydefs(void)
@@ -2389,6 +2769,11 @@ static int load_tokens(void)
    WikilinkSeparator = PyObject_GetAttrString(tokens, "WikilinkSeparator");
    WikilinkClose = PyObject_GetAttrString(tokens, "WikilinkClose");

    ExternalLinkOpen = PyObject_GetAttrString(tokens, "ExternalLinkOpen");
    ExternalLinkSeparator = PyObject_GetAttrString(tokens,
                                                   "ExternalLinkSeparator");
    ExternalLinkClose = PyObject_GetAttrString(tokens, "ExternalLinkClose");

    HTMLEntityStart = PyObject_GetAttrString(tokens, "HTMLEntityStart");
    HTMLEntityNumeric = PyObject_GetAttrString(tokens, "HTMLEntityNumeric");
    HTMLEntityHex = PyObject_GetAttrString(tokens, "HTMLEntityHex");
@@ -2413,13 +2798,13 @@ static int load_tokens(void)
    return 0;
 }

 static int load_tag_defs(void)
 static int load_definitions(void)
 {
    PyObject *tempmod,
             *globals = PyEval_GetGlobals(),
             *locals = PyEval_GetLocals(),
             *fromlist = PyList_New(1),
             *modname = IMPORT_NAME_FUNC("tag_defs");
             *modname = IMPORT_NAME_FUNC("definitions");
    char *name = "mwparserfromhell";

    if (!fromlist || !modname)
@@ -2429,7 +2814,7 @@ static int load_tag_defs(void)
    Py_DECREF(fromlist);
    if (!tempmod)
        return -1;
    tag_defs = PyObject_GetAttrString(tempmod, "tag_defs");
    definitions = PyObject_GetAttrString(tempmod, "definitions");
    Py_DECREF(tempmod);
    return 0;
 }
@@ -2452,7 +2837,7 @@ PyMODINIT_FUNC INIT_FUNC_NAME(void)
    NOARGS = PyTuple_New(0);
    if (!EMPTY || !NOARGS)
        INIT_ERROR;
    if (load_entitydefs() || load_tokens() || load_tag_defs())
    if (load_entitydefs() || load_tokens() || load_definitions())
        INIT_ERROR;
 #ifdef IS_PY3K
    return module;
--- a/mwparserfromhell/parser/tokenizer.h
+++ b/mwparserfromhell/parser/tokenizer.h
@@ -62,7 +62,7 @@ static char** entitydefs;

 static PyObject* EMPTY;
 static PyObject* NOARGS;
 static PyObject* tag_defs;
 static PyObject* definitions;


 /* Tokens: */
@@ -82,6 +82,10 @@ static PyObject* WikilinkOpen;
 static PyObject* WikilinkSeparator;
 static PyObject* WikilinkClose;

 static PyObject* ExternalLinkOpen;
 static PyObject* ExternalLinkSeparator;
 static PyObject* ExternalLinkClose;

 static PyObject* HTMLEntityStart;
 static PyObject* HTMLEntityNumeric;
 static PyObject* HTMLEntityHex;
@@ -104,48 +108,53 @@ static PyObject* TagCloseClose;

 /* Local contexts: */

 #define LC_TEMPLATE             0x0000007
 #define LC_TEMPLATE_NAME        0x0000001
 #define LC_TEMPLATE_PARAM_KEY   0x0000002
 #define LC_TEMPLATE_PARAM_VALUE 0x0000004

 #define LC_ARGUMENT             0x0000018
 #define LC_ARGUMENT_NAME        0x0000008
 #define LC_ARGUMENT_DEFAULT     0x0000010

 #define LC_WIKILINK             0x0000060
 #define LC_WIKILINK_TITLE       0x0000020
 #define LC_WIKILINK_TEXT        0x0000040

 #define LC_HEADING              0x0001F80
 #define LC_HEADING_LEVEL_1      0x0000080
 #define LC_HEADING_LEVEL_2      0x0000100
 #define LC_HEADING_LEVEL_3      0x0000200
 #define LC_HEADING_LEVEL_4      0x0000400
 #define LC_HEADING_LEVEL_5      0x0000800
 #define LC_HEADING_LEVEL_6      0x0001000

 #define LC_TAG                  0x001E000
 #define LC_TAG_OPEN             0x0002000
 #define LC_TAG_ATTR             0x0004000
 #define LC_TAG_BODY             0x0008000
 #define LC_TAG_CLOSE            0x0010000

 #define LC_STYLE                0x01E0000
 #define LC_STYLE_ITALICS        0x0020000
 #define LC_STYLE_BOLD           0x0040000
 #define LC_STYLE_PASS_AGAIN     0x0080000
 #define LC_STYLE_SECOND_PASS    0x0100000

 #define LC_DLTERM               0x0200000

 #define LC_SAFETY_CHECK         0xFC00000
 #define LC_HAS_TEXT             0x0400000
 #define LC_FAIL_ON_TEXT         0x0800000
 #define LC_FAIL_NEXT            0x1000000
 #define LC_FAIL_ON_LBRACE       0x2000000
 #define LC_FAIL_ON_RBRACE       0x4000000
 #define LC_FAIL_ON_EQUALS       0x8000000
 #define LC_TEMPLATE             0x00000007
 #define LC_TEMPLATE_NAME        0x00000001
 #define LC_TEMPLATE_PARAM_KEY   0x00000002
 #define LC_TEMPLATE_PARAM_VALUE 0x00000004

 #define LC_ARGUMENT             0x00000018
 #define LC_ARGUMENT_NAME        0x00000008
 #define LC_ARGUMENT_DEFAULT     0x00000010

 #define LC_WIKILINK             0x00000060
 #define LC_WIKILINK_TITLE       0x00000020
 #define LC_WIKILINK_TEXT        0x00000040

 #define LC_EXT_LINK             0x00000380
 #define LC_EXT_LINK_URI         0x00000080
 #define LC_EXT_LINK_TITLE       0x00000100
 #define LC_EXT_LINK_BRACKETS    0x00000200

 #define LC_HEADING              0x0000FC00
 #define LC_HEADING_LEVEL_1      0x00000400
 #define LC_HEADING_LEVEL_2      0x00000800
 #define LC_HEADING_LEVEL_3      0x00001000
 #define LC_HEADING_LEVEL_4      0x00002000
 #define LC_HEADING_LEVEL_5      0x00004000
 #define LC_HEADING_LEVEL_6      0x00008000

 #define LC_TAG                  0x000F0000
 #define LC_TAG_OPEN             0x00010000
 #define LC_TAG_ATTR             0x00020000
 #define LC_TAG_BODY             0x00040000
 #define LC_TAG_CLOSE            0x00080000

 #define LC_STYLE                0x00F00000
 #define LC_STYLE_ITALICS        0x00100000
 #define LC_STYLE_BOLD           0x00200000
 #define LC_STYLE_PASS_AGAIN     0x00400000
 #define LC_STYLE_SECOND_PASS    0x00800000

 #define LC_DLTERM               0x01000000

 #define LC_SAFETY_CHECK         0x7E000000
 #define LC_HAS_TEXT             0x02000000
 #define LC_FAIL_ON_TEXT         0x04000000
 #define LC_FAIL_NEXT            0x08000000
 #define LC_FAIL_ON_LBRACE       0x10000000
 #define LC_FAIL_ON_RBRACE       0x20000000
 #define LC_FAIL_ON_EQUALS       0x40000000

 /* Global contexts: */

@@ -153,9 +162,10 @@ static PyObject* TagCloseClose;

 /* Aggregate contexts: */

 #define AGG_FAIL   (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_HEADING | LC_TAG | LC_STYLE)
 #define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)
 #define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE)
 #define AGG_FAIL         (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE)
 #define AGG_UNSAFE       (LC_TEMPLATE_NAME | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)
 #define AGG_DOUBLE       (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE)
 #define AGG_INVALID_LINK (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK | LC_EXT_LINK)

 /* Tag contexts: */

@@ -174,6 +184,7 @@ static PyObject* TagCloseClose;
 /* One node of a chain of text chunks; nodes point at their neighbours
    through prev/next. */
 struct Textbuffer {
    Py_ssize_t size;          /* presumably the count of characters stored in data -- TODO confirm */
    Py_UNICODE* data;         /* character storage belonging to this node */
    struct Textbuffer* prev;  /* neighbouring node in one direction; NOTE(review): confirm NULL marks the end */
    struct Textbuffer* next;  /* neighbouring node in the other direction; a non-NULL next is treated as "has content" by callers */
 };

@@ -228,12 +239,14 @@ typedef struct {
 #define Tokenizer_emit_first_kwargs(self, token, kwargs) Tokenizer_emit_token_kwargs(self, token, kwargs, 1)


 /* Macros for accessing HTML tag definitions: */
 /* Macros for accessing definitions: */

 #define GET_HTML_TAG(markup) (markup == *":" ? "dd" : markup == *";" ? "dt" : "li")
 #define IS_PARSABLE(tag) (call_tag_def_func("is_parsable", tag))
 #define IS_SINGLE(tag) (call_tag_def_func("is_single", tag))
 #define IS_SINGLE_ONLY(tag) (call_tag_def_func("is_single_only", tag))
 #define IS_PARSABLE(tag) (call_def_func("is_parsable", tag, NULL, NULL))
 #define IS_SINGLE(tag) (call_def_func("is_single", tag, NULL, NULL))
 #define IS_SINGLE_ONLY(tag) (call_def_func("is_single_only", tag, NULL, NULL))
 #define IS_SCHEME(scheme, slashes, reverse) \
    (call_def_func("is_scheme", scheme, slashes ? Py_True : Py_False, reverse ? Py_True : Py_False))


 /* Function prototypes: */
@@ -247,6 +260,8 @@ static void TagData_dealloc(TagData*);
 static PyObject* Tokenizer_new(PyTypeObject*, PyObject*, PyObject*);
 static void Tokenizer_dealloc(Tokenizer*);
 static int Tokenizer_init(Tokenizer*, PyObject*, PyObject*);
 static int Tokenizer_parse_entity(Tokenizer*);
 static int Tokenizer_handle_dl_term(Tokenizer*);
 static int Tokenizer_parse_tag(Tokenizer*);
 static PyObject* Tokenizer_parse(Tokenizer*, int, int);
 static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*);
--- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py
@@ -26,7 +26,8 @@ import re

 from . import contexts, tokens
 from ..compat import htmlentities
 from ..tag_defs import get_html_tag, is_parsable, is_single, is_single_only
 from ..definitions import (get_html_tag, is_parsable, is_single,
                           is_single_only, is_scheme)

 __all__ = ["Tokenizer"]

@@ -60,7 +61,7 @@ class Tokenizer(object):
    START = object()
    END = object()
    MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "'", "#", "*", ";",
               ":", "/", "-", "\n", END]
               ":", "/", "-", "\n", START, END]
    MAX_DEPTH = 40
    MAX_CYCLES = 100000
    regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE)
@@ -311,6 +312,168 @@ class Tokenizer(object):
        self._head += 1
        return self._pop()

    def _parse_bracketed_uri_scheme(self):
        """Consume the scheme at the start of a ``[...]`` external link.

        A new stack is pushed in the ``EXT_LINK_URI`` context. If the link
        begins with ``//`` those two characters are emitted and nothing else
        is checked. Otherwise text segments made up solely of lowercase
        letters, digits, ``+``, ``.`` and ``-`` are collected as the scheme,
        a ``:`` must follow, an optional ``//`` is consumed, and the result
        is validated with ``is_scheme()``. Any violation fails the route.
        """
        self._push(contexts.EXT_LINK_URI)
        if self._read() == self._read(1) == "/":
            # Link starts directly with "//": there is no scheme to validate.
            self._emit_text("//")
            self._head += 2
            return

        allowed = "abcdefghijklmnopqrstuvwxyz0123456789+.-"
        scheme = ""
        while True:
            segment = self._read()
            if segment is self.END:
                break
            if not all(char in allowed for char in segment):
                break
            scheme += segment
            self._emit_text(segment)
            self._head += 1

        # The scheme must be terminated by a colon.
        if self._read() != ":":
            self._fail_route()
        self._emit_text(":")
        self._head += 1

        has_slashes = self._read() == self._read(1) == "/"
        if has_slashes:
            self._emit_text("//")
            self._head += 2
        if not is_scheme(scheme, has_slashes):
            self._fail_route()

    def _parse_free_uri_scheme(self):
        """Recover and validate the scheme of a bracketless external link.

        The scheme has already been emitted as ordinary text by the time the
        ``:`` is reached, so it is read back out of the textbuffer, walking
        backwards until whitespace or a marker is found. A character outside
        the allowed scheme alphabet, or a scheme rejected by ``is_scheme()``,
        raises ``BadRoute``. On success a new ``EXT_LINK_URI`` stack is
        pushed and the scheme, the colon and any ``//`` are emitted into it.
        """
        allowed = "abcdefghijklmnopqrstuvwxyz0123456789+.-"
        backwards = []
        boundary_found = False
        for chunk in reversed(self._textbuffer):
            for char in reversed(chunk):
                if char.isspace() or char in self.MARKERS:
                    boundary_found = True
                    break
                if char not in allowed:
                    raise BadRoute()
                backwards.append(char)
            if boundary_found:
                break

        # Characters were gathered last-to-first; restore reading order.
        scheme = "".join(backwards[::-1])
        has_slashes = self._read() == self._read(1) == "/"
        if not is_scheme(scheme, has_slashes):
            raise BadRoute()

        self._push(contexts.EXT_LINK_URI)
        self._emit_text(scheme)
        self._emit_text(":")
        if has_slashes:
            self._emit_text("//")
            self._head += 2

    def _handle_free_link_text(self, punct, tail, this):
        """Handle text in a free ext link, including trailing punctuation."""
        if "(" in this and ")" in punct:
            punct = punct[:-1]  # ')' is not longer valid punctuation
        if this.endswith(punct):
            for i in reversed(range(-len(this), 0)):
                if i == -len(this) or this[i - 1] not in punct:
                    break
            stripped = this[:i]
            if stripped and tail:
                self._emit_text(tail)
                tail = ""
            tail += this[i:]
            this = stripped
        elif tail:
            self._emit_text(tail)
            tail = ""
        self._emit_text(this)
        return punct, tail

    def _really_parse_external_link(self, brackets):
        """Really parse an external link.

        *brackets* is true for ``[url title]`` links and false for free
        (bare URL) links. The URI scheme is parsed first; the rest of the
        link is then consumed segment by segment.

        Returns a 3-tuple ``(link, extra, delta)``:

        - *link* is the token stack for the link (the popped stack, or the
          result of ``_parse(push=False)`` once a bracketed link's title
          has been entered);
        - *extra* is text that was read but does not belong to the link
          (held-back trailing punctuation and anything after a space), or
          ``None`` when a title was parsed;
        - *delta* is the head adjustment the caller applies, ``0`` or ``-1``.

        The route fails if the link has no body, or if a bracketed link
        reaches a newline or the end of input before its closing ``]``.
        """
        if brackets:
            self._parse_bracketed_uri_scheme()
            invalid = ("\n", " ", "]")
        else:
            self._parse_free_uri_scheme()
            invalid = ("\n", " ", "[", "]")
            # Backslash and dot are both punctuation; the backslash is
            # written as an explicit escape so the literal is valid.
            punct = tuple(",;\\.:!?)")
        # A link needs at least one character after the scheme.
        if self._read() is self.END or self._read()[0] in invalid:
            self._fail_route()
        tail = ""
        while True:
            this, next = self._read(), self._read(1)
            if this is self.END or this == "\n":
                if brackets:
                    self._fail_route()
                return self._pop(), tail, -1
            elif this == next == "{" and self._can_recurse():
                # Held-back punctuation turned out to be inside the link.
                if tail:
                    self._emit_text(tail)
                    tail = ""
                self._parse_template_or_argument()
            elif this == "[":
                if brackets:
                    self._emit_text("[")
                else:
                    return self._pop(), tail, -1
            elif this == "]":
                return self._pop(), tail, 0 if brackets else -1
            elif this == "&":
                if tail:
                    self._emit_text(tail)
                    tail = ""
                self._parse_entity()
            elif " " in this:
                before, after = this.split(" ", 1)
                if brackets:
                    # The first space separates the URL from the title;
                    # switch contexts and let _parse() handle the title.
                    self._emit_text(before)
                    self._emit(tokens.ExternalLinkSeparator())
                    if after:
                        self._emit_text(after)
                    self._context ^= contexts.EXT_LINK_URI
                    self._context |= contexts.EXT_LINK_TITLE
                    self._head += 1
                    return self._parse(push=False), None, 0
                # A space ends a free link; the remainder is plain text.
                punct, tail = self._handle_free_link_text(punct, tail, before)
                return self._pop(), tail + " " + after, 0
            elif not brackets:
                punct, tail = self._handle_free_link_text(punct, tail, this)
            else:
                self._emit_text(this)
            self._head += 1

    def _remove_uri_scheme_from_textbuffer(self, scheme):
        """Remove the URI scheme of a new external link from the textbuffer."""
        length = len(scheme)
        while length:
            if length < len(self._textbuffer[-1]):
                self._textbuffer[-1] = self._textbuffer[-1][:-length]
                break
            length -= len(self._textbuffer[-1])
            self._textbuffer.pop()

    def _parse_external_link(self, brackets):
        """Parse an external link at the head of the wikicode string.

        *brackets* is true when the head is at a ``[`` and false when it is
        at the ``:`` that follows a possible free-link scheme. If parsing
        fails, the head is restored and the current text is emitted as-is
        (or treated as a definition-list term separator for a free link in
        a ``DL_TERM`` context).
        """
        # Remember where we started so a failed route can be undone.
        reset = self._head
        self._head += 1
        try:
            bad_context = self._context & contexts.INVALID_LINK
            if bad_context or not self._can_recurse():
                raise BadRoute()
            link, extra, delta = self._really_parse_external_link(brackets)
        except BadRoute:
            self._head = reset
            if not brackets and self._context & contexts.DL_TERM:
                self._handle_dl_term()
            else:
                self._emit_text(self._read())
        else:
            if not brackets:
                # The scheme of a free link was already emitted as plain
                # text before the ':' was seen; take it back out so it is
                # not duplicated in front of the link.
                scheme = link[0].text.split(":", 1)[0]
                self._remove_uri_scheme_from_textbuffer(scheme)
            self._emit(tokens.ExternalLinkOpen(brackets=brackets))
            self._emit_all(link)
            self._emit(tokens.ExternalLinkClose())
            # delta is 0 or -1, as returned by _really_parse_external_link().
            self._head += delta
            if extra:
                self._emit_text(extra)

    def _parse_heading(self):
        """Parse a section heading at the head of the wikicode string."""
        self._global |= contexts.GL_HEADING
@@ -810,12 +973,16 @@ class Tokenizer(object):
        context = self._context
        if context & contexts.FAIL_NEXT:
            return False
        if context & contexts.WIKILINK_TITLE:
            if this == "]" or this == "{":
        if context & contexts.WIKILINK:
            if context & contexts.WIKILINK_TEXT:
                return not (this == self._read(1) == "[")
            elif this == "]" or this == "{":
                self._context |= contexts.FAIL_NEXT
            elif this == "\n" or this == "[" or this == "}":
                return False
            return True
        elif context & contexts.EXT_LINK_TITLE:
            return this != "\n"
        elif context & contexts.TEMPLATE_NAME:
            if this == "{" or this == "}" or this == "[":
                self._context |= contexts.FAIL_NEXT
@@ -898,8 +1065,8 @@ class Tokenizer(object):
                    return self._handle_argument_end()
                else:
                    self._emit_text("}")
            elif this == next == "[":
                if not self._context & contexts.WIKILINK_TITLE and self._can_recurse():
            elif this == next == "[" and self._can_recurse():
                if not self._context & contexts.INVALID_LINK:
                    self._parse_wikilink()
                else:
                    self._emit_text("[")
@@ -907,6 +1074,12 @@ class Tokenizer(object):
                self._handle_wikilink_separator()
            elif this == next == "]" and self._context & contexts.WIKILINK:
                return self._handle_wikilink_end()
            elif this == "[":
                self._parse_external_link(True)
            elif this == ":" and self._read(-1) not in self.MARKERS:
                self._parse_external_link(False)
            elif this == "]" and self._context & contexts.EXT_LINK_TITLE:
                return self._pop()
            elif this == "=" and not self._global & contexts.GL_HEADING:
                if self._read(-1) in ("\n", self.START):
                    self._parse_heading()
@@ -928,8 +1101,8 @@ class Tokenizer(object):
                    self._handle_tag_open_close()
                else:
                    self._handle_invalid_tag_start()
            elif this == "<":
                if not self._context & contexts.TAG_CLOSE and self._can_recurse():
            elif this == "<" and not self._context & contexts.TAG_CLOSE:
                if self._can_recurse():
                    self._parse_tag()
                else:
                    self._emit_text("<")
@@ -952,8 +1125,9 @@ class Tokenizer(object):
                self._emit_text(this)
            self._head += 1

    def tokenize(self, text):
    def tokenize(self, text, context=0):
        """Build a list of tokens from a string of wikicode and return it."""
        split = self.regex.split(text)
        self._text = [segment for segment in split if segment]
        return self._parse()
        self._head = self._global = self._depth = self._cycles = 0
        return self._parse(context)
--- a/mwparserfromhell/parser/tokens.py
+++ b/mwparserfromhell/parser/tokens.py
@@ -84,6 +84,10 @@ WikilinkOpen = make("WikilinkOpen")                                 # [[
 WikilinkSeparator = make("WikilinkSeparator")                       # |
 WikilinkClose = make("WikilinkClose")                               # ]]

 ExternalLinkOpen = make("ExternalLinkOpen")                         # [
 ExternalLinkSeparator = make("ExternalLinkSeparator")               #
 ExternalLinkClose = make("ExternalLinkClose")                       # ]

 HTMLEntityStart = make("HTMLEntityStart")                           # &
 HTMLEntityNumeric = make("HTMLEntityNumeric")                       # #
 HTMLEntityHex = make("HTMLEntityHex")                               # x
--- a/mwparserfromhell/utils.py
+++ b/mwparserfromhell/utils.py
@@ -33,7 +33,7 @@ from .smart_list import SmartList

 __all__ = ["parse_anything"]

 def parse_anything(value):
 def parse_anything(value, context=0):
    """Return a :py:class:`~.Wikicode` for *value*, allowing multiple types.

    This differs from :py:meth:`.Parser.parse` in that we accept more than just
@@ -44,6 +44,12 @@ def parse_anything(value):
    on-the-fly by various methods of :py:class:`~.Wikicode` and others like
    :py:class:`~.Template`, such as :py:meth:`wikicode.insert()
    <.Wikicode.insert>` or setting :py:meth:`template.name <.Template.name>`.

    If given, *context* will be passed as a starting context to the parser.
    This is helpful when this function is used inside node attribute setters.
    For example, :py:class:`~.ExternalLink`\ 's :py:attr:`~.ExternalLink.url`
    setter sets *context* to :py:mod:`contexts.EXT_LINK_URI <.contexts>` to
    prevent the URL itself from becoming an :py:class:`~.ExternalLink`.
    """
    from .parser import Parser
    from .wikicode import Wikicode
@@ -53,17 +59,17 @@ def parse_anything(value):
    elif isinstance(value, Node):
        return Wikicode(SmartList([value]))
    elif isinstance(value, str):
        return Parser(value).parse()
        return Parser().parse(value, context)
    elif isinstance(value, bytes):
        return Parser(value.decode("utf8")).parse()
        return Parser().parse(value.decode("utf8"), context)
    elif isinstance(value, int):
        return Parser(str(value)).parse()
        return Parser().parse(str(value), context)
    elif value is None:
        return Wikicode(SmartList())
    try:
        nodelist = SmartList()
        for item in value:
            nodelist += parse_anything(item).nodes
            nodelist += parse_anything(item, context).nodes
    except TypeError:
        error = "Needs string, Node, Wikicode, int, None, or iterable of these, but got {0}: {1}"
        raise ValueError(error.format(type(value).__name__, value))
--- a/mwparserfromhell/wikicode.py
+++ b/mwparserfromhell/wikicode.py
@@ -24,8 +24,8 @@ from __future__ import unicode_literals
 import re

 from .compat import maxsize, py3k, str
 from .nodes import (Argument, Comment, Heading, HTMLEntity, Node, Tag,
                    Template, Text, Wikilink)
 from .nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity,
                    Node, Tag, Template, Text, Wikilink)
 from .string_mixin import StringMixIn
 from .utils import parse_anything

@@ -509,6 +509,6 @@ class Wikicode(StringMixIn):
        return "\n".join(self._get_tree(self, [], marker, 0))

 Wikicode._build_filter_methods(
    arguments=Argument, comments=Comment, headings=Heading,
    html_entities=HTMLEntity, tags=Tag, templates=Template, text=Text,
    wikilinks=Wikilink)
    arguments=Argument, comments=Comment, external_links=ExternalLink,
    headings=Heading, html_entities=HTMLEntity, tags=Tag, templates=Template,
    text=Text, wikilinks=Wikilink)
--- a/tests/test_builder.py
+++ b/tests/test_builder.py
@@ -23,8 +23,8 @@
 from __future__ import unicode_literals
 import unittest

 from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity,
                                    Tag, Template, Text, Wikilink)
 from mwparserfromhell.nodes import (Argument, Comment, ExternalLink, Heading,
                                    HTMLEntity, Tag, Template, Text, Wikilink)
 from mwparserfromhell.nodes.extras import Attribute, Parameter
 from mwparserfromhell.parser import tokens
 from mwparserfromhell.parser.builder import Builder
@@ -150,6 +150,48 @@ class TestBuilder(TreeEqualityTestCase):
        for test, valid in tests:
            self.assertWikicodeEqual(valid, self.builder.build(test))

    def test_external_link(self):
        """tests for building ExternalLink nodes"""
        url = "http://example.com/"
        url_head, url_rest = "http://example", ".com/foo"

        # Small factories so every case gets fresh token instances.
        opened = lambda flag: tokens.ExternalLinkOpen(brackets=flag)
        text = lambda value: tokens.Text(text=value)
        sep = tokens.ExternalLinkSeparator
        closed = tokens.ExternalLinkClose

        cases = []

        # Free link, URL only.
        cases.append((
            [opened(False), text(url), closed()],
            wrap([ExternalLink(wraptext(url), brackets=False)])))

        # Bracketed link, URL only.
        cases.append((
            [opened(True), text(url), closed()],
            wrap([ExternalLink(wraptext(url))])))

        # Bracketed link with a separator but an empty title.
        cases.append((
            [opened(True), text(url), sep(), closed()],
            wrap([ExternalLink(wraptext(url), wrap([]))])))

        # Bracketed link with a title.
        cases.append((
            [opened(True), text(url), sep(), text("Example"), closed()],
            wrap([ExternalLink(wraptext(url), wraptext("Example"))])))

        # Free link whose URL is split over two text tokens.
        cases.append((
            [opened(False), text(url_head), text(url_rest), closed()],
            wrap([ExternalLink(wraptext(url_head, url_rest),
                               brackets=False)])))

        # Bracketed link with both URL and title split over several tokens.
        cases.append((
            [opened(True), text(url_head), text(url_rest), sep(),
             text("Example"), text(" Web Page"), closed()],
            wrap([ExternalLink(wraptext(url_head, url_rest),
                               wraptext("Example", " Web Page"))])))

        for token_list, expected in cases:
            self.assertWikicodeEqual(expected, self.builder.build(token_list))

    def test_html_entity(self):
        """tests for building HTMLEntity nodes"""
        tests = [
--- a/tests/test_external_link.py
+++ b/tests/test_external_link.py
@@ -0,0 +1,130 @@
 # -*- coding: utf-8  -*-
 #
 # Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
 #
 # The above copyright notice and this permission notice shall be included in
 # all copies or substantial portions of the Software.
 #
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.

 from __future__ import unicode_literals
 import unittest

 from mwparserfromhell.compat import str
 from mwparserfromhell.nodes import ExternalLink, Text

 from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext

 class TestExternalLink(TreeEqualityTestCase):
    """Test cases for the ExternalLink node.

    Exercises string rendering, child-node iteration, stripping, tree
    display, and the url, title, and brackets attributes.
    """

    def test_unicode(self):
        """test ExternalLink.__unicode__()

        Checks the rendered text of a bracketless link, a bracketed link
        without a title, one with an empty title, and one with a real title.
        """
        address = "http://example.com/"
        bare = ExternalLink(wraptext(address), brackets=False)
        self.assertEqual("http://example.com/", str(bare))
        bracketed = ExternalLink(wraptext(address))
        self.assertEqual("[http://example.com/]", str(bracketed))
        empty_title = ExternalLink(wraptext(address), wrap([]))
        self.assertEqual("[http://example.com/ ]", str(empty_title))
        titled = ExternalLink(wraptext(address), wraptext("Example Web Page"))
        self.assertEqual("[http://example.com/ Example Web Page]", str(titled))

    def test_iternodes(self):
        """test ExternalLink.__iternodes__()

        Each generator is expected to yield (None, node) first, then one
        (parent, child) pair per URL child and per title child, and then to
        be exhausted.
        """
        bare_url_text = Text("http://example.com/")
        titled_url_text = Text("http://example.com/")
        title_first = Text("Example")
        title_second = Text("Page")
        bare = ExternalLink(wrap([bare_url_text]), brackets=False)
        titled = ExternalLink(wrap([titled_url_text]),
                              wrap([title_first, title_second]))
        bare_walk = bare.__iternodes__(getnodes)
        titled_walk = titled.__iternodes__(getnodes)

        # The two generators are independent, so each is checked in full.
        self.assertEqual((None, bare), next(bare_walk))
        self.assertEqual((bare.url, bare_url_text), next(bare_walk))
        self.assertRaises(StopIteration, next, bare_walk)

        self.assertEqual((None, titled), next(titled_walk))
        self.assertEqual((titled.url, titled_url_text), next(titled_walk))
        self.assertEqual((titled.title, title_first), next(titled_walk))
        self.assertEqual((titled.title, title_second), next(titled_walk))
        self.assertRaises(StopIteration, next, titled_walk)

    def test_strip(self):
        """test ExternalLink.__strip__()

        The expected result for each node must hold for every combination
        of the two boolean arguments.
        """
        address = "http://example.com"
        expectations = [
            ("http://example.com", ExternalLink(wraptext(address),
                                                brackets=False)),
            (None, ExternalLink(wraptext(address))),
            (None, ExternalLink(wraptext(address), wrap([]))),
            ("Link", ExternalLink(wraptext(address), wraptext("Link"))),
        ]
        flag_pairs = [(flag_a, flag_b) for flag_a in (True, False)
                      for flag_b in (True, False)]
        for flag_a, flag_b in flag_pairs:
            for expected, link in expectations:
                self.assertEqual(expected, link.__strip__(flag_a, flag_b))

    def test_showtree(self):
        """test ExternalLink.__showtree__()

        The three callbacks all append to one list, so the order in which
        the node invokes them can be compared against a single sequence.
        """
        recorded = []
        getter, marker = object(), object()

        def get(code):
            # Tag the Wikicode object so it can be told apart from text.
            recorded.append((getter, code))

        def mark():
            recorded.append(marker)

        bare = ExternalLink(wraptext("http://example.com"), brackets=False)
        titled = ExternalLink(wraptext("http://example.com"), wraptext("Link"))
        for link in (bare, titled):
            link.__showtree__(recorded.append, get, mark)
        expected = [
            (getter, bare.url),
            "[",
            (getter, titled.url),
            (getter, titled.title),
            "]",
        ]
        self.assertEqual(expected, recorded)

    def test_url(self):
        """test getter/setter for the url attribute

        The getter must hand back the very object given to the constructor,
        and assigning a string must yield an equivalent Wikicode value.
        """
        original = wraptext("http://example.com/")
        replacement = "mailto:héhehé@spam.com"
        links = (ExternalLink(original, brackets=False),
                 ExternalLink(original, wraptext("Example")))
        for link in links:
            self.assertIs(original, link.url)
        for link in links:
            link.url = replacement
        for link in links:
            self.assertWikicodeEqual(wraptext(replacement), link.url)

    def test_title(self):
        """test getter/setter for the title attribute

        Covers a link built without a title, clearing a title with None,
        and assigning a new title from a string.
        """
        caption = wraptext("Example!")
        untitled = ExternalLink(wraptext("http://example.com/"),
                                brackets=False)
        titled = ExternalLink(wraptext("http://example.com/"), caption)
        self.assertIs(None, untitled.title)
        self.assertIs(caption, titled.title)

        titled.title = None
        self.assertIs(None, titled.title)

        titled.title = "My Website"
        self.assertWikicodeEqual(wraptext("My Website"), titled.title)

    def test_brackets(self):
        """test getter/setter for the brackets attribute

        Flips the flag on both nodes and checks the attribute as well as
        the resulting rendered text.
        """
        address = "http://example.com/"
        unbracketed = ExternalLink(wraptext(address), brackets=False)
        bracketed = ExternalLink(wraptext(address), wraptext("Link"))
        self.assertFalse(unbracketed.brackets)
        self.assertTrue(bracketed.brackets)

        # Swap the two settings.
        unbracketed.brackets = True
        bracketed.brackets = False
        self.assertTrue(unbracketed.brackets)
        self.assertFalse(bracketed.brackets)
        self.assertEqual("[http://example.com/]", str(unbracketed))
        self.assertEqual("http://example.com/", str(bracketed))

 # Run the tests with verbose output when this file is executed as a script.
 if __name__ == "__main__":
    unittest.main(verbosity=2)
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -36,9 +36,9 @@ class TestParser(TreeEqualityTestCase):
    def test_use_c(self):
        """make sure the correct tokenizer is used"""
        # NOTE(review): this hunk appears to list both the Parser(None) and
        # the Parser() call forms back to back -- presumably the old and new
        # sides of the change; confirm which lines belong in the final file.
        if parser.use_c:
            self.assertTrue(parser.Parser(None)._tokenizer.USES_C)
            self.assertTrue(parser.Parser()._tokenizer.USES_C)
            # Clear the flag; the assertions below expect USES_C to be false
            # for parsers created after this point.
            parser.use_c = False
        self.assertFalse(parser.Parser(None)._tokenizer.USES_C)
        self.assertFalse(parser.Parser()._tokenizer.USES_C)

    def test_parsing(self):
        """integration test for parsing overall"""
@@ -59,7 +59,7 @@ class TestParser(TreeEqualityTestCase):
                ]))
            ])
        ])
        actual = parser.Parser(text).parse()
        actual = parser.Parser().parse(text)
        self.assertWikicodeEqual(expected, actual)

 if __name__ == "__main__":
--- a/tests/test_wikicode.py
+++ b/tests/test_wikicode.py
@@ -276,6 +276,7 @@ class TestWikicode(TreeEqualityTestCase):
            self.assertEqual(["{{{e}}}"], get_filter("arguments"))
            self.assertIs(code.get(4), get_filter("arguments")[0])
            self.assertEqual([], get_filter("comments"))
            self.assertEqual([], get_filter("external_links"))
            self.assertEqual([], get_filter("headings"))
            self.assertEqual([], get_filter("html_entities"))
            self.assertEqual([], get_filter("tags"))
--- a/tests/tokenizer/external_links.mwtest
+++ b/tests/tokenizer/external_links.mwtest
@@ -0,0 +1,473 @@
 name:   basic
 label:  basic external link
 input:  "http://example.com/"
 output: [ExternalLinkOpen(brackets=False), Text(text="http://example.com/"), ExternalLinkClose()]

 ---

 name:   basic_brackets
 label:  basic external link in brackets
 input:  "[http://example.com/]"
 output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com/"), ExternalLinkClose()]

 ---

 name:   brackets_space
 label:  basic external link in brackets, with a space after
 input:  "[http://example.com/ ]"
 output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com/"), ExternalLinkSeparator(), ExternalLinkClose()]

 ---

 name:   brackets_title
 label:  basic external link in brackets, with a title
 input:  "[http://example.com/ Example]"
 output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com/"), ExternalLinkSeparator(), Text(text="Example"), ExternalLinkClose()]

 ---

 name:   brackets_multiword_title
 label:  basic external link in brackets, with a multi-word title
 input:  "[http://example.com/ Example Web Page]"
 output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com/"), ExternalLinkSeparator(), Text(text="Example Web Page"), ExternalLinkClose()]

 ---

 name:   brackets_adjacent
 label:  three adjacent bracket-enclosed external links
 input:  "[http://foo.com/ Foo][http://bar.com/ Bar]\n[http://baz.com/ Baz]"
 output: [ExternalLinkOpen(brackets=True), Text(text="http://foo.com/"), ExternalLinkSeparator(), Text(text="Foo"), ExternalLinkClose(), ExternalLinkOpen(brackets=True), Text(text="http://bar.com/"), ExternalLinkSeparator(), Text(text="Bar"), ExternalLinkClose(), Text(text="\n"), ExternalLinkOpen(brackets=True), Text(text="http://baz.com/"), ExternalLinkSeparator(), Text(text="Baz"), ExternalLinkClose()]

 ---

 name:   brackets_newline_before
 label:  bracket-enclosed link with a newline before the title
 input:  "[http://example.com/ \nExample]"
 output: [Text(text="["), ExternalLinkOpen(brackets=False), Text(text="http://example.com/"), ExternalLinkClose(), Text(text=" \nExample]")]

 ---

 name:   brackets_newline_inside
 label:  bracket-enclosed link with a newline in the title
 input:  "[http://example.com/ Example \nWeb Page]"
 output: [Text(text="["), ExternalLinkOpen(brackets=False), Text(text="http://example.com/"), ExternalLinkClose(), Text(text=" Example \nWeb Page]")]

 ---

 name:   brackets_newline_after
 label:  bracket-enclosed link with a newline after the title
 input:  "[http://example.com/ Example\n]"
 output: [Text(text="["), ExternalLinkOpen(brackets=False), Text(text="http://example.com/"), ExternalLinkClose(), Text(text=" Example\n]")]

 ---

 name:   brackets_space_before
 label:  bracket-enclosed link with a space before the URL
 input:  "[ http://example.com Example]"
 output: [Text(text="[ "), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), Text(text=" Example]")]

 ---

 name:   brackets_title_like_url
 label:  bracket-enclosed link with a title that looks like a URL
 input:  "[http://example.com http://example.com]"
 output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com"), ExternalLinkSeparator(), Text(text="http://example.com"), ExternalLinkClose()]

 ---

 name:   brackets_recursive
 label:  bracket-enclosed link with a bracket-enclosed link as the title
 input:  "[http://example.com [http://example.com]]"
 output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com"), ExternalLinkSeparator(), Text(text="[http://example.com"), ExternalLinkClose(), Text(text="]")]

 ---

 name:   period_after
 label:  a period after a free link that is excluded
 input:  "http://example.com."
 output: [ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), Text(text=".")]

 ---

 name:   colons_after
 label:  colons after a free link that are excluded
 input:  "http://example.com/foo:bar.:;baz!?,"
 output: [ExternalLinkOpen(brackets=False), Text(text="http://example.com/foo:bar.:;baz"), ExternalLinkClose(), Text(text="!?,")]

 ---

 name:   close_paren_after_excluded
 label:  a closing parenthesis after a free link that is excluded
 input:  "http://example.)com)"
 output: [ExternalLinkOpen(brackets=False), Text(text="http://example.)com"), ExternalLinkClose(), Text(text=")")]

 ---

 name:   close_paren_after_included
 label:  a closing parenthesis after a free link that is included because of an opening parenthesis in the URL
 input:  "http://example.(com)"
 output: [ExternalLinkOpen(brackets=False), Text(text="http://example.(com)"), ExternalLinkClose()]

 ---

 name:   open_bracket_inside
 label:  an open bracket inside a free link that causes it to be ended abruptly
 input:  "http://foobar[baz.com"
 output: [ExternalLinkOpen(brackets=False), Text(text="http://foobar"), ExternalLinkClose(), Text(text="[baz.com")]

 ---

 name:   brackets_period_after
 label:  a period after a bracket-enclosed link that is included
 input:  "[http://example.com. Example]"
 output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com."), ExternalLinkSeparator(), Text(text="Example"), ExternalLinkClose()]

 ---

 name:   brackets_colons_after
 label:  colons after a bracket-enclosed link that are included
 input:  "[http://example.com/foo:bar.:;baz!?, Example]"
 output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com/foo:bar.:;baz!?,"), ExternalLinkSeparator(), Text(text="Example"), ExternalLinkClose()]

 ---

 name:   brackets_close_paren_after_included
 label:  a closing parenthesis after a bracket-enclosed link that is included
 input:  "[http://example.)com) Example]"
 output: [ExternalLinkOpen(brackets=True), Text(text="http://example.)com)"), ExternalLinkSeparator(), Text(text="Example"), ExternalLinkClose()]

 ---

 name:   brackets_close_paren_after_included_2
 label:  a closing parenthesis after a bracket-enclosed link that is also included
 input:  "[http://example.(com) Example]"
 output: [ExternalLinkOpen(brackets=True), Text(text="http://example.(com)"), ExternalLinkSeparator(), Text(text="Example"), ExternalLinkClose()]

 ---

 name:   brackets_open_bracket_inside
 label:  an open bracket inside a bracket-enclosed link that is also included
 input:  "[http://foobar[baz.com Example]"
 output: [ExternalLinkOpen(brackets=True), Text(text="http://foobar[baz.com"), ExternalLinkSeparator(), Text(text="Example"), ExternalLinkClose()]

 ---

 name:   adjacent_space
 label:  two free links separated by a space
 input:  "http://example.com http://example.com"
 output: [ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), Text(text=" "), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose()]

 ---

 name:   adjacent_newline
 label:  two free links separated by a newline
 input:  "http://example.com\nhttp://example.com"
 output: [ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), Text(text="\n"), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose()]

 ---

 name:   adjacent_close_bracket
 label:  two free links separated by a close bracket
 input:  "http://example.com]http://example.com"
 output: [ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), Text(text="]"), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose()]

 ---

 name:   html_entity_in_url
 label:  a HTML entity parsed correctly inside a free link
 input:  "http://exa&nbsp;mple.com/"
 output: [ExternalLinkOpen(brackets=False), Text(text="http://exa"), HTMLEntityStart(), Text(text="nbsp"), HTMLEntityEnd(), Text(text="mple.com/"), ExternalLinkClose()]

 ---

 name:   template_in_url
 label:  a template parsed correctly inside a free link
 input:  "http://exa{{template}}mple.com/"
 output: [ExternalLinkOpen(brackets=False), Text(text="http://exa"), TemplateOpen(), Text(text="template"), TemplateClose(), Text(text="mple.com/"), ExternalLinkClose()]

 ---

 name:   argument_in_url
 label:  an argument parsed correctly inside a free link
 input:  "http://exa{{{argument}}}mple.com/"
 output: [ExternalLinkOpen(brackets=False), Text(text="http://exa"), ArgumentOpen(), Text(text="argument"), ArgumentClose(), Text(text="mple.com/"), ExternalLinkClose()]

 ---

 name:   wikilink_in_url
 label:  a wikilink that destroys a free link
 input:  "http://exa[[wikilink]]mple.com/"
 output: [ExternalLinkOpen(brackets=False), Text(text="http://exa"), ExternalLinkClose(), WikilinkOpen(), Text(text="wikilink"), WikilinkClose(), Text(text="mple.com/")]

 ---

 name:   external_link_in_url
 label:  a bracketed link that destroys a free link
 input:  "http://exa[http://example.com/]mple.com/"
 output: [ExternalLinkOpen(brackets=False), Text(text="http://exa"), ExternalLinkClose(), ExternalLinkOpen(brackets=True), Text(text="http://example.com/"), ExternalLinkClose(), Text(text="mple.com/")]

 ---

 name:   spaces_padding
 label:  spaces padding a free link
 input:  "   http://example.com   "
 output: [Text(text="   "), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), Text(text="   ")]

 ---

 name:   text_and_spaces_padding
 label:  text and spaces padding a free link
 input:  "x   http://example.com   x"
 output: [Text(text="x   "), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), Text(text="   x")]

 ---

 name:   template_before
 label:  a template before a free link
 input:  "{{foo}}http://example.com"
 output: [TemplateOpen(), Text(text="foo"), TemplateClose(), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose()]

 ---

 name:   spaces_padding_no_slashes
 label:  spaces padding a free link with no slashes after the colon
 input:  "   mailto:example@example.com   "
 output: [Text(text="   "), ExternalLinkOpen(brackets=False), Text(text="mailto:example@example.com"), ExternalLinkClose(), Text(text="   ")]

 ---

 name:   text_and_spaces_padding_no_slashes
 label:  text and spaces padding a free link with no slashes after the colon
 input:  "x   mailto:example@example.com   x"
 output: [Text(text="x   "), ExternalLinkOpen(brackets=False), Text(text="mailto:example@example.com"), ExternalLinkClose(), Text(text="   x")]

 ---

 name:   template_before_no_slashes
 label:  a template before a free link with no slashes after the colon
 input:  "{{foo}}mailto:example@example.com"
 output: [TemplateOpen(), Text(text="foo"), TemplateClose(), ExternalLinkOpen(brackets=False), Text(text="mailto:example@example.com"), ExternalLinkClose()]

 ---

 name:   no_slashes
 label:  a free link with no slashes after the colon
 input:  "mailto:example@example.com"
 output: [ExternalLinkOpen(brackets=False), Text(text="mailto:example@example.com"), ExternalLinkClose()]

 ---

 name:   slashes_optional
 label:  a free link using a scheme that doesn't need slashes, but has them anyway
 input:  "mailto://example@example.com"
 output: [ExternalLinkOpen(brackets=False), Text(text="mailto://example@example.com"), ExternalLinkClose()]

 ---

 name:   short
 label:  a very short free link
 input:  "mailto://abc"
 output: [ExternalLinkOpen(brackets=False), Text(text="mailto://abc"), ExternalLinkClose()]

 ---

 name:   slashes_missing
 label:  slashes missing from a free link with a scheme that requires them
 input:  "http:example@example.com"
 output: [Text(text="http:example@example.com")]

 ---

 name:   no_scheme_but_slashes
 label:  no scheme in a free link, but slashes (protocol-relative free links are not supported)
 input:  "//example.com"
 output: [Text(text="//example.com")]

 ---

 name:   no_scheme_but_colon
 label:  no scheme in a free link, but a colon
 input:  " :example.com"
 output: [Text(text=" :example.com")]

 ---

 name:   no_scheme_but_colon_and_slashes
 label:  no scheme in a free link, but a colon and slashes
 input:  " ://example.com"
 output: [Text(text=" ://example.com")]

 ---

 name:   fake_scheme_no_slashes
 label:  a nonexistent scheme in a free link, without slashes
 input:  "fake:example.com"
 output: [Text(text="fake:example.com")]

 ---

 name:   fake_scheme_slashes
 label:  a nonexistent scheme in a free link, with slashes
 input:  "fake://example.com"
 output: [Text(text="fake://example.com")]

 ---

 name:   fake_scheme_brackets_no_slashes
 label:  a nonexistent scheme in a bracketed link, without slashes
 input:  "[fake:example.com]"
 output: [Text(text="[fake:example.com]")]

 ---

 name:   fake_scheme_brackets_slashes
 label:  a nonexistent scheme in a bracketed link, with slashes
 input:  "[fake://example.com]"
 output: [Text(text="[fake://example.com]")]

 ---

 name:   interrupted_scheme
 label:  an otherwise valid scheme with something in the middle of it, in a free link
 input:  "ht?tp://example.com"
 output: [Text(text="ht?tp://example.com")]

 ---

 name:   interrupted_scheme_brackets
 label:  an otherwise valid scheme with something in the middle of it, in a bracketed link
 input:  "[ht?tp://example.com]"
 output: [Text(text="[ht?tp://example.com]")]

 ---

 name:   no_slashes_brackets
 label:  no slashes after the colon in a bracketed link
 input:  "[mailto:example@example.com Example]"
 output: [ExternalLinkOpen(brackets=True), Text(text="mailto:example@example.com"), ExternalLinkSeparator(), Text(text="Example"), ExternalLinkClose()]

 ---

 name:   space_before_no_slashes_brackets
 label:  a space before a bracketed link with no slashes after the colon
 input:  "[ mailto:example@example.com Example]"
 output: [Text(text="[ "), ExternalLinkOpen(brackets=False), Text(text="mailto:example@example.com"), ExternalLinkClose(), Text(text=" Example]")]

 ---

 name:   slashes_optional_brackets
 label:  a bracketed link using a scheme that doesn't need slashes, but has them anyway
 input:  "[mailto://example@example.com Example]"
 output: [ExternalLinkOpen(brackets=True), Text(text="mailto://example@example.com"), ExternalLinkSeparator(), Text(text="Example"), ExternalLinkClose()]

 ---

 name:   short_brackets
 label:  a very short link in brackets
 input:  "[mailto://abc Example]"
 output: [ExternalLinkOpen(brackets=True), Text(text="mailto://abc"), ExternalLinkSeparator(), Text(text="Example"), ExternalLinkClose()]

 ---

 name:   slashes_missing_brackets
 label:  slashes missing from a scheme that requires them in a bracketed link
 input:  "[http:example@example.com Example]"
 output: [Text(text="[http:example@example.com Example]")]

 ---

 name:   protcol_relative
 label:  a protocol-relative link (in brackets)
 input:  "[//example.com Example]"
 output: [ExternalLinkOpen(brackets=True), Text(text="//example.com"), ExternalLinkSeparator(), Text(text="Example"), ExternalLinkClose()]

 ---

 name:   scheme_missing_but_colon_brackets
 label:  scheme missing from a bracketed link, but with a colon
 input:  "[:example.com Example]"
 output: [Text(text="[:example.com Example]")]

 ---

 name:   scheme_missing_but_colon_slashes_brackets
 label:  scheme missing from a bracketed link, but with a colon and slashes
 input:  "[://example.com Example]"
 output: [Text(text="[://example.com Example]")]

 ---

 name:   unclosed_protocol_relative
 label:  an unclosed protocol-relative bracketed link
 input:  "[//example.com"
 output: [Text(text="[//example.com")]

 ---

 name:   space_before_protcol_relative
 label:  a space before a protocol-relative bracketed link
 input:  "[ //example.com]"
 output: [Text(text="[ //example.com]")]

 ---

 name:   unclosed_just_scheme
 label:  an unclosed bracketed link, ending after the scheme
 input:  "[http"
 output: [Text(text="[http")]

 ---

 name:   unclosed_scheme_colon
 label:  an unclosed bracketed link, ending after the colon
 input:  "[http:"
 output: [Text(text="[http:")]

 ---

 name:   unclosed_scheme_colon_slashes
 label:  an unclosed bracketed link, ending after the slashes
 input:  "[http://"
 output: [Text(text="[http://")]

 ---

 name:   incomplete_bracket
 label:  just an open bracket
 input:  "["
 output: [Text(text="[")]

 ---

 name:   incomplete_scheme_colon
 label:  a free link with just a scheme and a colon
 input:  "http:"
 output: [Text(text="http:")]

 ---

 name:   incomplete_scheme_colon_slashes
 label:  a free link with just a scheme, colon, and slashes
 input:  "http://"
 output: [Text(text="http://")]

 ---

 name:   brackets_scheme_but_no_url
 label:  brackets around a scheme and a colon
 input:  "[mailto:]"
 output: [Text(text="[mailto:]")]

 ---

 name:   brackets_scheme_slashes_but_no_url
 label:  brackets around a scheme, colon, and slashes
 input:  "[http://]"
 output: [Text(text="[http://]")]

 ---

 name:   brackets_scheme_title_but_no_url
 label:  brackets around a scheme, colon, and slashes, with a title
 input:  "[http:// Example]"
 output: [Text(text="[http:// Example]")]
--- a/tests/tokenizer/integration.mwtest
+++ b/tests/tokenizer/integration.mwtest
@@ -12,6 +12,13 @@ output: [TemplateOpen(), ArgumentOpen(), ArgumentOpen(), Text(text="foo"), Argum

 ---

 name:   link_in_template_name
 label:  a wikilink inside a template name, which breaks the template
 input:  "{{foo[[bar]]}}"
 output: [Text(text="{{foo"), WikilinkOpen(), Text(text="bar"), WikilinkClose(), Text(text="}}")]

 ---

 name:   rich_heading
 label:  a heading with templates/wikilinks in it
 input:  "== Head{{ing}} [[with]] {{{funky|{{stuf}}}}} =="
@@ -51,3 +58,17 @@ name:   wildcard_redux
 label:  an even wilder assortment of various things
 input:  "{{a|b|{{c|[[d]]{{{e}}}}}}}[[f|{{{g}}}<!--h-->]]{{i|j=&nbsp;}}"
 output: [TemplateOpen(), Text(text="a"), TemplateParamSeparator(), Text(text="b"), TemplateParamSeparator(), TemplateOpen(), Text(text="c"), TemplateParamSeparator(), WikilinkOpen(), Text(text="d"), WikilinkClose(), ArgumentOpen(), Text(text="e"), ArgumentClose(), TemplateClose(), TemplateClose(), WikilinkOpen(), Text(text="f"), WikilinkSeparator(), ArgumentOpen(), Text(text="g"), ArgumentClose(), CommentStart(), Text(text="h"), CommentEnd(), WikilinkClose(), TemplateOpen(), Text(text="i"), TemplateParamSeparator(), Text(text="j"), TemplateParamEquals(), HTMLEntityStart(), Text(text="nbsp"), HTMLEntityEnd(), TemplateClose()]

 ---

 name:   link_inside_dl
 label:  an external link inside a def list, such that the external link is parsed
 input:  ";;;mailto:example"
 output: [TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), ExternalLinkOpen(brackets=False), Text(text="mailto:example"), ExternalLinkClose()]

 ---

 name:   link_inside_dl_2
 label:  an external link inside a def list, such that the external link is not parsed
 input:  ";;;malito:example"
 output: [TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="malito"), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="example")]
--- a/tests/tokenizer/wikilinks.mwtest
+++ b/tests/tokenizer/wikilinks.mwtest
@@ -40,17 +40,17 @@ output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="bar|b

 ---

 name:   nested
 label:  a wikilink nested within the value of another
 input:  "[[foo|[[bar]]]]"
 output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), WikilinkOpen(), Text(text="bar"), WikilinkClose(), WikilinkClose()]
 name:   newline_text
 label:  a newline in the middle of the text
 input:  "[[foo|foo\nbar]]"
 output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="foo\nbar"), WikilinkClose()]

 ---

 name:   nested_with_text
 label:  a wikilink nested within the value of another, separated by other data
 input:  "[[foo|a[[b]]c]]"
 output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="a"), WikilinkOpen(), Text(text="b"), WikilinkClose(), Text(text="c"), WikilinkClose()]
 name:   bracket_text
 label:  a left bracket in the middle of the text
 input:  "[[foo|bar[baz]]"
 output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="bar[baz"), WikilinkClose()]

 ---

@@ -96,13 +96,34 @@ output: [Text(text="[[foo"), WikilinkOpen(), Text(text="bar"), WikilinkClose(),

 ---

 name:   invalid_nested_text
 name:   invalid_nested_padding
 label:  invalid wikilink: trying to nest in the wrong context, with a text param
 input:  "[[foo[[bar]]|baz]]"
 output: [Text(text="[[foo"), WikilinkOpen(), Text(text="bar"), WikilinkClose(), Text(text="|baz]]")]

 ---

 name:   invalid_nested_text
 label:  invalid wikilink: a wikilink nested within the value of another
 input:  "[[foo|[[bar]]"
 output: [Text(text="[[foo|"), WikilinkOpen(), Text(text="bar"), WikilinkClose()]

 ---

 name:   invalid_nested_text_2
 label:  invalid wikilink: a wikilink nested within the value of another, two pairs of closing brackets
 input:  "[[foo|[[bar]]]]"
 output: [Text(text="[[foo|"), WikilinkOpen(), Text(text="bar"), WikilinkClose(), Text(text="]]")]

 ---

 name:   invalid_nested_text_padding
 label:  invalid wikilink: a wikilink nested within the value of another, separated by other data
 input:  "[[foo|a[[b]]c]]"
 output: [Text(text="[[foo|a"), WikilinkOpen(), Text(text="b"), WikilinkClose(), Text(text="c]]")]

 ---

 name:   incomplete_open_only
 label:  incomplete wikilinks: just an open
 input:  "[["