@@ -46,6 +46,7 @@ nodes Package | |||
.. automodule:: mwparserfromhell.nodes.tag | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`template` Module | |||
@@ -30,6 +30,12 @@ mwparserfromhell Package | |||
:members: | |||
:undoc-members: | |||
:mod:`tag_defs` Module | |||
---------------------- | |||
.. automodule:: mwparserfromhell.tag_defs | |||
:members: | |||
:mod:`utils` Module | |||
------------------- | |||
@@ -36,18 +36,23 @@ class Attribute(StringMixIn): | |||
whose value is ``"foo"``. | |||
""" | |||
def __init__(self, name, value=None, quoted=True): | |||
def __init__(self, name, value=None, quoted=True, pad_first="", | |||
pad_before_eq="", pad_after_eq=""): | |||
super(Attribute, self).__init__() | |||
self._name = name | |||
self._value = value | |||
self._quoted = quoted | |||
self._pad_first = pad_first | |||
self._pad_before_eq = pad_before_eq | |||
self._pad_after_eq = pad_after_eq | |||
def __unicode__(self): | |||
base = self.pad_first + str(self.name) + self.pad_before_eq | |||
if self.value: | |||
if self.quoted: | |||
return str(self.name) + '="' + str(self.value) + '"' | |||
return str(self.name) + "=" + str(self.value) | |||
return str(self.name) | |||
return base + '="' + self.pad_after_eq + str(self.value) + '"' | |||
return base + "=" + self.pad_after_eq + str(self.value) | |||
return base | |||
@property | |||
def name(self): | |||
@@ -64,14 +69,41 @@ class Attribute(StringMixIn): | |||
"""Whether the attribute's value is quoted with double quotes.""" | |||
return self._quoted | |||
@property | |||
def pad_first(self): | |||
"""Spacing to insert right before the attribute.""" | |||
return self._pad_first | |||
@property | |||
def pad_before_eq(self): | |||
"""Spacing to insert right before the equal sign.""" | |||
return self._pad_before_eq | |||
@property | |||
def pad_after_eq(self): | |||
"""Spacing to insert right after the equal sign.""" | |||
return self._pad_after_eq | |||
@name.setter | |||
def name(self, newval): | |||
self._name = parse_anything(newval) | |||
def name(self, value): | |||
self._name = parse_anything(value) | |||
@value.setter | |||
def value(self, newval): | |||
self._value = parse_anything(newval) | |||
@quoted.setter | |||
def quoted(self, newval): | |||
self._quoted = bool(newval) | |||
def quoted(self, value): | |||
self._quoted = bool(value) | |||
@pad_first.setter | |||
def pad_first(self, value): | |||
self._pad_first = str(value) | |||
@pad_before_eq.setter | |||
def pad_before_eq(self, value): | |||
self._pad_before_eq = str(value) | |||
@pad_after_eq.setter | |||
def pad_after_eq(self, value): | |||
self._pad_after_eq = str(value) |
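For illustration, here is how the new padding fields feed into ``__unicode__`` (a minimal hand-built sketch using ``parse_anything``; in practice the builder constructs these objects):

>>> from mwparserfromhell.nodes.extras import Attribute
>>> from mwparserfromhell.utils import parse_anything
>>> attr = Attribute(parse_anything("name"), parse_anything("foo bar"),
...                  quoted=True, pad_first=" ")
>>> str(attr)
' name="foo bar"'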
@@ -24,6 +24,7 @@ from __future__ import unicode_literals | |||
from . import Node, Text | |||
from ..compat import str | |||
from ..tag_defs import get_wikicode, is_visible | |||
from ..utils import parse_anything | |||
__all__ = ["Tag"] | |||
@@ -31,79 +32,39 @@ __all__ = ["Tag"] | |||
class Tag(Node): | |||
"""Represents an HTML-style tag in wikicode, like ``<ref>``.""" | |||
TAG_UNKNOWN = 0 | |||
# Basic HTML: | |||
TAG_ITALIC = 1 | |||
TAG_BOLD = 2 | |||
TAG_UNDERLINE = 3 | |||
TAG_STRIKETHROUGH = 4 | |||
TAG_UNORDERED_LIST = 5 | |||
TAG_ORDERED_LIST = 6 | |||
TAG_DEF_TERM = 7 | |||
TAG_DEF_ITEM = 8 | |||
TAG_BLOCKQUOTE = 9 | |||
TAG_RULE = 10 | |||
TAG_BREAK = 11 | |||
TAG_ABBR = 12 | |||
TAG_PRE = 13 | |||
TAG_MONOSPACE = 14 | |||
TAG_CODE = 15 | |||
TAG_SPAN = 16 | |||
TAG_DIV = 17 | |||
TAG_FONT = 18 | |||
TAG_SMALL = 19 | |||
TAG_BIG = 20 | |||
TAG_CENTER = 21 | |||
# MediaWiki parser hooks: | |||
TAG_REF = 101 | |||
TAG_GALLERY = 102 | |||
TAG_MATH = 103 | |||
TAG_NOWIKI = 104 | |||
TAG_NOINCLUDE = 105 | |||
TAG_INCLUDEONLY = 106 | |||
TAG_ONLYINCLUDE = 107 | |||
# Additional parser hooks: | |||
TAG_SYNTAXHIGHLIGHT = 201 | |||
TAG_POEM = 202 | |||
# Lists of tags: | |||
TAGS_INVISIBLE = set((TAG_REF, TAG_GALLERY, TAG_MATH, TAG_NOINCLUDE)) | |||
TAGS_VISIBLE = set(range(300)) - TAGS_INVISIBLE | |||
def __init__(self, type_, tag, contents=None, attrs=None, showtag=True, | |||
self_closing=False, open_padding=0, close_padding=0): | |||
def __init__(self, tag, contents=None, attrs=None, showtag=True, | |||
self_closing=False, invalid=False, implicit=False, padding="", | |||
closing_tag=None): | |||
super(Tag, self).__init__() | |||
self._type = type_ | |||
self._tag = tag | |||
self._contents = contents | |||
if attrs: | |||
self._attrs = attrs | |||
else: | |||
self._attrs = [] | |||
self._attrs = attrs if attrs else [] | |||
self._showtag = showtag | |||
self._self_closing = self_closing | |||
self._open_padding = open_padding | |||
self._close_padding = close_padding | |||
self._invalid = invalid | |||
self._implicit = implicit | |||
self._padding = padding | |||
if closing_tag: | |||
self._closing_tag = closing_tag | |||
elif not self_closing: | |||
self._closing_tag = tag | |||
def __unicode__(self): | |||
if not self.showtag: | |||
open_, close = self._translate() | |||
open_, close = get_wikicode(self.tag)
if self.self_closing: | |||
return open_ | |||
else: | |||
return open_ + str(self.contents) + close | |||
result = "<" + str(self.tag) | |||
if self.attrs: | |||
result += " " + " ".join([str(attr) for attr in self.attrs]) | |||
result = ("</" if self.invalid else "<") + str(self.tag) | |||
if self.attributes: | |||
result += "".join([str(attr) for attr in self.attributes]) | |||
if self.self_closing: | |||
result += " " * self.open_padding + "/>" | |||
result += self.padding + (">" if self.implicit else "/>") | |||
else: | |||
result += " " * self.open_padding + ">" + str(self.contents) | |||
result += "</" + str(self.tag) + " " * self.close_padding + ">" | |||
result += self.padding + ">" + str(self.contents) | |||
result += "</" + str(self.closing_tag) + ">" | |||
return result | |||
def __iternodes__(self, getter): | |||
@@ -111,66 +72,43 @@ class Tag(Node): | |||
if self.showtag: | |||
for child in getter(self.tag): | |||
yield self.tag, child | |||
for attr in self.attrs: | |||
for attr in self.attributes: | |||
for child in getter(attr.name): | |||
yield attr.name, child | |||
if attr.value: | |||
for child in getter(attr.value): | |||
yield attr.value, child | |||
for child in getter(self.contents): | |||
yield self.contents, child | |||
if self.contents: | |||
for child in getter(self.contents): | |||
yield self.contents, child | |||
if not self.self_closing and self.closing_tag: | |||
for child in getter(self.closing_tag): | |||
yield self.closing_tag, child | |||
def __strip__(self, normalize, collapse): | |||
if self.type in self.TAGS_VISIBLE: | |||
if is_visible(self.tag): | |||
return self.contents.strip_code(normalize, collapse) | |||
return None | |||
def __showtree__(self, write, get, mark): | |||
tagnodes = self.tag.nodes | |||
if (not self.attrs and len(tagnodes) == 1 and isinstance(tagnodes[0], Text)): | |||
write("<" + str(tagnodes[0]) + ">") | |||
write("</" if self.invalid else "<") | |||
get(self.tag) | |||
for attr in self.attributes: | |||
get(attr.name) | |||
if not attr.value: | |||
continue | |||
write(" = ") | |||
mark() | |||
get(attr.value) | |||
if self.self_closing: | |||
write(">" if self.implicit else "/>") | |||
else: | |||
write("<") | |||
get(self.tag) | |||
for attr in self.attrs: | |||
get(attr.name) | |||
if not attr.value: | |||
continue | |||
write(" = ") | |||
mark() | |||
get(attr.value) | |||
write(">") | |||
get(self.contents) | |||
if len(tagnodes) == 1 and isinstance(tagnodes[0], Text): | |||
write("</" + str(tagnodes[0]) + ">") | |||
else: | |||
get(self.contents) | |||
write("</") | |||
get(self.tag) | |||
get(self.closing_tag) | |||
write(">") | |||
def _translate(self): | |||
"""If the HTML-style tag has a wikicode representation, return that. | |||
For example, ``<b>Foo</b>`` can be represented as ``'''Foo'''``. This | |||
returns a tuple of the character starting the sequence and the | |||
character ending it. | |||
""" | |||
translations = { | |||
self.TAG_ITALIC: ("''", "''"), | |||
self.TAG_BOLD: ("'''", "'''"), | |||
self.TAG_UNORDERED_LIST: ("*", ""), | |||
self.TAG_ORDERED_LIST: ("#", ""), | |||
self.TAG_DEF_TERM: (";", ""), | |||
self.TAG_DEF_ITEM: (":", ""), | |||
self.TAG_RULE: ("----", ""), | |||
} | |||
return translations[self.type] | |||
@property | |||
def type(self): | |||
"""The tag type.""" | |||
return self._type | |||
@property | |||
def tag(self): | |||
"""The tag itself, as a :py:class:`~.Wikicode` object.""" | |||
@@ -182,7 +120,7 @@ class Tag(Node): | |||
return self._contents | |||
@property | |||
def attrs(self): | |||
def attributes(self): | |||
"""The list of attributes affecting the tag. | |||
Each attribute is an instance of :py:class:`~.Attribute`. | |||
@@ -196,29 +134,47 @@ class Tag(Node): | |||
@property | |||
def self_closing(self): | |||
"""Whether the tag is self-closing with no content.""" | |||
"""Whether the tag is self-closing with no content (like ``<br/>``).""" | |||
return self._self_closing | |||
@property | |||
def open_padding(self): | |||
"""How much spacing to insert before the first closing >.""" | |||
return self._open_padding | |||
def invalid(self): | |||
"""Whether the tag starts with a backslash after the opening bracket. | |||
This makes the tag look like a lone close tag. It is technically | |||
invalid and is only parsable Wikicode when the tag itself is | |||
single-only, like ``<br>`` and ``<img>``. See | |||
:py:func:`tag_defs.is_single_only`. | |||
""" | |||
return self._invalid | |||
@property | |||
def close_padding(self): | |||
"""How much spacing to insert before the last closing >.""" | |||
return self._close_padding | |||
def implicit(self): | |||
"""Whether the tag is implicitly self-closing, with no ending slash. | |||
@type.setter | |||
def type(self, value): | |||
value = int(value) | |||
if value not in self.TAGS_INVISIBLE | self.TAGS_VISIBLE: | |||
raise ValueError(value) | |||
self._type = value | |||
This is only possible for specific "single" tags like ``<br>`` and | |||
``<li>``. See :py:func:`tag_defs.is_single`. This field only has an | |||
effect if :py:attr:`self_closing` is also ``True``. | |||
""" | |||
return self._implicit | |||
@property | |||
def padding(self): | |||
"""Spacing to insert before the first closing ``>``.""" | |||
return self._padding | |||
@property | |||
def closing_tag(self): | |||
"""The closing tag, as a :py:class:`~.Wikicode` object. | |||
This will usually equal :py:attr:`tag`, unless there is additional | |||
spacing, comments, or the like. | |||
""" | |||
return self._closing_tag | |||
@tag.setter | |||
def tag(self, value): | |||
self._tag = parse_anything(value) | |||
self._tag = self._closing_tag = parse_anything(value) | |||
@contents.setter | |||
def contents(self, value): | |||
@@ -232,10 +188,18 @@ class Tag(Node): | |||
def self_closing(self, value): | |||
self._self_closing = bool(value) | |||
@open_padding.setter | |||
def open_padding(self, value): | |||
self._open_padding = int(value) | |||
@invalid.setter | |||
def invalid(self, value): | |||
self._invalid = bool(value) | |||
@implicit.setter | |||
def implicit(self, value): | |||
self._implicit = bool(value) | |||
@padding.setter | |||
def padding(self, value): | |||
self._padding = str(value) | |||
@close_padding.setter | |||
def close_padding(self, value): | |||
self._close_padding = int(value) | |||
@closing_tag.setter | |||
def closing_tag(self, value): | |||
self._closing_tag = parse_anything(value) |
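A rough sketch of the reworked constructor and renderer (hand-built nodes for illustration only; the parser normally creates them):

>>> from mwparserfromhell.nodes import Tag
>>> from mwparserfromhell.utils import parse_anything
>>> print(Tag(parse_anything("ref"), contents=parse_anything("cite")))
<ref>cite</ref>
>>> print(Tag(parse_anything("br"), self_closing=True, implicit=True))
<br>
>>> print(Tag(parse_anything("hr"), self_closing=True, padding=" "))
<hr />

The last two show the ``implicit`` flag and string ``padding`` replacing the old ``open_padding``/``close_padding`` integers.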
@@ -170,7 +170,7 @@ class Builder(object): | |||
self._write(self._handle_token(token)) | |||
def _handle_comment(self): | |||
"""Handle a case where a hidden comment is at the head of the tokens.""" | |||
"""Handle a case where an HTML comment is at the head of the tokens.""" | |||
self._push() | |||
while self._tokens: | |||
token = self._tokens.pop() | |||
@@ -180,7 +180,7 @@ class Builder(object): | |||
else: | |||
self._write(self._handle_token(token)) | |||
def _handle_attribute(self): | |||
def _handle_attribute(self, start): | |||
"""Handle a case where a tag attribute is at the head of the tokens.""" | |||
name, quoted = None, False | |||
self._push() | |||
@@ -191,37 +191,47 @@ class Builder(object): | |||
self._push() | |||
elif isinstance(token, tokens.TagAttrQuote): | |||
quoted = True | |||
elif isinstance(token, (tokens.TagAttrStart, | |||
tokens.TagCloseOpen)): | |||
elif isinstance(token, (tokens.TagAttrStart, tokens.TagCloseOpen, | |||
tokens.TagCloseSelfclose)): | |||
self._tokens.append(token) | |||
if name is not None: | |||
return Attribute(name, self._pop(), quoted) | |||
return Attribute(self._pop(), quoted=quoted) | |||
if name: | |||
value = self._pop() | |||
else: | |||
name, value = self._pop(), None | |||
return Attribute(name, value, quoted, start.pad_first, | |||
start.pad_before_eq, start.pad_after_eq) | |||
else: | |||
self._write(self._handle_token(token)) | |||
def _handle_tag(self, token): | |||
"""Handle a case where a tag is at the head of the tokens.""" | |||
type_, showtag = token.type, token.showtag | |||
attrs = [] | |||
close_tokens = (tokens.TagCloseSelfclose, tokens.TagCloseClose) | |||
implicit, attrs, contents, closing_tag = False, [], None, None | |||
showtag = token.get("showtag", True) | |||
invalid = token.get("invalid", False) | |||
self._push() | |||
while self._tokens: | |||
token = self._tokens.pop() | |||
if isinstance(token, tokens.TagAttrStart): | |||
attrs.append(self._handle_attribute()) | |||
attrs.append(self._handle_attribute(token)) | |||
elif isinstance(token, tokens.TagCloseOpen): | |||
open_pad = token.padding | |||
padding = token.padding | |||
tag = self._pop() | |||
self._push() | |||
elif isinstance(token, tokens.TagCloseSelfclose): | |||
tag = self._pop() | |||
return Tag(type_, tag, attrs=attrs, showtag=showtag, | |||
self_closing=True, open_padding=token.padding) | |||
elif isinstance(token, tokens.TagOpenClose): | |||
contents = self._pop() | |||
elif isinstance(token, tokens.TagCloseClose): | |||
return Tag(type_, tag, contents, attrs, showtag, False, | |||
open_pad, token.padding) | |||
self._push() | |||
elif isinstance(token, close_tokens): | |||
if isinstance(token, tokens.TagCloseSelfclose): | |||
tag = self._pop() | |||
self_closing = True | |||
padding = token.padding | |||
implicit = token.get("implicit", False) | |||
else: | |||
self_closing = False | |||
closing_tag = self._pop() | |||
return Tag(tag, contents, attrs, showtag, self_closing, | |||
invalid, implicit, padding, closing_tag) | |||
else: | |||
self._write(self._handle_token(token)) | |||
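The stream handled here is the same one exercised by the ``test_tag`` case in ``test_builder.py`` further down; building it by hand looks roughly like this:

>>> from mwparserfromhell.parser import tokens
>>> from mwparserfromhell.parser.builder import Builder
>>> toks = [tokens.TagOpenOpen(), tokens.Text(text="ref"),
...         tokens.TagCloseOpen(padding=""), tokens.TagOpenClose(),
...         tokens.Text(text="ref"), tokens.TagCloseClose()]
>>> print(Builder().build(toks))
<ref></ref>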
@@ -62,6 +62,13 @@ Local (stack-specific) contexts: | |||
* :py:const:`COMMENT` | |||
* :py:const:`TAG` | |||
* :py:const:`TAG_OPEN` | |||
* :py:const:`TAG_ATTR` | |||
* :py:const:`TAG_BODY` | |||
* :py:const:`TAG_CLOSE` | |||
* :py:const:`SAFETY_CHECK` | |||
* :py:const:`HAS_TEXT` | |||
@@ -78,37 +85,45 @@ Global contexts: | |||
# Local contexts: | |||
TEMPLATE = 0b00000000000000000111 | |||
TEMPLATE_NAME = 0b00000000000000000001 | |||
TEMPLATE_PARAM_KEY = 0b00000000000000000010 | |||
TEMPLATE_PARAM_VALUE = 0b00000000000000000100 | |||
ARGUMENT = 0b00000000000000011000 | |||
ARGUMENT_NAME = 0b00000000000000001000 | |||
ARGUMENT_DEFAULT = 0b00000000000000010000 | |||
WIKILINK = 0b00000000000001100000 | |||
WIKILINK_TITLE = 0b00000000000000100000 | |||
WIKILINK_TEXT = 0b00000000000001000000 | |||
HEADING = 0b00000001111110000000 | |||
HEADING_LEVEL_1 = 0b00000000000010000000 | |||
HEADING_LEVEL_2 = 0b00000000000100000000 | |||
HEADING_LEVEL_3 = 0b00000000001000000000 | |||
HEADING_LEVEL_4 = 0b00000000010000000000 | |||
HEADING_LEVEL_5 = 0b00000000100000000000 | |||
HEADING_LEVEL_6 = 0b00000001000000000000 | |||
COMMENT = 0b00000010000000000000 | |||
SAFETY_CHECK = 0b11111100000000000000 | |||
HAS_TEXT = 0b00000100000000000000 | |||
FAIL_ON_TEXT = 0b00001000000000000000 | |||
FAIL_NEXT = 0b00010000000000000000 | |||
FAIL_ON_LBRACE = 0b00100000000000000000 | |||
FAIL_ON_RBRACE = 0b01000000000000000000 | |||
FAIL_ON_EQUALS = 0b10000000000000000000 | |||
TEMPLATE_NAME = 1 << 0 | |||
TEMPLATE_PARAM_KEY = 1 << 1 | |||
TEMPLATE_PARAM_VALUE = 1 << 2 | |||
TEMPLATE = TEMPLATE_NAME + TEMPLATE_PARAM_KEY + TEMPLATE_PARAM_VALUE | |||
ARGUMENT_NAME = 1 << 3 | |||
ARGUMENT_DEFAULT = 1 << 4 | |||
ARGUMENT = ARGUMENT_NAME + ARGUMENT_DEFAULT | |||
WIKILINK_TITLE = 1 << 5 | |||
WIKILINK_TEXT = 1 << 6 | |||
WIKILINK = WIKILINK_TITLE + WIKILINK_TEXT | |||
HEADING_LEVEL_1 = 1 << 7 | |||
HEADING_LEVEL_2 = 1 << 8 | |||
HEADING_LEVEL_3 = 1 << 9 | |||
HEADING_LEVEL_4 = 1 << 10 | |||
HEADING_LEVEL_5 = 1 << 11 | |||
HEADING_LEVEL_6 = 1 << 12 | |||
HEADING = (HEADING_LEVEL_1 + HEADING_LEVEL_2 + HEADING_LEVEL_3 + | |||
HEADING_LEVEL_4 + HEADING_LEVEL_5 + HEADING_LEVEL_6) | |||
COMMENT = 1 << 13 | |||
TAG_OPEN = 1 << 14 | |||
TAG_ATTR = 1 << 15 | |||
TAG_BODY = 1 << 16 | |||
TAG_CLOSE = 1 << 17 | |||
TAG = TAG_OPEN + TAG_ATTR + TAG_BODY + TAG_CLOSE | |||
HAS_TEXT = 1 << 18 | |||
FAIL_ON_TEXT = 1 << 19 | |||
FAIL_NEXT = 1 << 20 | |||
FAIL_ON_LBRACE = 1 << 21 | |||
FAIL_ON_RBRACE = 1 << 22 | |||
FAIL_ON_EQUALS = 1 << 23 | |||
SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE + | |||
FAIL_ON_RBRACE + FAIL_ON_EQUALS) | |||
# Global contexts: | |||
GL_HEADING = 0b1 | |||
GL_HEADING = 1 << 0 |
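Because every constant is now a distinct bit, the aggregates are plain sums of their members and context tests remain simple bitwise ANDs, e.g.:

>>> from mwparserfromhell.parser import contexts
>>> ctx = contexts.TAG_OPEN | contexts.HAS_TEXT
>>> bool(ctx & contexts.TAG), bool(ctx & contexts.TAG_BODY)
(True, False)
>>> contexts.TAG == (contexts.TAG_OPEN | contexts.TAG_ATTR |
...                  contexts.TAG_BODY | contexts.TAG_CLOSE)
True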
@@ -41,10 +41,10 @@ SOFTWARE. | |||
#define ALPHANUM "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |||
static const char* MARKERS[] = { | |||
"{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", "/", "-", | |||
"!", "\n", ""}; | |||
"{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", "/", "-", | |||
"\n", ""}; | |||
#define NUM_MARKERS 18 | |||
#define NUM_MARKERS 17 | |||
#define TEXTBUFFER_BLOCKSIZE 1024 | |||
#define MAX_DEPTH 40 | |||
#define MAX_CYCLES 100000 | |||
@@ -60,10 +60,10 @@ static char** entitydefs; | |||
static PyObject* EMPTY; | |||
static PyObject* NOARGS; | |||
static PyObject* tokens; | |||
static PyObject* tag_defs; | |||
/* Tokens */ | |||
/* Tokens: */ | |||
static PyObject* Text; | |||
@@ -102,41 +102,58 @@ static PyObject* TagCloseClose; | |||
/* Local contexts: */ | |||
#define LC_TEMPLATE 0x00007 | |||
#define LC_TEMPLATE_NAME 0x00001 | |||
#define LC_TEMPLATE_PARAM_KEY 0x00002 | |||
#define LC_TEMPLATE_PARAM_VALUE 0x00004 | |||
#define LC_ARGUMENT 0x00018 | |||
#define LC_ARGUMENT_NAME 0x00008 | |||
#define LC_ARGUMENT_DEFAULT 0x00010 | |||
#define LC_WIKILINK 0x00060 | |||
#define LC_WIKILINK_TITLE 0x00020 | |||
#define LC_WIKILINK_TEXT 0x00040 | |||
#define LC_HEADING 0x01F80 | |||
#define LC_HEADING_LEVEL_1 0x00080 | |||
#define LC_HEADING_LEVEL_2 0x00100 | |||
#define LC_HEADING_LEVEL_3 0x00200 | |||
#define LC_HEADING_LEVEL_4 0x00400 | |||
#define LC_HEADING_LEVEL_5 0x00800 | |||
#define LC_HEADING_LEVEL_6 0x01000 | |||
#define LC_COMMENT 0x02000 | |||
#define LC_SAFETY_CHECK 0xFC000 | |||
#define LC_HAS_TEXT 0x04000 | |||
#define LC_FAIL_ON_TEXT 0x08000 | |||
#define LC_FAIL_NEXT 0x10000 | |||
#define LC_FAIL_ON_LBRACE 0x20000 | |||
#define LC_FAIL_ON_RBRACE 0x40000 | |||
#define LC_FAIL_ON_EQUALS 0x80000 | |||
#define LC_TEMPLATE 0x000007 | |||
#define LC_TEMPLATE_NAME 0x000001 | |||
#define LC_TEMPLATE_PARAM_KEY 0x000002 | |||
#define LC_TEMPLATE_PARAM_VALUE 0x000004 | |||
#define LC_ARGUMENT 0x000018 | |||
#define LC_ARGUMENT_NAME 0x000008 | |||
#define LC_ARGUMENT_DEFAULT 0x000010 | |||
#define LC_WIKILINK 0x000060 | |||
#define LC_WIKILINK_TITLE 0x000020 | |||
#define LC_WIKILINK_TEXT 0x000040 | |||
#define LC_HEADING 0x001F80 | |||
#define LC_HEADING_LEVEL_1 0x000080 | |||
#define LC_HEADING_LEVEL_2 0x000100 | |||
#define LC_HEADING_LEVEL_3 0x000200 | |||
#define LC_HEADING_LEVEL_4 0x000400 | |||
#define LC_HEADING_LEVEL_5 0x000800 | |||
#define LC_HEADING_LEVEL_6 0x001000 | |||
#define LC_COMMENT 0x002000 | |||
#define LC_TAG 0x03C000 | |||
#define LC_TAG_OPEN 0x004000 | |||
#define LC_TAG_ATTR 0x008000 | |||
#define LC_TAG_BODY 0x010000 | |||
#define LC_TAG_CLOSE 0x020000 | |||
#define LC_SAFETY_CHECK 0xFC0000 | |||
#define LC_HAS_TEXT 0x040000 | |||
#define LC_FAIL_ON_TEXT 0x080000 | |||
#define LC_FAIL_NEXT 0x100000 | |||
#define LC_FAIL_ON_LBRACE 0x200000 | |||
#define LC_FAIL_ON_RBRACE 0x400000 | |||
#define LC_FAIL_ON_EQUALS 0x800000 | |||
/* Global contexts: */ | |||
#define GL_HEADING 0x1 | |||
/* Tag contexts: */ | |||
#define TAG_NAME 0x01 | |||
#define TAG_ATTR_READY 0x02 | |||
#define TAG_ATTR_NAME 0x04 | |||
#define TAG_ATTR_VALUE 0x08 | |||
#define TAG_QUOTED 0x10 | |||
#define TAG_NOTE_SPACE 0x20 | |||
#define TAG_NOTE_EQUALS 0x40 | |||
#define TAG_NOTE_QUOTE 0x80 | |||
/* Miscellaneous structs: */ | |||
@@ -158,13 +175,24 @@ typedef struct { | |||
int level; | |||
} HeadingData; | |||
typedef struct { | |||
int context; | |||
struct Textbuffer* pad_first; | |||
struct Textbuffer* pad_before_eq; | |||
struct Textbuffer* pad_after_eq; | |||
Py_ssize_t reset; | |||
} TagData; | |||
typedef struct Textbuffer Textbuffer; | |||
typedef struct Stack Stack; | |||
/* Tokenizer object definition: */ | |||
typedef struct { | |||
PyObject_HEAD | |||
PyObject* text; /* text to tokenize */ | |||
struct Stack* topstack; /* topmost stack */ | |||
Stack* topstack; /* topmost stack */ | |||
Py_ssize_t head; /* current position in text */ | |||
Py_ssize_t length; /* length of text */ | |||
int global; /* global context */ | |||
@@ -176,49 +204,31 @@ typedef struct { | |||
/* Macros for accessing Tokenizer data: */ | |||
#define Tokenizer_READ(self, delta) (*PyUnicode_AS_UNICODE(Tokenizer_read(self, delta))) | |||
#define Tokenizer_READ_BACKWARDS(self, delta) \ | |||
(*PyUnicode_AS_UNICODE(Tokenizer_read_backwards(self, delta))) | |||
#define Tokenizer_CAN_RECURSE(self) (self->depth < MAX_DEPTH && self->cycles < MAX_CYCLES) | |||
/* Macros for accessing HTML tag definitions: */ | |||
#define IS_PARSABLE(tag) (call_tag_def_func("is_parsable", tag)) | |||
#define IS_SINGLE(tag) (call_tag_def_func("is_single", tag)) | |||
#define IS_SINGLE_ONLY(tag) (call_tag_def_func("is_single_only", tag)) | |||
/* Function prototypes: */ | |||
static int heading_level_from_context(int); | |||
static Textbuffer* Textbuffer_new(void); | |||
static void Textbuffer_dealloc(Textbuffer*); | |||
static TagData* TagData_new(void); | |||
static void TagData_dealloc(TagData*); | |||
static PyObject* Tokenizer_new(PyTypeObject*, PyObject*, PyObject*); | |||
static struct Textbuffer* Textbuffer_new(void); | |||
static void Tokenizer_dealloc(Tokenizer*); | |||
static void Textbuffer_dealloc(struct Textbuffer*); | |||
static int Tokenizer_init(Tokenizer*, PyObject*, PyObject*); | |||
static int Tokenizer_push(Tokenizer*, int); | |||
static PyObject* Textbuffer_render(struct Textbuffer*); | |||
static int Tokenizer_push_textbuffer(Tokenizer*); | |||
static void Tokenizer_delete_top_of_stack(Tokenizer*); | |||
static PyObject* Tokenizer_pop(Tokenizer*); | |||
static PyObject* Tokenizer_pop_keeping_context(Tokenizer*); | |||
static void* Tokenizer_fail_route(Tokenizer*); | |||
static int Tokenizer_write(Tokenizer*, PyObject*); | |||
static int Tokenizer_write_first(Tokenizer*, PyObject*); | |||
static int Tokenizer_write_text(Tokenizer*, Py_UNICODE); | |||
static int Tokenizer_write_all(Tokenizer*, PyObject*); | |||
static int Tokenizer_write_text_then_stack(Tokenizer*, const char*); | |||
static PyObject* Tokenizer_read(Tokenizer*, Py_ssize_t); | |||
static PyObject* Tokenizer_read_backwards(Tokenizer*, Py_ssize_t); | |||
static int Tokenizer_parse_template_or_argument(Tokenizer*); | |||
static int Tokenizer_parse_template(Tokenizer*); | |||
static int Tokenizer_parse_argument(Tokenizer*); | |||
static int Tokenizer_handle_template_param(Tokenizer*); | |||
static int Tokenizer_handle_template_param_value(Tokenizer*); | |||
static PyObject* Tokenizer_handle_template_end(Tokenizer*); | |||
static int Tokenizer_handle_argument_separator(Tokenizer*); | |||
static PyObject* Tokenizer_handle_argument_end(Tokenizer*); | |||
static int Tokenizer_parse_wikilink(Tokenizer*); | |||
static int Tokenizer_handle_wikilink_separator(Tokenizer*); | |||
static PyObject* Tokenizer_handle_wikilink_end(Tokenizer*); | |||
static int Tokenizer_parse_heading(Tokenizer*); | |||
static HeadingData* Tokenizer_handle_heading_end(Tokenizer*); | |||
static int Tokenizer_really_parse_entity(Tokenizer*); | |||
static int Tokenizer_parse_entity(Tokenizer*); | |||
static int Tokenizer_parse_comment(Tokenizer*); | |||
static int Tokenizer_verify_safe(Tokenizer*, int, Py_UNICODE); | |||
static PyObject* Tokenizer_parse(Tokenizer*, int); | |||
static int Tokenizer_parse_tag(Tokenizer*); | |||
static PyObject* Tokenizer_parse(Tokenizer*, int, int); | |||
static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); | |||
@@ -24,9 +24,9 @@ from __future__ import unicode_literals | |||
from math import log | |||
import re | |||
from . import contexts | |||
from . import tokens | |||
from . import contexts, tokens | |||
from ..compat import htmlentities | |||
from ..tag_defs import is_parsable, is_single, is_single_only | |||
__all__ = ["Tokenizer"] | |||
@@ -35,16 +35,34 @@ class BadRoute(Exception): | |||
pass | |||
class _TagOpenData(object): | |||
"""Stores data about an HTML open tag, like ``<ref name="foo">``.""" | |||
CX_NAME = 1 << 0 | |||
CX_ATTR_READY = 1 << 1 | |||
CX_ATTR_NAME = 1 << 2 | |||
CX_ATTR_VALUE = 1 << 3 | |||
CX_QUOTED = 1 << 4 | |||
CX_NOTE_SPACE = 1 << 5 | |||
CX_NOTE_EQUALS = 1 << 6 | |||
CX_NOTE_QUOTE = 1 << 7 | |||
def __init__(self): | |||
self.context = self.CX_NAME | |||
self.padding_buffer = {"first": "", "before_eq": "", "after_eq": ""} | |||
self.reset = 0 | |||
class Tokenizer(object): | |||
"""Creates a list of tokens from a string of wikicode.""" | |||
USES_C = False | |||
START = object() | |||
END = object() | |||
MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", | |||
"/", "-", "!", "\n", END] | |||
"/", "-", "\n", END] | |||
MAX_DEPTH = 40 | |||
MAX_CYCLES = 100000 | |||
regex = re.compile(r"([{}\[\]<>|=&#*;:/\-!\n])", flags=re.IGNORECASE) | |||
regex = re.compile(r"([{}\[\]<>|=&#*;:/\\\"\-!\n])", flags=re.IGNORECASE) | |||
tag_splitter = re.compile(r"([\s\"\\]+)") | |||
def __init__(self): | |||
self._text = None | |||
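``tag_splitter`` breaks open-tag text on runs of whitespace, quotes, and backslashes while keeping the separators, which is what ``_handle_tag_data`` below walks over chunk by chunk. A quick illustration of the regex on its own (markers are normally already split out before it sees any text):

>>> import re
>>> tag_splitter = re.compile(r"([\s\"\\]+)")
>>> tag_splitter.split('foo "bar baz"')
['foo', ' "', 'bar', ' ', 'baz', '"', '']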
@@ -117,33 +135,33 @@ class Tokenizer(object): | |||
self._pop() | |||
raise BadRoute() | |||
def _write(self, token): | |||
def _emit(self, token): | |||
"""Write a token to the end of the current token stack.""" | |||
self._push_textbuffer() | |||
self._stack.append(token) | |||
def _write_first(self, token): | |||
def _emit_first(self, token): | |||
"""Write a token to the beginning of the current token stack.""" | |||
self._push_textbuffer() | |||
self._stack.insert(0, token) | |||
def _write_text(self, text): | |||
def _emit_text(self, text): | |||
"""Write text to the current textbuffer.""" | |||
self._textbuffer.append(text) | |||
def _write_all(self, tokenlist): | |||
def _emit_all(self, tokenlist): | |||
"""Write a series of tokens to the current stack at once.""" | |||
if tokenlist and isinstance(tokenlist[0], tokens.Text): | |||
self._write_text(tokenlist.pop(0).text) | |||
self._emit_text(tokenlist.pop(0).text) | |||
self._push_textbuffer() | |||
self._stack.extend(tokenlist) | |||
def _write_text_then_stack(self, text): | |||
def _emit_text_then_stack(self, text): | |||
"""Pop the current stack, write *text*, and then write the stack.""" | |||
stack = self._pop() | |||
self._write_text(text) | |||
self._emit_text(text) | |||
if stack: | |||
self._write_all(stack) | |||
self._emit_all(stack) | |||
self._head -= 1 | |||
def _read(self, delta=0, wrap=False, strict=False): | |||
@@ -168,6 +186,30 @@ class Tokenizer(object): | |||
self._fail_route() | |||
return self.END | |||
def _parse_template(self): | |||
"""Parse a template at the head of the wikicode string.""" | |||
reset = self._head | |||
try: | |||
template = self._parse(contexts.TEMPLATE_NAME) | |||
except BadRoute: | |||
self._head = reset | |||
raise | |||
self._emit_first(tokens.TemplateOpen()) | |||
self._emit_all(template) | |||
self._emit(tokens.TemplateClose()) | |||
def _parse_argument(self): | |||
"""Parse an argument at the head of the wikicode string.""" | |||
reset = self._head | |||
try: | |||
argument = self._parse(contexts.ARGUMENT_NAME) | |||
except BadRoute: | |||
self._head = reset | |||
raise | |||
self._emit_first(tokens.ArgumentOpen()) | |||
self._emit_all(argument) | |||
self._emit(tokens.ArgumentClose()) | |||
def _parse_template_or_argument(self): | |||
"""Parse a template or argument at the head of the wikicode string.""" | |||
self._head += 2 | |||
@@ -179,12 +221,12 @@ class Tokenizer(object): | |||
while braces: | |||
if braces == 1: | |||
return self._write_text_then_stack("{") | |||
return self._emit_text_then_stack("{") | |||
if braces == 2: | |||
try: | |||
self._parse_template() | |||
except BadRoute: | |||
return self._write_text_then_stack("{{") | |||
return self._emit_text_then_stack("{{") | |||
break | |||
try: | |||
self._parse_argument() | |||
@@ -194,35 +236,13 @@ class Tokenizer(object): | |||
self._parse_template() | |||
braces -= 2 | |||
except BadRoute: | |||
return self._write_text_then_stack("{" * braces) | |||
return self._emit_text_then_stack("{" * braces) | |||
if braces: | |||
self._head += 1 | |||
self._write_all(self._pop()) | |||
def _parse_template(self): | |||
"""Parse a template at the head of the wikicode string.""" | |||
reset = self._head | |||
try: | |||
template = self._parse(contexts.TEMPLATE_NAME) | |||
except BadRoute: | |||
self._head = reset | |||
raise | |||
self._write_first(tokens.TemplateOpen()) | |||
self._write_all(template) | |||
self._write(tokens.TemplateClose()) | |||
def _parse_argument(self): | |||
"""Parse an argument at the head of the wikicode string.""" | |||
reset = self._head | |||
try: | |||
argument = self._parse(contexts.ARGUMENT_NAME) | |||
except BadRoute: | |||
self._head = reset | |||
raise | |||
self._write_first(tokens.ArgumentOpen()) | |||
self._write_all(argument) | |||
self._write(tokens.ArgumentClose()) | |||
self._emit_all(self._pop()) | |||
if self._context & contexts.FAIL_NEXT: | |||
self._context ^= contexts.FAIL_NEXT | |||
def _handle_template_param(self): | |||
"""Handle a template parameter at the head of the string.""" | |||
@@ -231,22 +251,22 @@ class Tokenizer(object): | |||
elif self._context & contexts.TEMPLATE_PARAM_VALUE: | |||
self._context ^= contexts.TEMPLATE_PARAM_VALUE | |||
elif self._context & contexts.TEMPLATE_PARAM_KEY: | |||
self._write_all(self._pop(keep_context=True)) | |||
self._emit_all(self._pop(keep_context=True)) | |||
self._context |= contexts.TEMPLATE_PARAM_KEY | |||
self._write(tokens.TemplateParamSeparator()) | |||
self._emit(tokens.TemplateParamSeparator()) | |||
self._push(self._context) | |||
def _handle_template_param_value(self): | |||
"""Handle a template parameter's value at the head of the string.""" | |||
self._write_all(self._pop(keep_context=True)) | |||
self._emit_all(self._pop(keep_context=True)) | |||
self._context ^= contexts.TEMPLATE_PARAM_KEY | |||
self._context |= contexts.TEMPLATE_PARAM_VALUE | |||
self._write(tokens.TemplateParamEquals()) | |||
self._emit(tokens.TemplateParamEquals()) | |||
def _handle_template_end(self): | |||
"""Handle the end of a template at the head of the string.""" | |||
if self._context & contexts.TEMPLATE_PARAM_KEY: | |||
self._write_all(self._pop(keep_context=True)) | |||
self._emit_all(self._pop(keep_context=True)) | |||
self._head += 1 | |||
return self._pop() | |||
@@ -254,7 +274,7 @@ class Tokenizer(object): | |||
"""Handle the separator between an argument's name and default.""" | |||
self._context ^= contexts.ARGUMENT_NAME | |||
self._context |= contexts.ARGUMENT_DEFAULT | |||
self._write(tokens.ArgumentSeparator()) | |||
self._emit(tokens.ArgumentSeparator()) | |||
def _handle_argument_end(self): | |||
"""Handle the end of an argument at the head of the string.""" | |||
@@ -269,17 +289,19 @@ class Tokenizer(object): | |||
wikilink = self._parse(contexts.WIKILINK_TITLE) | |||
except BadRoute: | |||
self._head = reset | |||
self._write_text("[[") | |||
self._emit_text("[[") | |||
else: | |||
self._write(tokens.WikilinkOpen()) | |||
self._write_all(wikilink) | |||
self._write(tokens.WikilinkClose()) | |||
if self._context & contexts.FAIL_NEXT: | |||
self._context ^= contexts.FAIL_NEXT | |||
self._emit(tokens.WikilinkOpen()) | |||
self._emit_all(wikilink) | |||
self._emit(tokens.WikilinkClose()) | |||
def _handle_wikilink_separator(self): | |||
"""Handle the separator between a wikilink's title and its text.""" | |||
self._context ^= contexts.WIKILINK_TITLE | |||
self._context |= contexts.WIKILINK_TEXT | |||
self._write(tokens.WikilinkSeparator()) | |||
self._emit(tokens.WikilinkSeparator()) | |||
def _handle_wikilink_end(self): | |||
"""Handle the end of a wikilink at the head of the string.""" | |||
@@ -301,13 +323,13 @@ class Tokenizer(object): | |||
title, level = self._parse(context) | |||
except BadRoute: | |||
self._head = reset + best - 1 | |||
self._write_text("=" * best) | |||
self._emit_text("=" * best) | |||
else: | |||
self._write(tokens.HeadingStart(level=level)) | |||
self._emit(tokens.HeadingStart(level=level)) | |||
if level < best: | |||
self._write_text("=" * (best - level)) | |||
self._write_all(title) | |||
self._write(tokens.HeadingEnd()) | |||
self._emit_text("=" * (best - level)) | |||
self._emit_all(title) | |||
self._emit(tokens.HeadingEnd()) | |||
finally: | |||
self._global ^= contexts.GL_HEADING | |||
@@ -326,28 +348,28 @@ class Tokenizer(object): | |||
after, after_level = self._parse(self._context) | |||
except BadRoute: | |||
if level < best: | |||
self._write_text("=" * (best - level)) | |||
self._emit_text("=" * (best - level)) | |||
self._head = reset + best - 1 | |||
return self._pop(), level | |||
else: # Found another closure | |||
self._write_text("=" * best) | |||
self._write_all(after) | |||
self._emit_text("=" * best) | |||
self._emit_all(after) | |||
return self._pop(), after_level | |||
def _really_parse_entity(self): | |||
"""Actually parse an HTML entity and ensure that it is valid.""" | |||
self._write(tokens.HTMLEntityStart()) | |||
self._emit(tokens.HTMLEntityStart()) | |||
self._head += 1 | |||
this = self._read(strict=True) | |||
if this == "#": | |||
numeric = True | |||
self._write(tokens.HTMLEntityNumeric()) | |||
self._emit(tokens.HTMLEntityNumeric()) | |||
self._head += 1 | |||
this = self._read(strict=True) | |||
if this[0].lower() == "x": | |||
hexadecimal = True | |||
self._write(tokens.HTMLEntityHex(char=this[0])) | |||
self._emit(tokens.HTMLEntityHex(char=this[0])) | |||
this = this[1:] | |||
if not this: | |||
self._fail_route() | |||
@@ -373,8 +395,8 @@ class Tokenizer(object): | |||
if this not in htmlentities.entitydefs: | |||
self._fail_route() | |||
self._write(tokens.Text(text=this)) | |||
self._write(tokens.HTMLEntityEnd()) | |||
self._emit(tokens.Text(text=this)) | |||
self._emit(tokens.HTMLEntityEnd()) | |||
def _parse_entity(self): | |||
"""Parse an HTML entity at the head of the wikicode string.""" | |||
@@ -384,9 +406,9 @@ class Tokenizer(object): | |||
self._really_parse_entity() | |||
except BadRoute: | |||
self._head = reset | |||
self._write_text(self._read()) | |||
self._emit_text(self._read()) | |||
else: | |||
self._write_all(self._pop()) | |||
self._emit_all(self._pop()) | |||
def _parse_comment(self): | |||
"""Parse an HTML comment at the head of the wikicode string.""" | |||
@@ -396,13 +418,231 @@ class Tokenizer(object): | |||
comment = self._parse(contexts.COMMENT) | |||
except BadRoute: | |||
self._head = reset | |||
self._write_text("<!--") | |||
self._emit_text("<!--") | |||
else: | |||
self._write(tokens.CommentStart()) | |||
self._write_all(comment) | |||
self._write(tokens.CommentEnd()) | |||
self._emit(tokens.CommentStart()) | |||
self._emit_all(comment) | |||
self._emit(tokens.CommentEnd()) | |||
self._head += 2 | |||
def _push_tag_buffer(self, data): | |||
"""Write a pending tag attribute from *data* to the stack.""" | |||
if data.context & data.CX_QUOTED: | |||
self._emit_first(tokens.TagAttrQuote()) | |||
self._emit_all(self._pop()) | |||
buf = data.padding_buffer | |||
self._emit_first(tokens.TagAttrStart(pad_first=buf["first"], | |||
pad_before_eq=buf["before_eq"], pad_after_eq=buf["after_eq"])) | |||
self._emit_all(self._pop()) | |||
data.padding_buffer = {key: "" for key in data.padding_buffer} | |||
def _handle_tag_space(self, data, text): | |||
"""Handle whitespace (*text*) inside of an HTML open tag.""" | |||
ctx = data.context | |||
end_of_value = ctx & data.CX_ATTR_VALUE and not ctx & (data.CX_QUOTED | data.CX_NOTE_QUOTE) | |||
if end_of_value or (ctx & data.CX_QUOTED and ctx & data.CX_NOTE_SPACE): | |||
self._push_tag_buffer(data) | |||
data.context = data.CX_ATTR_READY | |||
elif ctx & data.CX_NOTE_SPACE: | |||
data.context = data.CX_ATTR_READY | |||
elif ctx & data.CX_ATTR_NAME: | |||
data.context |= data.CX_NOTE_EQUALS | |||
data.padding_buffer["before_eq"] += text | |||
if ctx & data.CX_QUOTED and not ctx & data.CX_NOTE_SPACE: | |||
self._emit_text(text) | |||
elif data.context & data.CX_ATTR_READY: | |||
data.padding_buffer["first"] += text | |||
elif data.context & data.CX_ATTR_VALUE: | |||
data.padding_buffer["after_eq"] += text | |||
def _handle_tag_text(self, text): | |||
"""Handle regular *text* inside of an HTML open tag.""" | |||
next = self._read(1) | |||
if not self._can_recurse() or text not in self.MARKERS: | |||
self._emit_text(text) | |||
elif text == next == "{": | |||
self._parse_template_or_argument() | |||
elif text == next == "[": | |||
self._parse_wikilink() | |||
elif text == "<": | |||
self._parse_tag() | |||
else: | |||
self._emit_text(text) | |||
def _handle_tag_data(self, data, text): | |||
"""Handle all sorts of *text* data inside of an HTML open tag.""" | |||
for chunk in self.tag_splitter.split(text): | |||
if not chunk: | |||
continue | |||
if data.context & data.CX_NAME: | |||
if chunk in self.MARKERS or chunk.isspace(): | |||
self._fail_route() # Tags must start with text, not spaces | |||
data.context = data.CX_NOTE_SPACE | |||
elif chunk.isspace(): | |||
self._handle_tag_space(data, chunk) | |||
continue | |||
elif data.context & data.CX_NOTE_SPACE: | |||
if data.context & data.CX_QUOTED: | |||
data.context = data.CX_ATTR_VALUE | |||
self._pop() | |||
self._head = data.reset - 1 # Will be auto-incremented | |||
return # Break early | |||
self._fail_route() | |||
elif data.context & data.CX_ATTR_READY: | |||
data.context = data.CX_ATTR_NAME | |||
self._push(contexts.TAG_ATTR) | |||
elif data.context & data.CX_ATTR_NAME: | |||
if chunk == "=": | |||
data.context = data.CX_ATTR_VALUE | data.CX_NOTE_QUOTE | |||
self._emit(tokens.TagAttrEquals()) | |||
continue | |||
if data.context & data.CX_NOTE_EQUALS: | |||
self._push_tag_buffer(data) | |||
data.context = data.CX_ATTR_NAME | |||
self._push(contexts.TAG_ATTR) | |||
elif data.context & data.CX_ATTR_VALUE: | |||
escaped = self._read(-1) == "\\" and self._read(-2) != "\\" | |||
if data.context & data.CX_NOTE_QUOTE: | |||
data.context ^= data.CX_NOTE_QUOTE | |||
if chunk == '"' and not escaped: | |||
data.context |= data.CX_QUOTED | |||
self._push(self._context) | |||
data.reset = self._head | |||
continue | |||
elif data.context & data.CX_QUOTED: | |||
if chunk == '"' and not escaped: | |||
data.context |= data.CX_NOTE_SPACE | |||
continue | |||
self._handle_tag_text(chunk) | |||
def _handle_tag_close_open(self, data, token): | |||
"""Handle the closing of a open tag (``<foo>``).""" | |||
if data.context & (data.CX_ATTR_NAME | data.CX_ATTR_VALUE): | |||
self._push_tag_buffer(data) | |||
self._emit(token(padding=data.padding_buffer["first"])) | |||
self._head += 1 | |||
def _handle_tag_open_close(self): | |||
"""Handle the opening of a closing tag (``</foo>``).""" | |||
self._emit(tokens.TagOpenClose()) | |||
self._push(contexts.TAG_CLOSE) | |||
self._head += 1 | |||
def _handle_tag_close_close(self): | |||
"""Handle the ending of a closing tag (``</foo>``).""" | |||
strip = lambda tok: tok.text.rstrip().lower() | |||
closing = self._pop() | |||
if len(closing) != 1 or (not isinstance(closing[0], tokens.Text) or | |||
strip(closing[0]) != strip(self._stack[1])): | |||
self._fail_route() | |||
self._emit_all(closing) | |||
self._emit(tokens.TagCloseClose()) | |||
return self._pop() | |||
def _handle_blacklisted_tag(self): | |||
"""Handle the body of an HTML tag that is parser-blacklisted.""" | |||
while True: | |||
this, next = self._read(), self._read(1) | |||
self._head += 1 | |||
if this is self.END: | |||
self._fail_route() | |||
elif this == "<" and next == "/": | |||
self._handle_tag_open_close() | |||
return self._parse(push=False) | |||
else: | |||
self._emit_text(this) | |||
def _handle_single_only_tag_end(self): | |||
"""Handle the end of an implicitly closing single-only HTML tag.""" | |||
padding = self._stack.pop().padding | |||
self._emit(tokens.TagCloseSelfclose(padding=padding, implicit=True)) | |||
self._head -= 1 # Offset displacement done by _handle_tag_close_open | |||
return self._pop() | |||
def _handle_single_tag_end(self): | |||
"""Handle the stream end when inside a single-supporting HTML tag.""" | |||
gen = enumerate(self._stack) | |||
index = next(i for i, t in gen if isinstance(t, tokens.TagCloseOpen)) | |||
padding = self._stack[index].padding | |||
token = tokens.TagCloseSelfclose(padding=padding, implicit=True) | |||
self._stack[index] = token | |||
return self._pop() | |||
def _really_parse_tag(self): | |||
"""Actually parse an HTML tag, starting with the open (``<foo>``).""" | |||
data = _TagOpenData() | |||
self._push(contexts.TAG_OPEN) | |||
self._emit(tokens.TagOpenOpen()) | |||
while True: | |||
this, next = self._read(), self._read(1) | |||
can_exit = (not data.context & (data.CX_QUOTED | data.CX_NAME) or | |||
data.context & data.CX_NOTE_SPACE) | |||
if this is self.END: | |||
if self._context & contexts.TAG_ATTR: | |||
if data.context & data.CX_QUOTED: | |||
# Unclosed attribute quote: reset, don't die | |||
data.context = data.CX_ATTR_VALUE | |||
self._pop() | |||
self._head = data.reset | |||
continue | |||
self._pop() | |||
self._fail_route() | |||
elif this == ">" and can_exit: | |||
self._handle_tag_close_open(data, tokens.TagCloseOpen) | |||
self._context = contexts.TAG_BODY | |||
if is_single_only(self._stack[1].text): | |||
return self._handle_single_only_tag_end() | |||
if is_parsable(self._stack[1].text): | |||
return self._parse(push=False) | |||
return self._handle_blacklisted_tag() | |||
elif this == "/" and next == ">" and can_exit: | |||
self._handle_tag_close_open(data, tokens.TagCloseSelfclose) | |||
return self._pop() | |||
else: | |||
self._handle_tag_data(data, this) | |||
self._head += 1 | |||
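The three returns above dispatch on what kind of tag was just opened: single-only tags are closed implicitly on the spot, parsable tags recurse into ordinary wikicode, and blacklisted tags have their bodies consumed as plain text. Under that logic a bare ``<br>`` should tokenize as (written in the notation of the tokenizer test cases at the end of this diff; not one of the cases listed there):

output: [TagOpenOpen(), Text(text="br"), TagCloseSelfclose(padding="", implicit=True)]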
def _handle_invalid_tag_start(self): | |||
"""Handle the (possible) start of an implicitly closing single tag.""" | |||
reset = self._head + 1 | |||
self._head += 2 | |||
try: | |||
if not is_single_only(self.tag_splitter.split(self._read())[0]): | |||
raise BadRoute() | |||
tag = self._really_parse_tag() | |||
except BadRoute: | |||
self._head = reset | |||
self._emit_text("</") | |||
else: | |||
tag[0].invalid = True # Set flag of TagOpenOpen | |||
self._emit_all(tag) | |||
def _parse_tag(self): | |||
"""Parse an HTML tag at the head of the wikicode string.""" | |||
reset = self._head | |||
self._head += 1 | |||
try: | |||
tag = self._really_parse_tag() | |||
except BadRoute: | |||
self._head = reset | |||
self._emit_text("<") | |||
else: | |||
self._emit_all(tag) | |||
def _handle_end(self): | |||
"""Handle the end of the stream of wikitext.""" | |||
fail = (contexts.TEMPLATE | contexts.ARGUMENT | contexts.WIKILINK | | |||
contexts.HEADING | contexts.COMMENT | contexts.TAG) | |||
double_fail = (contexts.TEMPLATE_PARAM_KEY | contexts.TAG_CLOSE) | |||
if self._context & fail: | |||
if self._context & contexts.TAG_BODY: | |||
if is_single(self._stack[1].text): | |||
return self._handle_single_tag_end() | |||
if self._context & double_fail: | |||
self._pop() | |||
self._fail_route() | |||
return self._pop() | |||
def _verify_safe(self, this): | |||
"""Make sure we are not trying to write an invalid character.""" | |||
context = self._context | |||
@@ -414,7 +654,7 @@ class Tokenizer(object): | |||
elif this == "\n" or this == "[" or this == "}": | |||
return False | |||
return True | |||
if context & contexts.TEMPLATE_NAME: | |||
elif context & contexts.TEMPLATE_NAME: | |||
if this == "{" or this == "}" or this == "[": | |||
self._context |= contexts.FAIL_NEXT | |||
return True | |||
@@ -432,6 +672,8 @@ class Tokenizer(object): | |||
elif this is self.END or not this.isspace(): | |||
self._context |= contexts.HAS_TEXT | |||
return True | |||
elif context & contexts.TAG_CLOSE: | |||
return this != "<" | |||
else: | |||
if context & contexts.FAIL_ON_EQUALS: | |||
if this == "=": | |||
@@ -458,44 +700,38 @@ class Tokenizer(object): | |||
self._context |= contexts.FAIL_ON_RBRACE | |||
return True | |||
def _parse(self, context=0): | |||
def _parse(self, context=0, push=True): | |||
"""Parse the wikicode string, using *context* for when to stop.""" | |||
self._push(context) | |||
unsafe = (contexts.TEMPLATE_NAME | contexts.WIKILINK_TITLE | | |||
contexts.TEMPLATE_PARAM_KEY | contexts.ARGUMENT_NAME | | |||
contexts.TAG_CLOSE) | |||
double_unsafe = (contexts.TEMPLATE_PARAM_KEY | contexts.TAG_CLOSE) | |||
if push: | |||
self._push(context) | |||
while True: | |||
this = self._read() | |||
unsafe = (contexts.TEMPLATE_NAME | contexts.WIKILINK_TITLE | | |||
contexts.TEMPLATE_PARAM_KEY | contexts.ARGUMENT_NAME) | |||
if self._context & unsafe: | |||
if not self._verify_safe(this): | |||
if self._context & contexts.TEMPLATE_PARAM_KEY: | |||
if self._context & double_unsafe: | |||
self._pop() | |||
self._fail_route() | |||
if this not in self.MARKERS: | |||
self._write_text(this) | |||
self._emit_text(this) | |||
self._head += 1 | |||
continue | |||
if this is self.END: | |||
fail = (contexts.TEMPLATE | contexts.ARGUMENT | | |||
contexts.WIKILINK | contexts.HEADING | | |||
contexts.COMMENT) | |||
if self._context & contexts.TEMPLATE_PARAM_KEY: | |||
self._pop() | |||
if self._context & fail: | |||
self._fail_route() | |||
return self._pop() | |||
return self._handle_end() | |||
next = self._read(1) | |||
if self._context & contexts.COMMENT: | |||
if this == next == "-" and self._read(2) == ">": | |||
return self._pop() | |||
else: | |||
self._write_text(this) | |||
self._emit_text(this) | |||
elif this == next == "{": | |||
if self._can_recurse(): | |||
self._parse_template_or_argument() | |||
if self._context & contexts.FAIL_NEXT: | |||
self._context ^= contexts.FAIL_NEXT | |||
else: | |||
self._write_text("{") | |||
self._emit_text("{") | |||
elif this == "|" and self._context & contexts.TEMPLATE: | |||
self._handle_template_param() | |||
elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY: | |||
@@ -508,14 +744,12 @@ class Tokenizer(object): | |||
if self._read(2) == "}": | |||
return self._handle_argument_end() | |||
else: | |||
self._write_text("}") | |||
self._emit_text("}") | |||
elif this == next == "[": | |||
if not self._context & contexts.WIKILINK_TITLE and self._can_recurse(): | |||
self._parse_wikilink() | |||
if self._context & contexts.FAIL_NEXT: | |||
self._context ^= contexts.FAIL_NEXT | |||
else: | |||
self._write_text("[") | |||
self._emit_text("[") | |||
elif this == "|" and self._context & contexts.WIKILINK_TITLE: | |||
self._handle_wikilink_separator() | |||
elif this == next == "]" and self._context & contexts.WIKILINK: | |||
@@ -524,7 +758,7 @@ class Tokenizer(object): | |||
if self._read(-1) in ("\n", self.START): | |||
self._parse_heading() | |||
else: | |||
self._write_text("=") | |||
self._emit_text("=") | |||
elif this == "=" and self._context & contexts.HEADING: | |||
return self._handle_heading_end() | |||
elif this == "\n" and self._context & contexts.HEADING: | |||
@@ -535,9 +769,21 @@ class Tokenizer(object): | |||
if self._read(2) == self._read(3) == "-": | |||
self._parse_comment() | |||
else: | |||
self._write_text(this) | |||
self._emit_text(this) | |||
elif this == "<" and next == "/" and self._read(2) is not self.END: | |||
if self._context & contexts.TAG_BODY: | |||
self._handle_tag_open_close() | |||
else: | |||
self._handle_invalid_tag_start() | |||
elif this == "<": | |||
if not self._context & contexts.TAG_CLOSE and self._can_recurse(): | |||
self._parse_tag() | |||
else: | |||
self._emit_text("<") | |||
elif this == ">" and self._context & contexts.TAG_CLOSE: | |||
return self._handle_tag_close_close() | |||
else: | |||
self._write_text(this) | |||
self._emit_text(this) | |||
self._head += 1 | |||
def tokenize(self, text): | |||
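An end-to-end sketch tying the pure-Python tokenizer to the builder (the C tokenizer's tag support lives separately in ``tokenizer.h`` above); ``filter_tags()`` is the usual auto-generated ``Wikicode`` filter method:

>>> from mwparserfromhell.parser.builder import Builder
>>> from mwparserfromhell.parser.tokenizer import Tokenizer
>>> code = Builder().build(Tokenizer().tokenize("<ref name=foo>bar</ref>"))
>>> print(code)
<ref name=foo>bar</ref>
>>> tag = code.filter_tags()[0]
>>> print(tag.attributes[0].name)
name
>>> print(tag.attributes[0].value)
foo
>>> tag.self_closing
False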
@@ -63,6 +63,10 @@ class Token(object): | |||
def __delattr__(self, key): | |||
del self._kwargs[key] | |||
def get(self, key, default=None): | |||
"""Same as :py:meth:`__getattr__`, but has a *default* if missing.""" | |||
return self._kwargs.get(key, default) | |||
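``get()`` mirrors ``dict.get``, which lets the builder read optional token attributes such as ``showtag``, ``invalid``, and ``implicit`` without special-casing their absence:

>>> from mwparserfromhell.parser import tokens
>>> tok = tokens.TagOpenOpen(invalid=True)
>>> tok.get("invalid", False)
True
>>> tok.get("implicit", False)  # never set on this token
False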
def make(name): | |||
"""Create a new Token class using ``type()`` and add it to ``__all__``.""" | |||
@@ -0,0 +1,76 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
"""Contains data regarding certain HTML tags.""" | |||
from __future__ import unicode_literals | |||
__all__ = ["get_wikicode", "is_parsable", "is_visible", "is_single", | |||
"is_single_only"] | |||
PARSER_BLACKLIST = [ | |||
# enwiki extensions @ 2013-06-28 | |||
"categorytree", "gallery", "hiero", "imagemap", "inputbox", "math", | |||
"nowiki", "pre", "score", "section", "source", "syntaxhighlight", | |||
"templatedata", "timeline" | |||
] | |||
INVISIBLE_TAGS = [ | |||
# enwiki extensions @ 2013-06-28 | |||
"categorytree", "gallery", "imagemap", "inputbox", "math", "score", | |||
"section", "templatedata", "timeline" | |||
] | |||
# [mediawiki/core.git]/includes/Sanitizer.php @ 87a0aef762 | |||
SINGLE_ONLY = ["br", "hr", "meta", "link", "img"] | |||
SINGLE = SINGLE_ONLY + ["li", "dt", "dd"] | |||
WIKICODE = { | |||
"i": {"open": "''", "close": "''"}, | |||
"b": {"open": "'''", "close": "'''"}, | |||
"ul": {"open": "*"}, | |||
"ol": {"open": "#"}, | |||
"dt": {"open": ";"}, | |||
"dd": {"open": ":"}, | |||
"hr": {"open": "----"}, | |||
} | |||
def get_wikicode(tag): | |||
"""Return the appropriate wikicode before and after the given *tag*.""" | |||
data = WIKICODE[tag.lower()] | |||
return (data.get("open"), data.get("close")) | |||
def is_parsable(tag): | |||
"""Return if the given *tag*'s contents should be passed to the parser.""" | |||
return tag.lower() not in PARSER_BLACKLIST | |||
def is_visible(tag): | |||
"""Return whether or not the given *tag* contains visible text.""" | |||
return tag.lower() not in INVISIBLE_TAGS | |||
def is_single(tag): | |||
"""Return whether or not the given *tag* can exist without a close tag.""" | |||
return tag.lower() in SINGLE | |||
def is_single_only(tag): | |||
"""Return whether or not the given *tag* must exist without a close tag.""" | |||
return tag.lower() in SINGLE_ONLY |
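A short tour of the helpers, assuming the module is importable as ``mwparserfromhell.tag_defs``:

>>> from mwparserfromhell.tag_defs import get_wikicode, is_parsable, is_single_only
>>> get_wikicode("b")
("'''", "'''")
>>> is_parsable("nowiki")
False
>>> is_single_only("br"), is_single_only("li")
(True, False)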
@@ -31,6 +31,8 @@ from .compat import bytes, str | |||
from .nodes import Node | |||
from .smart_list import SmartList | |||
__all__ = ["parse_anything"] | |||
def parse_anything(value): | |||
"""Return a :py:class:`~.Wikicode` for *value*, allowing multiple types. | |||
@@ -91,7 +91,23 @@ class TreeEqualityTestCase(TestCase): | |||
def assertTagNodeEqual(self, expected, actual): | |||
"""Assert that two Tag nodes have the same data.""" | |||
self.fail("Holding this until feature/html_tags is ready.") | |||
self.assertWikicodeEqual(expected.tag, actual.tag) | |||
if expected.contents is not None: | |||
self.assertWikicodeEqual(expected.contents, actual.contents) | |||
length = len(expected.attributes) | |||
self.assertEqual(length, len(actual.attributes)) | |||
for i in range(length): | |||
exp_attr = expected.attributes[i] | |||
act_attr = actual.attributes[i] | |||
self.assertWikicodeEqual(exp_attr.name, act_attr.name) | |||
if exp_attr.value is not None: | |||
self.assertWikicodeEqual(exp_attr.value, act_attr.value) | |||
self.assertIs(exp_attr.quoted, act_attr.quoted) | |||
self.assertEqual(exp_attr.pad_first, act_attr.pad_first)
self.assertEqual(exp_attr.pad_before_eq, act_attr.pad_before_eq)
self.assertEqual(exp_attr.pad_after_eq, act_attr.pad_after_eq)
self.assertIs(expected.showtag, actual.showtag) | |||
self.assertIs(expected.self_closing, actual.self_closing) | |||
self.assertEqual(expected.padding, actual.padding) | |||
self.assertWikicodeEqual(expected.closing_tag, actual.closing_tag) | |||
def assertTemplateNodeEqual(self, expected, actual): | |||
"""Assert that two Template nodes have the same data.""" | |||
@@ -198,6 +198,18 @@ class TestBuilder(TreeEqualityTestCase): | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
def test_tag(self): | |||
"""tests for building Tag nodes""" | |||
tests = [ | |||
([tokens.TagOpenOpen(), tokens.Text(text="ref"), | |||
tokens.TagCloseOpen(padding=""), tokens.TagOpenClose(), | |||
tokens.Text(text="ref"), tokens.TagCloseClose()], | |||
wrap([Tag(wraptext("ref"), wrap([]), [], True, False, "", | |||
wraptext("ref"))])), | |||
] | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
def test_integration(self): | |||
"""a test for building a combination of templates together""" | |||
# {{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}} | |||
@@ -33,6 +33,13 @@ output: [Text(text="&n"), CommentStart(), Text(text="foo"), CommentEnd(), Text(t | |||
--- | |||
name: rich_tags | |||
label: an HTML tag with tons of other things in it
input: "{{dubious claim}}<ref name={{abc}} foo="bar {{baz}}" abc={{de}}f ghi=j{{k}}{{l}} \n mno = "{{p}} [[q]] {{r}}">[[Source]]</ref>" | |||
output: [TemplateOpen(), Text(text="dubious claim"), TemplateClose(), TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TemplateOpen(), Text(text="abc"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(), Text(text="bar "), TemplateOpen(), Text(text="baz"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="abc"), TagAttrEquals(), TemplateOpen(), Text(text="de"), TemplateClose(), Text(text="f"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="ghi"), TagAttrEquals(), Text(text="j"), TemplateOpen(), Text(text="k"), TemplateClose(), TemplateOpen(), Text(text="l"), TemplateClose(), TagAttrStart(pad_first=" \n ", pad_before_eq=" ", pad_after_eq=" "), Text(text="mno"), TagAttrEquals(), TagAttrQuote(), TemplateOpen(), Text(text="p"), TemplateClose(), Text(text=" "), WikilinkOpen(), Text(text="q"), WikilinkClose(), Text(text=" "), TemplateOpen(), Text(text="r"), TemplateClose(), TagCloseOpen(padding=""), WikilinkOpen(), Text(text="Source"), WikilinkClose(), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: wildcard | |||
label: a wildcard assortment of various things | |||
input: "{{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}" | |||
@@ -0,0 +1,529 @@ | |||
name: basic | |||
label: a basic tag with an open and close | |||
input: "<ref></ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: basic_selfclosing | |||
label: a basic self-closing tag | |||
input: "<ref/>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagCloseSelfclose(padding="")] | |||
--- | |||
name: content | |||
label: a tag with some content in the middle | |||
input: "<ref>this is a reference</ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagCloseOpen(padding=""), Text(text="this is a reference"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: padded_open | |||
label: a tag with some padding in the open tag | |||
input: "<ref ></ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagCloseOpen(padding=" "), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: padded_close | |||
label: a tag with some padding in the close tag | |||
input: "<ref></ref >" | |||
output: [TagOpenOpen(), Text(text="ref"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref "), TagCloseClose()] | |||
--- | |||
name: padded_selfclosing | |||
label: a self-closing tag with padding | |||
input: "<ref />" | |||
output: [TagOpenOpen(), Text(text="ref"), TagCloseSelfclose(padding=" ")] | |||
--- | |||
name: attribute | |||
label: a tag with a single attribute | |||
input: "<ref name></ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: attribute_value | |||
label: a tag with a single attribute with a value | |||
input: "<ref name=foo></ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), Text(text="foo"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: attribute_quoted | |||
label: a tag with a single quoted attribute | |||
input: "<ref name="foo bar"></ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: attribute_hyphen | |||
label: a tag with a single attribute, containing a hyphen | |||
input: "<ref name=foo-bar></ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), Text(text="foo-bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: attribute_quoted_hyphen | |||
label: a tag with a single quoted attribute, containing a hyphen | |||
input: "<ref name="foo-bar"></ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo-bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: attribute_selfclosing | |||
label: a self-closing tag with a single attribute | |||
input: "<ref name/>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseSelfclose(padding="")] | |||
--- | |||
name: attribute_selfclosing_value | |||
label: a self-closing tag with a single attribute with a value | |||
input: "<ref name=foo/>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), Text(text="foo"), TagCloseSelfclose(padding="")] | |||
--- | |||
name: attribute_selfclosing_value_quoted | |||
label: a self-closing tag with a single quoted attribute | |||
input: "<ref name="foo"/>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo"), TagCloseSelfclose(padding="")] | |||
--- | |||
name: nested_tag | |||
label: a tag nested within the attributes of another | |||
input: "<ref name=<span style="color: red;">foo</span>>citation</ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: nested_tag_quoted | |||
label: a tag nested within the attributes of another, quoted | |||
input: "<ref name="<span style="color: red;">foo</span>">citation</ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: nested_troll_tag | |||
label: a bogus tag that appears to be nested within the attributes of another | |||
input: "<ref name=</ ><//>>citation</ref>" | |||
output: [Text(text="<ref name=</ ><//>>citation</ref>")] | |||
--- | |||
name: nested_troll_tag_quoted | |||
label: a bogus tag that appears to be nested within the attributes of another, quoted | |||
input: "<ref name="</ ><//>">citation</ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="</ ><//>"), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: invalid_space_begin_open | |||
label: invalid tag: a space at the beginning of the open tag | |||
input: "< ref>test</ref>" | |||
output: [Text(text="< ref>test</ref>")] | |||
--- | |||
name: invalid_space_begin_close | |||
label: invalid tag: a space at the beginning of the close tag | |||
input: "<ref>test</ ref>" | |||
output: [Text(text="<ref>test</ ref>")] | |||
--- | |||
name: valid_space_end | |||
label: valid tag: spaces at the ends of both the open and close tags | |||
input: "<ref >test</ref >" | |||
output: [TagOpenOpen(), Text(text="ref"), TagCloseOpen(padding=" "), Text(text="test"), TagOpenClose(), Text(text="ref "), TagCloseClose()] | |||
--- | |||
name: invalid_template_ends | |||
label: invalid tag: a template at the ends of both the open and close tags | |||
input: "<ref {{foo}}>test</ref {{foo}}>" | |||
output: [Text(text="<ref "), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text=">test</ref "), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text=">")] | |||
--- | |||
name: invalid_template_ends_nospace | |||
label: invalid tag: a template at the ends of both the open and close tags, without spacing | |||
input: "<ref {{foo}}>test</ref{{foo}}>" | |||
output: [Text(text="<ref "), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text=">test</ref"), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text=">")] | |||
--- | |||
name: valid_template_end_open | |||
label: valid tag: a template at the end of the open tag | |||
input: "<ref {{foo}}>test</ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), TemplateOpen(), Text(text="foo"), TemplateClose(), TagCloseOpen(padding=""), Text(text="test"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: valid_template_end_open_space_end_close | |||
label: valid tag: a template at the end of the open tag; whitespace at the end of the close tag | |||
input: "<ref {{foo}}>test</ref\n>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), TemplateOpen(), Text(text="foo"), TemplateClose(), TagCloseOpen(padding=""), Text(text="test"), TagOpenClose(), Text(text="ref\n"), TagCloseClose()] | |||
--- | |||
name: invalid_template_end_open_nospace | |||
label: invalid tag: a template at the end of the open tag, without spacing | |||
input: "<ref{{foo}}>test</ref>" | |||
output: [Text(text="<ref"), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text=">test</ref>")] | |||
--- | |||
name: invalid_template_start_close | |||
label: invalid tag: a template at the beginning of the close tag | |||
input: "<ref>test</{{foo}}ref>" | |||
output: [Text(text="<ref>test</"), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="ref>")] | |||
--- | |||
name: invalid_template_start_open | |||
label: invalid tag: a template at the beginning of the open tag | |||
input: "<{{foo}}ref>test</ref>" | |||
output: [Text(text="<"), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="ref>test</ref>")] | |||
--- | |||
name: unclosed_quote | |||
label: a quoted attribute that is never closed | |||
input: "<span style="foobar>stuff</span>" | |||
output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), Text(text="\"foobar"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()] | |||
--- | |||
name: fake_quote | |||
label: a fake quoted attribute | |||
input: "<span style="foo"bar>stuff</span>" | |||
output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), Text(text="\"foo\"bar"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()] | |||
--- | |||
name: fake_quote_complex | |||
label: a fake quoted attribute, with spaces and templates and links | |||
input: "<span style="foo {{bar}}\n[[baz]]"buzz >stuff</span>" | |||
output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), Text(text="\"foo"), TagAttrStart(pad_first=" ", pad_before_eq="\n", pad_after_eq=""), TemplateOpen(), Text(text="bar"), TemplateClose(), TagAttrStart(pad_first="", pad_before_eq=" ", pad_after_eq=""), WikilinkOpen(), Text(text="baz"), WikilinkClose(), Text(text="\"buzz"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()] | |||
--- | |||
name: incomplete_lbracket | |||
label: incomplete tags: just a left bracket | |||
input: "<" | |||
output: [Text(text="<")] | |||
--- | |||
name: incomplete_lbracket_junk | |||
label: incomplete tags: just a left bracket, surrounded by stuff | |||
input: "foo<bar" | |||
output: [Text(text="foo<bar")] | |||
--- | |||
name: incomplete_unclosed_open | |||
label: incomplete tags: an unclosed open tag | |||
input: "junk <ref" | |||
output: [Text(text="junk <ref")] | |||
--- | |||
name: incomplete_unclosed_open_space | |||
label: incomplete tags: an unclosed open tag, space | |||
input: "junk <ref " | |||
output: [Text(text="junk <ref ")] | |||
--- | |||
name: incomplete_unclosed_open_unnamed_attr | |||
label: incomplete tags: an unclosed open tag, unnamed attribute | |||
input: "junk <ref name" | |||
output: [Text(text="junk <ref name")] | |||
--- | |||
name: incomplete_unclosed_open_attr_equals | |||
label: incomplete tags: an unclosed open tag, attribute, equal sign | |||
input: "junk <ref name=" | |||
output: [Text(text="junk <ref name=")] | |||
--- | |||
name: incomplete_unclosed_open_attr_equals_quoted | |||
label: incomplete tags: an unclosed open tag, attribute, equal sign, quote | |||
input: "junk <ref name="" | |||
output: [Text(text="junk <ref name=\"")] | |||
--- | |||
name: incomplete_unclosed_open_attr | |||
label: incomplete tags: an unclosed open tag, attribute with a key/value | |||
input: "junk <ref name=foo" | |||
output: [Text(text="junk <ref name=foo")] | |||
--- | |||
name: incomplete_unclosed_open_attr_quoted | |||
label: incomplete tags: an unclosed open tag, attribute with a key/value, quoted | |||
input: "junk <ref name="foo"" | |||
output: [Text(text="junk <ref name=\"foo\"")] | |||
--- | |||
name: incomplete_open | |||
label: incomplete tags: an open tag | |||
input: "junk <ref>" | |||
output: [Text(text="junk <ref>")] | |||
--- | |||
name: incomplete_open_unnamed_attr | |||
label: incomplete tags: an open tag, unnamed attribute | |||
input: "junk <ref name>" | |||
output: [Text(text="junk <ref name>")] | |||
--- | |||
name: incomplete_open_attr_equals | |||
label: incomplete tags: an open tag, attribute, equal sign | |||
input: "junk <ref name=>" | |||
output: [Text(text="junk <ref name=>")] | |||
--- | |||
name: incomplete_open_attr | |||
label: incomplete tags: an open tag, attribute with a key/value | |||
input: "junk <ref name=foo>" | |||
output: [Text(text="junk <ref name=foo>")] | |||
--- | |||
name: incomplete_open_attr_quoted | |||
label: incomplete tags: an open tag, attribute with a key/value, quoted | |||
input: "junk <ref name="foo">" | |||
output: [Text(text="junk <ref name=\"foo\">")] | |||
--- | |||
name: incomplete_open_text | |||
label: incomplete tags: an open tag, text | |||
input: "junk <ref>foo" | |||
output: [Text(text="junk <ref>foo")] | |||
--- | |||
name: incomplete_open_attr_text | |||
label: incomplete tags: an open tag, attribute with a key/value, text | |||
input: "junk <ref name=foo>bar" | |||
output: [Text(text="junk <ref name=foo>bar")] | |||
--- | |||
name: incomplete_open_text_lbracket | |||
label: incomplete tags: an open tag, text, left open bracket | |||
input: "junk <ref>bar<" | |||
output: [Text(text="junk <ref>bar<")] | |||
--- | |||
name: incomplete_open_text_lbracket_slash | |||
label: incomplete tags: an open tag, text, left bracket, slash | |||
input: "junk <ref>bar</" | |||
output: [Text(text="junk <ref>bar</")] | |||
--- | |||
name: incomplete_open_text_unclosed_close | |||
label: incomplete tags: an open tag, text, unclosed close | |||
input: "junk <ref>bar</ref" | |||
output: [Text(text="junk <ref>bar</ref")] | |||
--- | |||
name: incomplete_open_text_wrong_close | |||
label: incomplete tags: an open tag, text, wrong close | |||
input: "junk <ref>bar</span>" | |||
output: [Text(text="junk <ref>bar</span>")] | |||
--- | |||
name: incomplete_close | |||
label: incomplete tags: a close tag | |||
input: "junk </ref>" | |||
output: [Text(text="junk </ref>")] | |||
--- | |||
name: incomplete_no_tag_name_open | |||
label: incomplete tags: no tag name within brackets; just an open | |||
input: "junk <>" | |||
output: [Text(text="junk <>")] | |||
--- | |||
name: incomplete_no_tag_name_selfclosing | |||
label: incomplete tags: no tag name within brackets; self-closing | |||
input: "junk < />" | |||
output: [Text(text="junk < />")] | |||
--- | |||
name: incomplete_no_tag_name_open_close | |||
label: incomplete tags: no tag name within brackets; open and close | |||
input: "junk <></>" | |||
output: [Text(text="junk <></>")] | |||
--- | |||
name: backslash_premature_before | |||
label: a backslash before a quote before a space | |||
input: "<foo attribute="this is\\" quoted">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\" quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
name: backslash_premature_after | |||
label: a backslash before a quote after a space | |||
input: "<foo attribute="this is \\"quoted">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is \\\"quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
name: backslash_premature_middle | |||
label: a backslash before a quote in the middle of a word | |||
input: "<foo attribute="this i\\"s quoted">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this i\\\"s quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
name: backslash_adjacent | |||
label: escaped quotes next to unescaped quotes | |||
input: "<foo attribute="\\"this is quoted\\"">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="\\\"this is quoted\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
name: backslash_endquote | |||
label: a backslash before the end quote, causing the attribute to become unquoted | |||
input: "<foo attribute="this_is quoted\\">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), Text(text="\"this_is"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
name: backslash_double | |||
label: two adjacent backslashes, which do *not* affect the quote | |||
input: "<foo attribute="this is\\\\" quoted">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
name: backslash_triple | |||
label: three adjacent backslashes, which do *not* affect the quote | |||
input: "<foo attribute="this is\\\\\\" quoted">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
name: backslash_unaffecting | |||
label: backslashes near quotes, but not immediately adjacent, thus having no effect | |||
input: "<foo attribute="\\quote\\d" also="quote\\d\\">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="\\quote\\d"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="also"), TagAttrEquals(), Text(text="\"quote\\d\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
name: unparsable | |||
label: a tag that should not be put through the normal parser | |||
input: "{{t1}}<nowiki>{{t2}}</nowiki>{{t3}}" | |||
output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="{{t2}}"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), TemplateOpen(), Text(text="t3"), TemplateClose()] | |||
--- | |||
name: unparsable_complex | |||
label: a tag that should not be put through the normal parser; lots of stuff inside | |||
input: "{{t1}}<pre>{{t2}}\n==Heading==\nThis is some text with a [[page|link]].</pre>{{t3}}" | |||
output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(text="pre"), TagCloseOpen(padding=""), Text(text="{{t2}}\n==Heading==\nThis is some text with a [[page|link]]."), TagOpenClose(), Text(text="pre"), TagCloseClose(), TemplateOpen(), Text(text="t3"), TemplateClose()] | |||
--- | |||
name: unparsable_attributed | |||
label: a tag that should not be put through the normal parser; parsed attributes | |||
input: "{{t1}}<nowiki attr=val attr2="{{val2}}">{{t2}}</nowiki>{{t3}}" | |||
output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(text="nowiki"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr"), TagAttrEquals(), Text(text="val"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr2"), TagAttrEquals(), TagAttrQuote(), TemplateOpen(), Text(text="val2"), TemplateClose(), TagCloseOpen(padding=""), Text(text="{{t2}}"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), TemplateOpen(), Text(text="t3"), TemplateClose()] | |||
--- | |||
name: unparsable_incomplete | |||
label: a tag that should not be put through the normal parser; incomplete | |||
input: "{{t1}}<nowiki>{{t2}}{{t3}}" | |||
output: [TemplateOpen(), Text(text="t1"), TemplateClose(), Text(text="<nowiki>"), TemplateOpen(), Text(text="t2"), TemplateClose(), TemplateOpen(), Text(text="t3"), TemplateClose()] | |||
--- | |||
name: single_open_close | |||
label: a tag that supports being single; both an open and a close tag | |||
input: "foo<li>bar{{baz}}</li>" | |||
output: [Text(text="foo"), TagOpenOpen(), Text(text="li"), TagCloseOpen(padding=""), Text(text="bar"), TemplateOpen(), Text(text="baz"), TemplateClose(), TagOpenClose(), Text(text="li"), TagCloseClose()] | |||
--- | |||
name: single_open | |||
label: a tag that supports being single; just an open tag | |||
input: "foo<li>bar{{baz}}" | |||
output: [Text(text="foo"), TagOpenOpen(), Text(text="li"), TagCloseSelfclose(padding="", implicit=True), Text(text="bar"), TemplateOpen(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: single_selfclose | |||
label: a tag that supports being single; a self-closing tag | |||
input: "foo<li/>bar{{baz}}" | |||
output: [Text(text="foo"), TagOpenOpen(), Text(text="li"), TagCloseSelfclose(padding=""), Text(text="bar"), TemplateOpen(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: single_close | |||
label: a tag that supports being single; just a close tag | |||
input: "foo</li>bar{{baz}}" | |||
output: [Text(text="foo</li>bar"), TemplateOpen(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: single_only_open_close | |||
label: a tag that can only be single; both an open and a close tag | |||
input: "foo<br>bar{{baz}}</br>" | |||
output: [Text(text="foo"), TagOpenOpen(), Text(text="br"), TagCloseSelfclose(padding="", implicit=True), Text(text="bar"), TemplateOpen(), Text(text="baz"), TemplateClose(), TagOpenOpen(invalid=True), Text(text="br"), TagCloseSelfclose(padding="", implicit=True)] | |||
--- | |||
name: single_only_open | |||
label: a tag that can only be single; just an open tag | |||
input: "foo<br>bar{{baz}}" | |||
output: [Text(text="foo"), TagOpenOpen(), Text(text="br"), TagCloseSelfclose(padding="", implicit=True), Text(text="bar"), TemplateOpen(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: single_only_selfclose | |||
label: a tag that can only be single; a self-closing tag | |||
input: "foo<br/>bar{{baz}}" | |||
output: [Text(text="foo"), TagOpenOpen(), Text(text="br"), TagCloseSelfclose(padding=""), Text(text="bar"), TemplateOpen(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: single_only_close | |||
label: a tag that can only be single; just a close tag | |||
input: "foo</br>bar{{baz}}" | |||
output: [Text(text="foo"), TagOpenOpen(invalid=True), Text(text="br"), TagCloseSelfclose(padding="", implicit=True), Text(text="bar"), TemplateOpen(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: single_only_double | |||
label: a tag that can only be single; a tag with slashes at the beginning and end | |||
input: "foo</br/>bar{{baz}}" | |||
output: [Text(text="foo"), TagOpenOpen(invalid=True), Text(text="br"), TagCloseSelfclose(padding=""), Text(text="bar"), TemplateOpen(), Text(text="baz"), TemplateClose()] |
@@ -23,3 +23,10 @@ name: unicode2 | |||
label: additional unicode check for non-BMP codepoints | |||
input: "𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰" | |||
output: [Text(text="𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰")] | |||
--- | |||
name: large | |||
label: a lot of text, requiring multiple textbuffer blocks in the C tokenizer | |||
input: "ZWfsZYcZyhGbkDYJiguJuuhsNyHGFkFhnjkbLJyXIygTHqcXdhsDkEOTSIKYlBiohLIkiXxvyebUyCGvvBcYqFdtcftGmaAanKXEIyYSEKlTfEEbdGhdePVwVImOyKiHSzAEuGyEVRIKPZaNjQsYqpqARIQfvAklFtQyTJVGlLwjJIxYkiqmHBmdOvTyNqJRbMvouoqXRyOhYDwowtkcZGSOcyzVxibQdnzhDYbrgbatUrlOMRvFSzmLWHRihtXnddwYadPgFWUOxAzAgddJVDXHerawdkrRuWaEXfuwQSkQUmLEJUmrgXDVlXCpciaisfuOUjBldElygamkkXbewzLucKRnAEBimIIotXeslRRhnqQjrypnLQvvdCsKFWPVTZaHvzJMFEahDHWcCbyXgxFvknWjhVfiLSDuFhGoFxqSvhjnnRZLmCMhmWeOgSoanDEInKTWHnbpKyUlabLppITDFFxyWKAnUYJQIcmYnrvMmzmtYvsbCYbebgAhMFVVFAKUSvlkLFYluDpbpBaNFWyfXTaOdSBrfiHDTWGBTUCXMqVvRCIMrEjWpQaGsABkioGnveQWqBTDdRQlxQiUipwfyqAocMddXqdvTHhEwjEzMkOSWVPjJvDtClhYwpvRztPmRKCSpGIpXQqrYtTLmShFdpKtOxGtGOZYIdyUGPjdmyvhJTQMtgYJWUUZnecRjBfQXsyWQWikyONySLzLEqRFqcJYdRNFcGwWZtfZasfFWcvdsHRXoqKlKYihRAOJdrPBDdxksXFwKceQVncmFXfUfBsNgjKzoObVExSnRnjegeEhqxXzPmFcuiasViAFeaXrAxXhSfSyCILkKYpjxNeKynUmdcGAbwRwRnlAFbOSCafmzXddiNpLCFTHBELvArdXFpKUGpSHRekhrMedMRNkQzmSyFKjVwiWwCvbNWjgxJRzYeRxHiCCRMXktmKBxbxGZvOpvZIJOwvGIxcBLzsMFlDqAMLtScdsJtrbIUAvKfcdChXGnBzIxGxXMgxJhayrziaCswdpjJJJhkaYnGhHXqZwOzHFdhhUIEtfjERdLaSPRTDDMHpQtonNaIgXUYhjdbnnKppfMBxgNSOOXJAPtFjfAKnrRDrumZBpNhxMstqjTGBViRkDqbTdXYUirsedifGYzZpQkvdNhtFTOPgsYXYCwZHLcSLSfwfpQKtWfZuRUUryHJsbVsAOQcIJdSKKlOvCeEjUQNRPHKXuBJUjPuaAJJxcDMqyaufqfVwUmHLdjeYZzSiiGLHOTCInpVAalbXXTMLugLiwFiyPSuSFiyJUKVrWjbZAHaJtZnQmnvorRrxdPKThqXzNgTjszQiCoMczRnwGYJMERUWGXFyrSbAqsHmLwLlnJOJoXNsjVehQjVOpQOQJAZWwFZBlgyVIplzLTlFwumPgBLYrUIAJAcmvHPGfHfWQguCjfTYzxYfbohaLFAPwxFRrNuCdCzLlEbuhyYjCmuDBTJDMCdLpNRVqEALjnPSaBPsKWRCKNGwEMFpiEWbYZRwaMopjoUuBUvMpvyLfsPKDrfQLiFOQIWPtLIMoijUEUYfhykHrSKbTtrvjwIzHdWZDVwLIpNkloCqpzIsErxxKAFuFEjikWNYChqYqVslXMtoSWzNhbMuxYbzLfJIcPGoUeGPkGyPQNhDyrjgdKekzftFrRPTuyLYqCArkDcWHTrjPQHfoThBNnTQyMwLEWxEnBXLtzJmFVLGEPrdbEwlXpgYfnVnWoNXgPQKKyiXifpvrmJATzQOzYwFhliiYxlbnsEPKbHYUfJLrwYPfSUwTIHiEvBFMrEtVmqJobfcwsiiEudTIiAnrtuywgKLOiMYbEIOAOJdOXqroPjWnQQcTNxFvkIEIsuHLyhSqSphuSmlvknzydQEnebOreeZwOouXYKlObAkaWHhOdTFLoMCHOWrVKeXjcniaxtgCziKEqWOZUWHJQpcDJzYnnduDZrmxgjZroBRwoPBUTJMYipsgJwbTSlvMyXXdAmiEWGMiQxhGvHGPLOKeTxNaLnFVbWpiYIVyqN" | |||
output: [Text(text="ZWfsZYcZyhGbkDYJiguJuuhsNyHGFkFhnjkbLJyXIygTHqcXdhsDkEOTSIKYlBiohLIkiXxvyebUyCGvvBcYqFdtcftGmaAanKXEIyYSEKlTfEEbdGhdePVwVImOyKiHSzAEuGyEVRIKPZaNjQsYqpqARIQfvAklFtQyTJVGlLwjJIxYkiqmHBmdOvTyNqJRbMvouoqXRyOhYDwowtkcZGSOcyzVxibQdnzhDYbrgbatUrlOMRvFSzmLWHRihtXnddwYadPgFWUOxAzAgddJVDXHerawdkrRuWaEXfuwQSkQUmLEJUmrgXDVlXCpciaisfuOUjBldElygamkkXbewzLucKRnAEBimIIotXeslRRhnqQjrypnLQvvdCsKFWPVTZaHvzJMFEahDHWcCbyXgxFvknWjhVfiLSDuFhGoFxqSvhjnnRZLmCMhmWeOgSoanDEInKTWHnbpKyUlabLppITDFFxyWKAnUYJQIcmYnrvMmzmtYvsbCYbebgAhMFVVFAKUSvlkLFYluDpbpBaNFWyfXTaOdSBrfiHDTWGBTUCXMqVvRCIMrEjWpQaGsABkioGnveQWqBTDdRQlxQiUipwfyqAocMddXqdvTHhEwjEzMkOSWVPjJvDtClhYwpvRztPmRKCSpGIpXQqrYtTLmShFdpKtOxGtGOZYIdyUGPjdmyvhJTQMtgYJWUUZnecRjBfQXsyWQWikyONySLzLEqRFqcJYdRNFcGwWZtfZasfFWcvdsHRXoqKlKYihRAOJdrPBDdxksXFwKceQVncmFXfUfBsNgjKzoObVExSnRnjegeEhqxXzPmFcuiasViAFeaXrAxXhSfSyCILkKYpjxNeKynUmdcGAbwRwRnlAFbOSCafmzXddiNpLCFTHBELvArdXFpKUGpSHRekhrMedMRNkQzmSyFKjVwiWwCvbNWjgxJRzYeRxHiCCRMXktmKBxbxGZvOpvZIJOwvGIxcBLzsMFlDqAMLtScdsJtrbIUAvKfcdChXGnBzIxGxXMgxJhayrziaCswdpjJJJhkaYnGhHXqZwOzHFdhhUIEtfjERdLaSPRTDDMHpQtonNaIgXUYhjdbnnKppfMBxgNSOOXJAPtFjfAKnrRDrumZBpNhxMstqjTGBViRkDqbTdXYUirsedifGYzZpQkvdNhtFTOPgsYXYCwZHLcSLSfwfpQKtWfZuRUUryHJsbVsAOQcIJdSKKlOvCeEjUQNRPHKXuBJUjPuaAJJxcDMqyaufqfVwUmHLdjeYZzSiiGLHOTCInpVAalbXXTMLugLiwFiyPSuSFiyJUKVrWjbZAHaJtZnQmnvorRrxdPKThqXzNgTjszQiCoMczRnwGYJMERUWGXFyrSbAqsHmLwLlnJOJoXNsjVehQjVOpQOQJAZWwFZBlgyVIplzLTlFwumPgBLYrUIAJAcmvHPGfHfWQguCjfTYzxYfbohaLFAPwxFRrNuCdCzLlEbuhyYjCmuDBTJDMCdLpNRVqEALjnPSaBPsKWRCKNGwEMFpiEWbYZRwaMopjoUuBUvMpvyLfsPKDrfQLiFOQIWPtLIMoijUEUYfhykHrSKbTtrvjwIzHdWZDVwLIpNkloCqpzIsErxxKAFuFEjikWNYChqYqVslXMtoSWzNhbMuxYbzLfJIcPGoUeGPkGyPQNhDyrjgdKekzftFrRPTuyLYqCArkDcWHTrjPQHfoThBNnTQyMwLEWxEnBXLtzJmFVLGEPrdbEwlXpgYfnVnWoNXgPQKKyiXifpvrmJATzQOzYwFhliiYxlbnsEPKbHYUfJLrwYPfSUwTIHiEvBFMrEtVmqJobfcwsiiEudTIiAnrtuywgKLOiMYbEIOAOJdOXqroPjWnQQcTNxFvkIEIsuHLyhSqSphuSmlvknzydQEnebOreeZwOouXYKlObAkaWHhOdTFLoMCHOWrVKeXjcniaxtgCziKEqWOZUWHJQpcDJzYnnduDZrmxgjZroBRwoPBUTJMYipsgJwbTSlvMyXXdAmiEWGMiQxhGvHGPLOKeTxNaLnFVbWpiYIVyqN")] |