
Merge branch 'feature/tables' into develop (closes #81)

tags/v0.4
Ben Kurtovic, 10 years ago
parent commit 45ed8c445c
14 changed files with 1357 additions and 108 deletions
  1. +1 -0 CHANGELOG
  2. +1 -0 docs/changelog.rst
  3. +1 -1 mwparserfromhell/definitions.py
  4. +54 -7 mwparserfromhell/nodes/tag.py
  5. +6 -1 mwparserfromhell/parser/builder.py
  6. +22 -2 mwparserfromhell/parser/contexts.py
  7. +491 -33 mwparserfromhell/parser/tokenizer.c
  8. +65 -54 mwparserfromhell/parser/tokenizer.h
  9. +218 -7 mwparserfromhell/parser/tokenizer.py
  10. +8 -3 tests/_test_tokenizer.py
  11. +41 -0 tests/test_roundtripping.py
  12. +32 -0 tests/test_tag.py
  13. +410 -0 tests/tokenizer/tables.mwtest
  14. +7 -0 tests/tokenizer/tags_wikimarkup.mwtest

+1 -0 CHANGELOG

@@ -2,6 +2,7 @@ v0.4 (unreleased):

- The parser is now distributed with Windows binaries, fixing an issue that
prevented Windows users from using the C tokenizer.
- Added support for parsing wikicode tables (patches by David Winegar).
- Added a script to test for memory leaks in scripts/memtest.py.
- Added a script to do releases in scripts/release.sh.
- skip_style_tags can now be passed to mwparserfromhell.parse() (previously,


+1 -0 docs/changelog.rst

@@ -9,6 +9,7 @@ Unreleased

- The parser is now distributed with Windows binaries, fixing an issue that
prevented Windows users from using the C tokenizer.
- Added support for parsing wikicode tables (patches by David Winegar).
- Added a script to test for memory leaks in :file:`scripts/memtest.py`.
- Added a script to do releases in :file:`scripts/release.sh`.
- *skip_style_tags* can now be passed to :func:`mwparserfromhell.parse()


+1 -1 mwparserfromhell/definitions.py

@@ -52,7 +52,7 @@ INVISIBLE_TAGS = [

# [mediawiki/core.git]/includes/Sanitizer.php @ 87a0aef762
SINGLE_ONLY = ["br", "hr", "meta", "link", "img"]
SINGLE = SINGLE_ONLY + ["li", "dt", "dd"]
SINGLE = SINGLE_ONLY + ["li", "dt", "dd", "th", "td", "tr"]

MARKUP_TO_HTML = {
"#": "li",

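
The one-line change above adds the table cell and row tags to SINGLE, so "td", "th", and "tr" may now be closed implicitly. A minimal sketch of what that enables, assuming the module's existing is_single()/is_single_only() helpers:

    # Hedged sketch: "single" tags are allowed to appear without an explicit close.
    from mwparserfromhell.definitions import is_single, is_single_only

    for tag in ("br", "td", "tr", "th", "table"):
        print("%s: single=%s, single_only=%s" % (tag, is_single(tag), is_single_only(tag)))
    # After this change, "td", "tr", and "th" count as single (implicitly
    # closable), "br" stays single-only, and "table" reports False for both.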

+54 -7 mwparserfromhell/nodes/tag.py

@@ -35,7 +35,8 @@ class Tag(Node):

def __init__(self, tag, contents=None, attrs=None, wiki_markup=None,
self_closing=False, invalid=False, implicit=False, padding="",
closing_tag=None):
closing_tag=None, wiki_style_separator=None,
closing_wiki_markup=None):
super(Tag, self).__init__()
self._tag = tag
if contents is None and not self_closing:
@@ -52,13 +53,28 @@ class Tag(Node):
self._closing_tag = closing_tag
else:
self._closing_tag = tag
self._wiki_style_separator = wiki_style_separator
if closing_wiki_markup is not None:
self._closing_wiki_markup = closing_wiki_markup
elif wiki_markup and not self_closing:
self._closing_wiki_markup = wiki_markup
else:
self._closing_wiki_markup = None

def __unicode__(self):
if self.wiki_markup:
if self.attributes:
attrs = "".join([str(attr) for attr in self.attributes])
else:
attrs = ""
padding = self.padding or ""
separator = self.wiki_style_separator or ""
close = self.closing_wiki_markup or ""
if self.self_closing:
return self.wiki_markup
return self.wiki_markup + attrs + padding + separator
else:
return self.wiki_markup + str(self.contents) + self.wiki_markup
return self.wiki_markup + attrs + padding + separator + \
str(self.contents) + close

result = ("</" if self.invalid else "<") + str(self.tag)
if self.attributes:
@@ -73,10 +89,10 @@ class Tag(Node):
def __children__(self):
if not self.wiki_markup:
yield self.tag
for attr in self.attributes:
yield attr.name
if attr.value is not None:
yield attr.value
for attr in self.attributes:
yield attr.name
if attr.value is not None:
yield attr.value
if self.contents:
yield self.contents
if not self.self_closing and not self.wiki_markup and self.closing_tag:
@@ -174,6 +190,27 @@ class Tag(Node):
"""
return self._closing_tag

@property
def wiki_style_separator(self):
"""The separator between the padding and content in a wiki markup tag.

Essentially the wiki equivalent of the TagCloseOpen.
"""
return self._wiki_style_separator

@property
def closing_wiki_markup(self):
"""The wikified version of the closing tag to show instead of HTML.

If set to a value, this will be displayed instead of the close tag
brackets. If :attr:`self_closing` is ``True``, then this is not
displayed. If :attr:`wiki_markup` is set and this has not been set, this
is set to the value of :attr:`wiki_markup`. If this has been set and
:attr:`wiki_markup` is set to a ``False`` value, this is set to
``None``.
"""
return self._closing_wiki_markup

@tag.setter
def tag(self, value):
self._tag = self._closing_tag = parse_anything(value)
@@ -185,6 +222,8 @@ class Tag(Node):
@wiki_markup.setter
def wiki_markup(self, value):
self._wiki_markup = str(value) if value else None
if not value or not self.closing_wiki_markup:
self._closing_wiki_markup = self._wiki_markup

@self_closing.setter
def self_closing(self, value):
@@ -212,6 +251,14 @@ class Tag(Node):
def closing_tag(self, value):
self._closing_tag = parse_anything(value)

@wiki_style_separator.setter
def wiki_style_separator(self, value):
self._wiki_style_separator = str(value) if value else None

@closing_wiki_markup.setter
def closing_wiki_markup(self, value):
self._closing_wiki_markup = str(value) if value else None

def has(self, name):
"""Return whether any attribute in the tag has the given *name*.

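
With the fields above, a wiki-markup Tag renders as wiki_markup + attributes + padding + wiki_style_separator + contents + closing_wiki_markup. A minimal sketch of the new constructor arguments (not part of the diff; it uses parse_anything(), which the node's own setters already rely on, to build the Wikicode arguments):

    from mwparserfromhell.nodes import Tag
    from mwparserfromhell.utils import parse_anything

    # A table cell in wiki markup: "|" opens it, a second "|" separates the
    # (empty) style attributes from the contents, and there is no explicit close.
    cell = Tag(parse_anything("td"), parse_anything(" test \n"),
               wiki_markup="|", wiki_style_separator="|",
               closing_wiki_markup="", padding=" ")
    print(str(cell))   # "| | test \n"

    # For a table, closing_wiki_markup is given explicitly; otherwise it
    # defaults to wiki_markup for non-self-closing tags.
    table = Tag(parse_anything("table"), parse_anything("\n"),
                wiki_markup="{|", closing_wiki_markup="|}")
    print(str(table))  # "{|\n|}"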


+6 -1 mwparserfromhell/parser/builder.py

@@ -249,20 +249,24 @@ class Builder(object):
close_tokens = (tokens.TagCloseSelfclose, tokens.TagCloseClose)
implicit, attrs, contents, closing_tag = False, [], None, None
wiki_markup, invalid = token.wiki_markup, token.invalid or False
wiki_style_separator, closing_wiki_markup = None, wiki_markup
self._push()
while self._tokens:
token = self._tokens.pop()
if isinstance(token, tokens.TagAttrStart):
attrs.append(self._handle_attribute(token))
elif isinstance(token, tokens.TagCloseOpen):
wiki_style_separator = token.wiki_markup
padding = token.padding or ""
tag = self._pop()
self._push()
elif isinstance(token, tokens.TagOpenClose):
closing_wiki_markup = token.wiki_markup
contents = self._pop()
self._push()
elif isinstance(token, close_tokens):
if isinstance(token, tokens.TagCloseSelfclose):
closing_wiki_markup = token.wiki_markup
tag = self._pop()
self_closing = True
padding = token.padding or ""
@@ -271,7 +275,8 @@ class Builder(object):
self_closing = False
closing_tag = self._pop()
return Tag(tag, contents, attrs, wiki_markup, self_closing,
invalid, implicit, padding, closing_tag)
invalid, implicit, padding, closing_tag,
wiki_style_separator, closing_wiki_markup)
else:
self._write(self._handle_token(token))
raise ParserError("_handle_tag() missed a close token")
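
Because the builder now picks wiki_markup off TagCloseOpen, TagOpenClose, and TagCloseSelfclose tokens, a table token stream rebuilds into the original wikitext. A sketch reusing the token list from the empty_table case in tests/tokenizer/tables.mwtest below:

    from mwparserfromhell.parser import tokens
    from mwparserfromhell.parser.builder import Builder

    stream = [
        tokens.TagOpenOpen(wiki_markup="{|"), tokens.Text(text="table"),
        tokens.TagCloseOpen(padding="\n"),
        tokens.TagOpenClose(wiki_markup="|}"), tokens.Text(text="table"),
        tokens.TagCloseClose(),
    ]
    print(str(Builder().build(stream)))   # expected to print "{|\n|}"

This is the same check that tests/test_roundtripping.py automates for every tokenizer test case.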


+22 -2 mwparserfromhell/parser/contexts.py

@@ -90,6 +90,15 @@ Local (stack-specific) contexts:
* :const:`FAIL_ON_RBRACE`
* :const:`FAIL_ON_EQUALS`

* :const:`TABLE`

* :const:`TABLE_OPEN`
* :const:`TABLE_CELL_OPEN`
* :const:`TABLE_CELL_STYLE`
* :const:`TABLE_TD_LINE`
* :const:`TABLE_TH_LINE`
* :const:`TABLE_CELL_LINE_CONTEXTS`

Global contexts:

* :const:`GL_HEADING`
@@ -155,15 +164,26 @@ FAIL_ON_EQUALS = 1 << 29
SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE +
FAIL_ON_RBRACE + FAIL_ON_EQUALS)

TABLE_OPEN = 1 << 30
TABLE_CELL_OPEN = 1 << 31
TABLE_CELL_STYLE = 1 << 32
TABLE_ROW_OPEN = 1 << 33
TABLE_TD_LINE = 1 << 34
TABLE_TH_LINE = 1 << 35
TABLE_CELL_LINE_CONTEXTS = TABLE_TD_LINE + TABLE_TH_LINE + TABLE_CELL_STYLE
TABLE = (TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE + TABLE_ROW_OPEN +
TABLE_TD_LINE + TABLE_TH_LINE)

# Global contexts:

GL_HEADING = 1 << 0

# Aggregate contexts:

FAIL = TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK_TITLE + HEADING + TAG + STYLE
FAIL = (TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK_TITLE + HEADING + TAG +
STYLE + TABLE)
UNSAFE = (TEMPLATE_NAME + WIKILINK_TITLE + EXT_LINK_TITLE +
TEMPLATE_PARAM_KEY + ARGUMENT_NAME + TAG_CLOSE)
DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE
DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE + TABLE_ROW_OPEN
NO_WIKILINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK_URI
NO_EXT_LINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK
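
The new table flags occupy bits 30 through 35, which is why the C tokenizer below switches its context fields from int to uint64_t: the values no longer fit in 32 bits. A quick check mirroring the constants added above:

    TABLE_OPEN       = 1 << 30
    TABLE_CELL_OPEN  = 1 << 31
    TABLE_CELL_STYLE = 1 << 32   # already past the 32-bit boundary
    TABLE_ROW_OPEN   = 1 << 33
    TABLE_TD_LINE    = 1 << 34
    TABLE_TH_LINE    = 1 << 35

    TABLE = (TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE + TABLE_ROW_OPEN +
             TABLE_TD_LINE + TABLE_TH_LINE)
    print(hex(TABLE))                  # 0xfc0000000, matching LC_TABLE in tokenizer.h
    print(TABLE_TH_LINE.bit_length())  # 36, hence 64-bit contexts in the C tokenizer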

+491 -33 mwparserfromhell/parser/tokenizer.c

@@ -241,7 +241,7 @@ static int Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds)
/*
Add a new token stack, context, and textbuffer to the list.
*/
static int Tokenizer_push(Tokenizer* self, int context)
static int Tokenizer_push(Tokenizer* self, uint64_t context)
{
Stack* top = malloc(sizeof(Stack));

@@ -333,7 +333,7 @@ static PyObject* Tokenizer_pop(Tokenizer* self)
static PyObject* Tokenizer_pop_keeping_context(Tokenizer* self)
{
PyObject* stack;
int context;
uint64_t context;

if (Tokenizer_push_textbuffer(self))
return NULL;
@@ -351,7 +351,7 @@ static PyObject* Tokenizer_pop_keeping_context(Tokenizer* self)
*/
static void* Tokenizer_fail_route(Tokenizer* self)
{
int context = self->topstack->context;
uint64_t context = self->topstack->context;
PyObject* stack = Tokenizer_pop(self);

Py_XDECREF(stack);
@@ -676,11 +676,8 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self)
RESET_ROUTE();
for (i = 0; i < braces; i++) text[i] = '{';
text[braces] = '\0';
if (Tokenizer_emit_text_then_stack(self, text)) {
Py_XDECREF(text);
if (Tokenizer_emit_text_then_stack(self, text))
return -1;
}
Py_XDECREF(text);
return 0;
}
else
@@ -1034,7 +1031,7 @@ Tokenizer_is_free_link(Tokenizer* self, Py_UNICODE this, Py_UNICODE next)
{
// Built from Tokenizer_parse()'s end sentinels:
Py_UNICODE after = Tokenizer_READ(self, 2);
int ctx = self->topstack->context;
uint64_t ctx = self->topstack->context;

return (!this || this == '\n' || this == '[' || this == ']' ||
this == '<' || this == '>' || (this == '\'' && next == '\'') ||
@@ -1629,9 +1626,9 @@ static int Tokenizer_push_tag_buffer(Tokenizer* self, TagData* data)
static int
Tokenizer_handle_tag_space(Tokenizer* self, TagData* data, Py_UNICODE text)
{
int ctx = data->context;
int end_of_value = (ctx & TAG_ATTR_VALUE &&
!(ctx & (TAG_QUOTED | TAG_NOTE_QUOTE)));
uint64_t ctx = data->context;
uint64_t end_of_value = (ctx & TAG_ATTR_VALUE &&
!(ctx & (TAG_QUOTED | TAG_NOTE_QUOTE)));

if (end_of_value || (ctx & TAG_QUOTED && ctx & TAG_NOTE_SPACE)) {
if (Tokenizer_push_tag_buffer(self, data))
@@ -2153,7 +2150,7 @@ static int Tokenizer_emit_style_tag(Tokenizer* self, const char* tag,
static int Tokenizer_parse_italics(Tokenizer* self)
{
Py_ssize_t reset = self->head;
int context;
uint64_t context;
PyObject *stack;

stack = Tokenizer_parse(self, LC_STYLE_ITALICS, 1);
@@ -2273,7 +2270,7 @@ static int Tokenizer_parse_italics_and_bold(Tokenizer* self)
*/
static PyObject* Tokenizer_parse_style(Tokenizer* self)
{
int context = self->topstack->context, ticks = 2, i;
uint64_t context = self->topstack->context, ticks = 2, i;

self->head += 2;
while (Tokenizer_READ(self, 0) == '\'') {
@@ -2426,9 +2423,363 @@ static int Tokenizer_handle_dl_term(Tokenizer* self)
}

/*
Emit a table tag.
*/
static int
Tokenizer_emit_table_tag(Tokenizer* self, const char* open_open_markup,
const char* tag, PyObject* style, PyObject* padding,
const char* close_open_markup, PyObject* contents,
const char* open_close_markup)
{
PyObject *open_open_kwargs, *open_open_markup_unicode, *close_open_kwargs,
*close_open_markup_unicode, *open_close_kwargs,
*open_close_markup_unicode;

open_open_kwargs = PyDict_New();
if (!open_open_kwargs)
goto fail_decref_all;
open_open_markup_unicode = PyUnicode_FromString(open_open_markup);
if (!open_open_markup_unicode) {
Py_DECREF(open_open_kwargs);
goto fail_decref_all;
}
PyDict_SetItemString(open_open_kwargs, "wiki_markup",
open_open_markup_unicode);
Py_DECREF(open_open_markup_unicode);
if (Tokenizer_emit_kwargs(self, TagOpenOpen, open_open_kwargs))
goto fail_decref_all;
if (Tokenizer_emit_text(self, tag))
goto fail_decref_all;

if (style) {
if (Tokenizer_emit_all(self, style))
goto fail_decref_all;
Py_DECREF(style);
}

close_open_kwargs = PyDict_New();
if (!close_open_kwargs)
goto fail_decref_padding_contents;
if (close_open_markup && strlen(close_open_markup) != 0) {
close_open_markup_unicode = PyUnicode_FromString(close_open_markup);
if (!close_open_markup_unicode) {
Py_DECREF(close_open_kwargs);
goto fail_decref_padding_contents;
}
PyDict_SetItemString(close_open_kwargs, "wiki_markup",
close_open_markup_unicode);
Py_DECREF(close_open_markup_unicode);
}
PyDict_SetItemString(close_open_kwargs, "padding", padding);
Py_DECREF(padding);
if (Tokenizer_emit_kwargs(self, TagCloseOpen, close_open_kwargs))
goto fail_decref_contents;

if (contents) {
if (Tokenizer_emit_all(self, contents))
goto fail_decref_contents;
Py_DECREF(contents);
}

open_close_kwargs = PyDict_New();
if (!open_close_kwargs)
return -1;
open_close_markup_unicode = PyUnicode_FromString(open_close_markup);
if (!open_close_markup_unicode) {
Py_DECREF(open_close_kwargs);
return -1;
}
PyDict_SetItemString(open_close_kwargs, "wiki_markup",
open_close_markup_unicode);
Py_DECREF(open_close_markup_unicode);
if (Tokenizer_emit_kwargs(self, TagOpenClose, open_close_kwargs))
return -1;
if (Tokenizer_emit_text(self, tag))
return -1;
if (Tokenizer_emit(self, TagCloseClose))
return -1;
return 0;

fail_decref_all:
Py_XDECREF(style);
fail_decref_padding_contents:
Py_DECREF(padding);
fail_decref_contents:
Py_DECREF(contents);
return -1;
}

/*
Handle style attributes for a table until an ending token.
*/
static PyObject* Tokenizer_handle_table_style(Tokenizer* self, char end_token)
{
TagData *data = TagData_new();
PyObject *padding, *trash;
Py_UNICODE this;
int can_exit;

if (!data)
return NULL;
data->context = TAG_ATTR_READY;

while (1) {
this = Tokenizer_READ(self, 0);
can_exit = (!(data->context & TAG_QUOTED) || data->context & TAG_NOTE_SPACE);
if (this == end_token && can_exit) {
if (data->context & (TAG_ATTR_NAME | TAG_ATTR_VALUE)) {
if (Tokenizer_push_tag_buffer(self, data)) {
TagData_dealloc(data);
return NULL;
}
}
if (Py_UNICODE_ISSPACE(this))
Textbuffer_write(&(data->pad_first), this);
padding = Textbuffer_render(data->pad_first);
TagData_dealloc(data);
if (!padding)
return NULL;
return padding;
}
else if (!this || this == end_token) {
if (self->topstack->context & LC_TAG_ATTR) {
if (data->context & TAG_QUOTED) {
// Unclosed attribute quote: reset, don't die
data->context = TAG_ATTR_VALUE;
trash = Tokenizer_pop(self);
Py_XDECREF(trash);
self->head = data->reset;
continue;
}
trash = Tokenizer_pop(self);
Py_XDECREF(trash);
}
TagData_dealloc(data);
return Tokenizer_fail_route(self);
}
else {
if (Tokenizer_handle_tag_data(self, data, this) || BAD_ROUTE) {
TagData_dealloc(data);
return NULL;
}
}
self->head++;
}
}

/*
Parse a wikicode table by starting with the first line.
*/
static int Tokenizer_parse_table(Tokenizer* self)
{
Py_ssize_t reset = self->head + 1;
PyObject *style, *padding;
PyObject *table = NULL;
self->head += 2;

if(Tokenizer_push(self, LC_TABLE_OPEN))
return -1;
padding = Tokenizer_handle_table_style(self, '\n');
if (BAD_ROUTE) {
RESET_ROUTE();
self->head = reset;
if (Tokenizer_emit_text(self, "{|"))
return -1;
return 0;
}
if (!padding)
return -1;
style = Tokenizer_pop(self);
if (!style) {
Py_DECREF(padding);
return -1;
}

self->head++;
table = Tokenizer_parse(self, LC_TABLE_OPEN, 1);
if (BAD_ROUTE) {
RESET_ROUTE();
Py_DECREF(padding);
Py_DECREF(style);
self->head = reset;
if (Tokenizer_emit_text(self, "{|"))
return -1;
return 0;
}
if (!table) {
Py_DECREF(padding);
Py_DECREF(style);
return -1;
}

if (Tokenizer_emit_table_tag(self, "{|", "table", style, padding, NULL,
table, "|}"))
return -1;
// Offset displacement done by _parse()
self->head--;
return 0;
}

/*
Parse as style until end of the line, then continue.
*/
static int Tokenizer_handle_table_row(Tokenizer* self)
{
PyObject *padding, *style, *row, *trash;
self->head += 2;

if (!Tokenizer_CAN_RECURSE(self)) {
if (Tokenizer_emit_text(self, "|-"))
return -1;
self->head -= 1;
return 0;
}

if(Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_ROW_OPEN))
return -1;
padding = Tokenizer_handle_table_style(self, '\n');
if (BAD_ROUTE) {
trash = Tokenizer_pop(self);
Py_XDECREF(trash);
return 0;
}
if (!padding)
return -1;
style = Tokenizer_pop(self);
if (!style) {
Py_DECREF(padding);
return -1;
}

// Don't parse the style separator
self->head++;
row = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_ROW_OPEN, 1);
if (!row) {
Py_DECREF(padding);
Py_DECREF(style);
return -1;
}

if (Tokenizer_emit_table_tag(self, "|-", "tr", style, padding, NULL, row, ""))
return -1;
// Offset displacement done by _parse()
self->head--;
return 0;
}

/*
Parse as normal syntax unless we hit a style marker, then parse style
as HTML attributes and the remainder as normal syntax.
*/
static int
Tokenizer_handle_table_cell(Tokenizer* self, const char *markup,
const char *tag, uint64_t line_context)
{
uint64_t old_context = self->topstack->context;
uint64_t cell_context;
Py_ssize_t reset;
PyObject *padding, *cell, *style = NULL;
const char *close_open_markup = NULL;

self->head += strlen(markup);
reset = self->head;

if (!Tokenizer_CAN_RECURSE(self)) {
if (Tokenizer_emit_text(self, markup))
return -1;
self->head--;
return 0;
}

cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN |
LC_TABLE_CELL_STYLE | line_context, 1);
if (!cell)
return -1;
cell_context = self->topstack->context;
self->topstack->context = old_context;

if (cell_context & LC_TABLE_CELL_STYLE) {
Py_DECREF(cell);
self->head = reset;
if(Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN |
line_context))
return -1;
padding = Tokenizer_handle_table_style(self, '|');
if (!padding)
return -1;
style = Tokenizer_pop(self);
if (!style) {
Py_DECREF(padding);
return -1;
}
// Don't parse the style separator
self->head++;
cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN |
line_context, 1);
if (!cell) {
Py_DECREF(padding);
Py_DECREF(style);
return -1;
}
cell_context = self->topstack->context;
self->topstack->context = old_context;
}
else {
padding = PyUnicode_FromString("");
if (!padding) {
Py_DECREF(cell);
return -1;
}
}

if (style) {
close_open_markup = "|";
}
if (Tokenizer_emit_table_tag(self, markup, tag, style, padding,
close_open_markup, cell, ""))
return -1;
// Keep header/cell line contexts
self->topstack->context |= cell_context & (LC_TABLE_TH_LINE | LC_TABLE_TD_LINE);
// Offset displacement done by parse()
self->head--;
return 0;
}

/*
Set or clear the cell-style context flag depending on whether the cell must
be reset to parse style attributes, then return the stack, keeping the context.
*/
static PyObject*
Tokenizer_handle_table_cell_end(Tokenizer* self, int reset_for_style)
{
if (reset_for_style)
self->topstack->context |= LC_TABLE_CELL_STYLE;
else
self->topstack->context &= ~LC_TABLE_CELL_STYLE;
return Tokenizer_pop_keeping_context(self);
}

/*
Return the stack in order to handle the table row end.
*/
static PyObject* Tokenizer_handle_table_row_end(Tokenizer* self)
{
return Tokenizer_pop(self);
}

/*
Return the stack in order to handle the table end.
*/
static PyObject* Tokenizer_handle_table_end(Tokenizer* self)
{
self->head += 2;
return Tokenizer_pop(self);
}

/*
Handle the end of the stream of wikitext.
*/
static PyObject* Tokenizer_handle_end(Tokenizer* self, int context)
static PyObject* Tokenizer_handle_end(Tokenizer* self, uint64_t context)
{
PyObject *token, *text, *trash;
int single;
@@ -2444,9 +2795,16 @@ static PyObject* Tokenizer_handle_end(Tokenizer* self, int context)
if (single)
return Tokenizer_handle_single_tag_end(self);
}
else if (context & AGG_DOUBLE) {
trash = Tokenizer_pop(self);
Py_XDECREF(trash);
else {
if (context & LC_TABLE_CELL_OPEN) {
trash = Tokenizer_pop(self);
Py_XDECREF(trash);
context = self->topstack->context;
}
if (context & AGG_DOUBLE) {
trash = Tokenizer_pop(self);
Py_XDECREF(trash);
}
}
return Tokenizer_fail_route(self);
}
@@ -2457,7 +2815,8 @@ static PyObject* Tokenizer_handle_end(Tokenizer* self, int context)
Make sure we are not trying to write an invalid character. Return 0 if
everything is safe, or -1 if the route must be failed.
*/
static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data)
static int
Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UNICODE data)
{
if (context & LC_FAIL_NEXT)
return -1;
@@ -2508,7 +2867,7 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data)
}
else if (context & LC_FAIL_ON_LBRACE) {
if (data == '{' || (Tokenizer_READ_BACKWARDS(self, 1) == '{' &&
Tokenizer_READ_BACKWARDS(self, 2) == '{')) {
Tokenizer_READ_BACKWARDS(self, 2) == '{')) {
if (context & LC_TEMPLATE)
self->topstack->context |= LC_FAIL_ON_EQUALS;
else
@@ -2533,12 +2892,30 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data)
}

/*
Returns whether the current head has leading whitespace.
TODO: treat comments and templates as whitespace, allow fail on non-newline spaces.
*/
static int Tokenizer_has_leading_whitespace(Tokenizer* self)
{
int offset = 1;
Py_UNICODE current_character;
while (1) {
current_character = Tokenizer_READ_BACKWARDS(self, offset);
if (!current_character || current_character == '\n')
return 1;
else if (!Py_UNICODE_ISSPACE(current_character))
return 0;
offset++;
}
}

/*
Parse the wikicode string, using context for when to stop. If push is true,
we will push a new context, otherwise we won't and context will be ignored.
*/
static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
static PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push)
{
int this_context;
uint64_t this_context;
Py_UNICODE this, next, next_next, last;
PyObject* temp;

@@ -2667,22 +3044,99 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
if (temp != Py_None)
return temp;
}
else if (!last || last == '\n') {
if (this == '#' || this == '*' || this == ';' || this == ':') {
if (Tokenizer_handle_list(self))
else if ((!last || last == '\n') && (this == '#' || this == '*' || this == ';' || this == ':')) {
if (Tokenizer_handle_list(self))
return NULL;
}
else if ((!last || last == '\n') && (this == '-' && this == next &&
this == Tokenizer_READ(self, 2) &&
this == Tokenizer_READ(self, 3))) {
if (Tokenizer_handle_hr(self))
return NULL;
}
else if ((this == '\n' || this == ':') && this_context & LC_DLTERM) {
if (Tokenizer_handle_dl_term(self))
return NULL;
// Kill potential table contexts
if (this == '\n')
self->topstack->context &= ~LC_TABLE_CELL_LINE_CONTEXTS;
}

// Start of table parsing
else if (this == '{' && next == '|' && Tokenizer_has_leading_whitespace(self)) {
if (Tokenizer_CAN_RECURSE(self)) {
if (Tokenizer_parse_table(self))
return NULL;
}
else if (Tokenizer_emit_char(self, this) || Tokenizer_emit_char(self, next))
return NULL;
else
self->head++;
}
else if (this_context & LC_TABLE_OPEN) {
if (this == '|' && next == '|' && this_context & LC_TABLE_TD_LINE) {
if (this_context & LC_TABLE_CELL_OPEN)
return Tokenizer_handle_table_cell_end(self, 0);
else if (Tokenizer_handle_table_cell(self, "||", "td", LC_TABLE_TD_LINE))
return NULL;
}
else if (this == '-' && this == next &&
this == Tokenizer_READ(self, 2) &&
this == Tokenizer_READ(self, 3)) {
if (Tokenizer_handle_hr(self))
else if (this == '|' && next == '|' && this_context & LC_TABLE_TH_LINE) {
if (this_context & LC_TABLE_CELL_OPEN)
return Tokenizer_handle_table_cell_end(self, 0);
else if (Tokenizer_handle_table_cell(self, "||", "th", LC_TABLE_TH_LINE))
return NULL;
}
else if (this == '!' && next == '!' && this_context & LC_TABLE_TH_LINE) {
if (this_context & LC_TABLE_CELL_OPEN)
return Tokenizer_handle_table_cell_end(self, 0);
else if (Tokenizer_handle_table_cell(self, "!!", "th", LC_TABLE_TH_LINE))
return NULL;
}
else if (this == '|' && this_context & LC_TABLE_CELL_STYLE) {
return Tokenizer_handle_table_cell_end(self, 1);
}
// On newline, clear out cell line contexts
else if (this == '\n' && this_context & LC_TABLE_CELL_LINE_CONTEXTS) {
self->topstack->context &= ~LC_TABLE_CELL_LINE_CONTEXTS;
if (Tokenizer_emit_char(self, this))
return NULL;
}
else if (Tokenizer_has_leading_whitespace(self)) {
if (this == '|' && next == '}') {
if (this_context & LC_TABLE_CELL_OPEN)
return Tokenizer_handle_table_cell_end(self, 0);
if (this_context & LC_TABLE_ROW_OPEN)
return Tokenizer_handle_table_row_end(self);
else
return Tokenizer_handle_table_end(self);
}
else if (this == '|' && next == '-') {
if (this_context & LC_TABLE_CELL_OPEN)
return Tokenizer_handle_table_cell_end(self, 0);
if (this_context & LC_TABLE_ROW_OPEN)
return Tokenizer_handle_table_row_end(self);
else if (Tokenizer_handle_table_row(self))
return NULL;
}
else if (this == '|') {
if (this_context & LC_TABLE_CELL_OPEN)
return Tokenizer_handle_table_cell_end(self, 0);
else if (Tokenizer_handle_table_cell(self, "|", "td", LC_TABLE_TD_LINE))
return NULL;
}
else if (this == '!') {
if (this_context & LC_TABLE_CELL_OPEN)
return Tokenizer_handle_table_cell_end(self, 0);
else if (Tokenizer_handle_table_cell(self, "!", "th", LC_TABLE_TH_LINE))
return NULL;
}
else if (Tokenizer_emit_char(self, this))
return NULL;
}
else if (Tokenizer_emit_char(self, this))
return NULL;
}
else if ((this == '\n' || this == ':') && this_context & LC_DLTERM) {
if (Tokenizer_handle_dl_term(self))
// Raise BadRoute to table start
if (BAD_ROUTE)
return NULL;
}
else if (Tokenizer_emit_char(self, this))
@@ -2697,7 +3151,8 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
{
PyObject *text, *temp, *tokens;
int context = 0, skip_style_tags = 0;
uint64_t context = 0;
int skip_style_tags = 0;

if (PyArg_ParseTuple(args, "U|ii", &text, &context, &skip_style_tags)) {
Py_XDECREF(self->text);
@@ -2725,7 +3180,7 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
self->skip_style_tags = skip_style_tags;
tokens = Tokenizer_parse(self, context, 1);

if (!tokens && !PyErr_Occurred()) {
if ((!tokens && !PyErr_Occurred()) || self->topstack) {
if (!ParserError) {
if (load_exceptions())
return NULL;
@@ -2734,6 +3189,9 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
RESET_ROUTE();
PyErr_SetString(ParserError, "C tokenizer exited with BAD_ROUTE");
}
else if (self->topstack)
PyErr_SetString(ParserError,
"C tokenizer exited with non-empty token stack");
else
PyErr_SetString(ParserError, "C tokenizer exited unexpectedly");
return NULL;
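
From the user's side, the new C table handling surfaces through the normal parse API: tables, rows, and cells come back as Tag nodes carrying the wiki markup recorded by Tokenizer_emit_table_tag(). A hedged sketch of what the test cases further down imply for the public API:

    import mwparserfromhell

    code = mwparserfromhell.parse("{|\n| foo || bar \n|}")
    for tag in code.filter_tags():
        print("%s: wiki_markup=%r" % (tag.tag, tag.wiki_markup))
    # Per the table_cell_inline case in tables.mwtest, this should list a
    # "table" tag with wiki_markup "{|" and two "td" cells with "|" and "||".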


+65 -54 mwparserfromhell/parser/tokenizer.h

@@ -29,6 +29,7 @@ SOFTWARE.
#include <math.h>
#include <structmember.h>
#include <bytesobject.h>
#include <stdint.h>

#if PY_MAJOR_VERSION >= 3
#define IS_PY3K
@@ -43,16 +44,17 @@ SOFTWARE.

static const char MARKERS[] = {
'{', '}', '[', ']', '<', '>', '|', '=', '&', '\'', '#', '*', ';', ':', '/',
'-', '\n', '\0'};
'-', '!', '\n', '\0'};

#define NUM_MARKERS 18
#define NUM_MARKERS 19
#define TEXTBUFFER_BLOCKSIZE 1024
#define MAX_DEPTH 40
#define MAX_CYCLES 100000
#define MAX_BRACES 255
#define MAX_ENTITY_SIZE 8

static int route_state = 0, route_context = 0;
static int route_state = 0;
static uint64_t route_context = 0;
#define BAD_ROUTE route_state
#define BAD_ROUTE_CONTEXT route_context
#define FAIL_ROUTE(context) route_state = 1; route_context = context
@@ -109,52 +111,61 @@ static PyObject* TagCloseClose;

/* Local contexts: */

#define LC_TEMPLATE 0x00000007
#define LC_TEMPLATE_NAME 0x00000001
#define LC_TEMPLATE_PARAM_KEY 0x00000002
#define LC_TEMPLATE_PARAM_VALUE 0x00000004

#define LC_ARGUMENT 0x00000018
#define LC_ARGUMENT_NAME 0x00000008
#define LC_ARGUMENT_DEFAULT 0x00000010

#define LC_WIKILINK 0x00000060
#define LC_WIKILINK_TITLE 0x00000020
#define LC_WIKILINK_TEXT 0x00000040

#define LC_EXT_LINK 0x00000180
#define LC_EXT_LINK_URI 0x00000080
#define LC_EXT_LINK_TITLE 0x00000100

#define LC_HEADING 0x00007E00
#define LC_HEADING_LEVEL_1 0x00000200
#define LC_HEADING_LEVEL_2 0x00000400
#define LC_HEADING_LEVEL_3 0x00000800
#define LC_HEADING_LEVEL_4 0x00001000
#define LC_HEADING_LEVEL_5 0x00002000
#define LC_HEADING_LEVEL_6 0x00004000

#define LC_TAG 0x00078000
#define LC_TAG_OPEN 0x00008000
#define LC_TAG_ATTR 0x00010000
#define LC_TAG_BODY 0x00020000
#define LC_TAG_CLOSE 0x00040000

#define LC_STYLE 0x00780000
#define LC_STYLE_ITALICS 0x00080000
#define LC_STYLE_BOLD 0x00100000
#define LC_STYLE_PASS_AGAIN 0x00200000
#define LC_STYLE_SECOND_PASS 0x00400000

#define LC_DLTERM 0x00800000

#define LC_SAFETY_CHECK 0x3F000000
#define LC_HAS_TEXT 0x01000000
#define LC_FAIL_ON_TEXT 0x02000000
#define LC_FAIL_NEXT 0x04000000
#define LC_FAIL_ON_LBRACE 0x08000000
#define LC_FAIL_ON_RBRACE 0x10000000
#define LC_FAIL_ON_EQUALS 0x20000000
#define LC_TEMPLATE 0x0000000000000007
#define LC_TEMPLATE_NAME 0x0000000000000001
#define LC_TEMPLATE_PARAM_KEY 0x0000000000000002
#define LC_TEMPLATE_PARAM_VALUE 0x0000000000000004

#define LC_ARGUMENT 0x0000000000000018
#define LC_ARGUMENT_NAME 0x0000000000000008
#define LC_ARGUMENT_DEFAULT 0x0000000000000010

#define LC_WIKILINK 0x0000000000000060
#define LC_WIKILINK_TITLE 0x0000000000000020
#define LC_WIKILINK_TEXT 0x0000000000000040

#define LC_EXT_LINK 0x0000000000000180
#define LC_EXT_LINK_URI 0x0000000000000080
#define LC_EXT_LINK_TITLE 0x0000000000000100

#define LC_HEADING 0x0000000000007E00
#define LC_HEADING_LEVEL_1 0x0000000000000200
#define LC_HEADING_LEVEL_2 0x0000000000000400
#define LC_HEADING_LEVEL_3 0x0000000000000800
#define LC_HEADING_LEVEL_4 0x0000000000001000
#define LC_HEADING_LEVEL_5 0x0000000000002000
#define LC_HEADING_LEVEL_6 0x0000000000004000

#define LC_TAG 0x0000000000078000
#define LC_TAG_OPEN 0x0000000000008000
#define LC_TAG_ATTR 0x0000000000010000
#define LC_TAG_BODY 0x0000000000020000
#define LC_TAG_CLOSE 0x0000000000040000

#define LC_STYLE 0x0000000000780000
#define LC_STYLE_ITALICS 0x0000000000080000
#define LC_STYLE_BOLD 0x0000000000100000
#define LC_STYLE_PASS_AGAIN 0x0000000000200000
#define LC_STYLE_SECOND_PASS 0x0000000000400000

#define LC_DLTERM 0x0000000000800000

#define LC_SAFETY_CHECK 0x000000003F000000
#define LC_HAS_TEXT 0x0000000001000000
#define LC_FAIL_ON_TEXT 0x0000000002000000
#define LC_FAIL_NEXT 0x0000000004000000
#define LC_FAIL_ON_LBRACE 0x0000000008000000
#define LC_FAIL_ON_RBRACE 0x0000000010000000
#define LC_FAIL_ON_EQUALS 0x0000000020000000

#define LC_TABLE 0x0000000FC0000000
#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000000D00000000
#define LC_TABLE_OPEN 0x0000000040000000
#define LC_TABLE_CELL_OPEN 0x0000000080000000
#define LC_TABLE_CELL_STYLE 0x0000000100000000
#define LC_TABLE_ROW_OPEN 0x0000000200000000
#define LC_TABLE_TD_LINE 0x0000000400000000
#define LC_TABLE_TH_LINE 0x0000000800000000

/* Global contexts: */

@@ -162,9 +173,9 @@ static PyObject* TagCloseClose;

/* Aggregate contexts: */

#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE)
#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE | LC_TABLE_OPEN)
#define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)
#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE)
#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE | LC_TABLE_ROW_OPEN)
#define AGG_NO_WIKILINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI)
#define AGG_NO_EXT_LINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK)

@@ -191,7 +202,7 @@ struct Textbuffer {

struct Stack {
PyObject* stack;
int context;
uint64_t context;
struct Textbuffer* textbuffer;
struct Stack* next;
};
@@ -202,7 +213,7 @@ typedef struct {
} HeadingData;

typedef struct {
int context;
uint64_t context;
struct Textbuffer* pad_first;
struct Textbuffer* pad_before_eq;
struct Textbuffer* pad_after_eq;
@@ -267,7 +278,7 @@ static int Tokenizer_parse_entity(Tokenizer*);
static int Tokenizer_parse_comment(Tokenizer*);
static int Tokenizer_handle_dl_term(Tokenizer*);
static int Tokenizer_parse_tag(Tokenizer*);
static PyObject* Tokenizer_parse(Tokenizer*, int, int);
static PyObject* Tokenizer_parse(Tokenizer*, uint64_t, int);
static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*);

static int load_exceptions(void);
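
A small consistency check on the widened constants: LC_TABLE_CELL_LINE_CONTEXTS is the union of the cell-style bit and the td/th line bits, which is exactly the mask both tokenizers clear when they hit a newline.

    LC_TABLE_OPEN       = 0x0000000040000000
    LC_TABLE_CELL_STYLE = 0x0000000100000000
    LC_TABLE_TD_LINE    = 0x0000000400000000
    LC_TABLE_TH_LINE    = 0x0000000800000000

    assert (LC_TABLE_CELL_STYLE | LC_TABLE_TD_LINE |
            LC_TABLE_TH_LINE) == 0x0000000D00000000
    # Clearing the mask on '\n', as Tokenizer_parse() does above:
    context = LC_TABLE_OPEN | LC_TABLE_TD_LINE
    context &= ~0x0000000D00000000
    assert context == LC_TABLE_OPEN   # only the open-table bit survives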


+218 -7 mwparserfromhell/parser/tokenizer.py

@@ -63,7 +63,7 @@ class Tokenizer(object):
START = object()
END = object()
MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "'", "#", "*", ";",
":", "/", "-", "\n", START, END]
":", "/", "-", "!", "\n", START, END]
MAX_DEPTH = 40
MAX_CYCLES = 100000
regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE)
@@ -991,12 +991,166 @@ class Tokenizer(object):
else:
self._emit_text("\n")

def _emit_table_tag(self, open_open_markup, tag, style, padding,
close_open_markup, contents, open_close_markup):
"""Emit a table tag."""
self._emit(tokens.TagOpenOpen(wiki_markup=open_open_markup))
self._emit_text(tag)
if style:
self._emit_all(style)
if close_open_markup:
self._emit(tokens.TagCloseOpen(wiki_markup=close_open_markup,
padding=padding))
else:
self._emit(tokens.TagCloseOpen(padding=padding))
if contents:
self._emit_all(contents)
self._emit(tokens.TagOpenClose(wiki_markup=open_close_markup))
self._emit_text(tag)
self._emit(tokens.TagCloseClose())

def _handle_table_style(self, end_token):
"""Handle style attributes for a table until ``end_token``."""
data = _TagOpenData()
data.context = _TagOpenData.CX_ATTR_READY
while True:
this = self._read()
can_exit = (not data.context & data.CX_QUOTED or
data.context & data.CX_NOTE_SPACE)
if this == end_token and can_exit:
if data.context & (data.CX_ATTR_NAME | data.CX_ATTR_VALUE):
self._push_tag_buffer(data)
if this.isspace():
data.padding_buffer["first"] += this
return data.padding_buffer["first"]
elif this is self.END or this == end_token:
if self._context & contexts.TAG_ATTR:
if data.context & data.CX_QUOTED:
# Unclosed attribute quote: reset, don't die
data.context = data.CX_ATTR_VALUE
self._pop()
self._head = data.reset
continue
self._pop()
self._fail_route()
else:
self._handle_tag_data(data, this)
self._head += 1

def _parse_table(self):
"""Parse a wikicode table by starting with the first line."""
reset = self._head + 1
self._head += 2
self._push(contexts.TABLE_OPEN)
try:
padding = self._handle_table_style("\n")
except BadRoute:
self._head = reset
self._emit_text("{|")
return
style = self._pop()

self._head += 1
try:
table = self._parse(contexts.TABLE_OPEN)
except BadRoute:
self._head = reset
self._emit_text("{|")
return

self._emit_table_tag("{|", "table", style, padding, None, table, "|}")
# Offset displacement done by _parse():
self._head -= 1

def _handle_table_row(self):
"""Parse as style until end of the line, then continue."""
self._head += 2
if not self._can_recurse():
self._emit_text("|-")
self._head -= 1
return

self._push(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN)
try:
padding = self._handle_table_style("\n")
except BadRoute:
self._pop()
raise
style = self._pop()

# Don't parse the style separator:
self._head += 1
row = self._parse(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN)

self._emit_table_tag("|-", "tr", style, padding, None, row, "")
# Offset displacement done by parse():
self._head -= 1

def _handle_table_cell(self, markup, tag, line_context):
"""Parse as normal syntax unless we hit a style marker, then parse
style as HTML attributes and the remainder as normal syntax."""
old_context = self._context
padding, style = "", None
self._head += len(markup)
reset = self._head
if not self._can_recurse():
self._emit_text(markup)
self._head -= 1
return

cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN |
line_context | contexts.TABLE_CELL_STYLE)
cell_context = self._context
self._context = old_context
reset_for_style = cell_context & contexts.TABLE_CELL_STYLE
if reset_for_style:
self._head = reset
self._push(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN |
line_context)
padding = self._handle_table_style("|")
style = self._pop()
# Don't parse the style separator:
self._head += 1
cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN |
line_context)
cell_context = self._context
self._context = old_context

close_open_markup = "|" if reset_for_style else None
self._emit_table_tag(markup, tag, style, padding, close_open_markup,
cell, "")
# Keep header/cell line contexts:
self._context |= cell_context & (contexts.TABLE_TH_LINE |
contexts.TABLE_TD_LINE)
# Offset displacement done by parse():
self._head -= 1

def _handle_table_cell_end(self, reset_for_style=False):
"""Return the stack, having set or cleared the TABLE_CELL_STYLE context
flag depending on whether the cell must be reset to parse style
attributes."""
if reset_for_style:
self._context |= contexts.TABLE_CELL_STYLE
else:
self._context &= ~contexts.TABLE_CELL_STYLE
return self._pop(keep_context=True)

def _handle_table_row_end(self):
"""Return the stack in order to handle the table row end."""
return self._pop()

def _handle_table_end(self):
"""Return the stack in order to handle the table end."""
self._head += 2
return self._pop()

def _handle_end(self):
"""Handle the end of the stream of wikitext."""
if self._context & contexts.FAIL:
if self._context & contexts.TAG_BODY:
if is_single(self._stack[1].text):
return self._handle_single_tag_end()
if self._context & contexts.TABLE_CELL_OPEN:
self._pop()
if self._context & contexts.DOUBLE:
self._pop()
self._fail_route()
@@ -1144,15 +1298,68 @@ class Tokenizer(object):
result = self._parse_style()
if result is not None:
return result
elif self._read(-1) in ("\n", self.START):
if this in ("#", "*", ";", ":"):
elif self._read(-1) in ("\n", self.START) and this in ("#", "*", ";", ":"):
self._handle_list()
elif this == next == self._read(2) == self._read(3) == "-":
elif self._read(-1) in ("\n", self.START) and this == next == self._read(2) == self._read(3) == "-":
self._handle_hr()
else:
self._emit_text(this)
elif this in ("\n", ":") and self._context & contexts.DL_TERM:
self._handle_dl_term()
if this == "\n":
# Kill potential table contexts
self._context &= ~contexts.TABLE_CELL_LINE_CONTEXTS
# Start of table parsing
elif this == "{" and next == "|" and (self._read(-1) in ("\n", self.START) or
(self._read(-2) in ("\n", self.START) and self._read(-1).isspace())):
if self._can_recurse():
self._parse_table()
else:
self._emit_text("{|")
elif self._context & contexts.TABLE_OPEN:
if this == next == "|" and self._context & contexts.TABLE_TD_LINE:
if self._context & contexts.TABLE_CELL_OPEN:
return self._handle_table_cell_end()
self._handle_table_cell("||", "td", contexts.TABLE_TD_LINE)
elif this == next == "|" and self._context & contexts.TABLE_TH_LINE:
if self._context & contexts.TABLE_CELL_OPEN:
return self._handle_table_cell_end()
self._handle_table_cell("||", "th", contexts.TABLE_TH_LINE)
elif this == next == "!" and self._context & contexts.TABLE_TH_LINE:
if self._context & contexts.TABLE_CELL_OPEN:
return self._handle_table_cell_end()
self._handle_table_cell("!!", "th", contexts.TABLE_TH_LINE)
elif this == "|" and self._context & contexts.TABLE_CELL_STYLE:
return self._handle_table_cell_end(reset_for_style=True)
# on newline, clear out cell line contexts
elif this == "\n" and self._context & contexts.TABLE_CELL_LINE_CONTEXTS:
self._context &= ~contexts.TABLE_CELL_LINE_CONTEXTS
self._emit_text(this)
elif (self._read(-1) in ("\n", self.START) or
(self._read(-2) in ("\n", self.START) and self._read(-1).isspace())):
if this == "|" and next == "}":
if self._context & contexts.TABLE_CELL_OPEN:
return self._handle_table_cell_end()
if self._context & contexts.TABLE_ROW_OPEN:
return self._handle_table_row_end()
return self._handle_table_end()
elif this == "|" and next == "-":
if self._context & contexts.TABLE_CELL_OPEN:
return self._handle_table_cell_end()
if self._context & contexts.TABLE_ROW_OPEN:
return self._handle_table_row_end()
self._handle_table_row()
elif this == "|":
if self._context & contexts.TABLE_CELL_OPEN:
return self._handle_table_cell_end()
self._handle_table_cell("|", "td", contexts.TABLE_TD_LINE)
elif this == "!":
if self._context & contexts.TABLE_CELL_OPEN:
return self._handle_table_cell_end()
self._handle_table_cell("!", "th", contexts.TABLE_TH_LINE)
else:
self._emit_text(this)
else:
self._emit_text(this)

else:
self._emit_text(this)
self._head += 1
@@ -1164,6 +1371,10 @@ class Tokenizer(object):
self._text = [segment for segment in split if segment]
self._head = self._global = self._depth = self._cycles = 0
try:
return self._parse(context)
tokens = self._parse(context)
except BadRoute: # pragma: no cover (untestable/exceptional case)
raise ParserError("Python tokenizer exited with BadRoute")
if self._stacks: # pragma: no cover (untestable/exceptional case)
err = "Python tokenizer exited with non-empty token stack"
raise ParserError(err)
return tokens
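
The pure-Python tokenizer now emits the same table token streams as the C version, so it can be driven directly for a quick check against a test case's expected output:

    from mwparserfromhell.parser.tokenizer import Tokenizer
    from mwparserfromhell.parser import tokens

    stream = Tokenizer().tokenize("{|\n ! foo \n|}")
    # Spot-check against the table_header_simple case in tables.mwtest:
    assert isinstance(stream[0], tokens.TagOpenOpen)
    assert stream[0].wiki_markup == "{|"
    assert isinstance(stream[-1], tokens.TagCloseClose)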

+8 -3 tests/_test_tokenizer.py

@@ -25,8 +25,9 @@ import codecs
from os import listdir, path
import sys

from mwparserfromhell.compat import py3k
from mwparserfromhell.compat import py3k, str
from mwparserfromhell.parser import tokens
from mwparserfromhell.parser.builder import Builder

class _TestParseError(Exception):
"""Raised internally when a test could not be parsed."""
@@ -50,8 +51,12 @@ class TokenizerTestCase(object):
*label* for the method's docstring.
"""
def inner(self):
expected = data["output"]
actual = self.tokenizer().tokenize(data["input"])
if hasattr(self, "roundtrip"):
expected = data["input"]
actual = str(Builder().build(data["output"][:]))
else:
expected = data["output"]
actual = self.tokenizer().tokenize(data["input"])
self.assertEqual(expected, actual)
if not py3k:
inner.__name__ = funcname.encode("utf8")
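
The roundtripping hook added here builds each test case's expected token list back into wikitext and compares it with the input. For any case that passes both the tokenizer and roundtripping suites, tokenize followed by build is therefore an identity, which can also be checked by hand:

    from mwparserfromhell.parser.builder import Builder
    from mwparserfromhell.parser.tokenizer import Tokenizer

    text = "{|\n|- \n| foo \n|- \n| bar\n|}"   # the nested_cells_and_rows input
    assert str(Builder().build(Tokenizer().tokenize(text))) == text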


+41 -0 tests/test_roundtripping.py

@@ -0,0 +1,41 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import unicode_literals

try:
import unittest2 as unittest
except ImportError:
import unittest

from ._test_tokenizer import TokenizerTestCase

class TestRoundtripping(TokenizerTestCase, unittest.TestCase):
"""Test cases for roundtripping tokens back to wikitext."""

@classmethod
def setUpClass(cls):
cls.roundtrip = True


if __name__ == "__main__":
unittest.main(verbosity=2)

+32 -0 tests/test_tag.py

@@ -226,6 +226,38 @@ class TestTag(TreeEqualityTestCase):
self.assertWikicodeEqual(parsed, node.closing_tag)
self.assertEqual("<ref>foobar</ref {{ignore me}}>", node)

def test_wiki_style_separator(self):
"""test getter/setter for wiki_style_separator attribute"""
node = Tag(wraptext("table"), wraptext("\n"))
self.assertIs(None, node.wiki_style_separator)
node.wiki_style_separator = "|"
self.assertEqual("|", node.wiki_style_separator)
node.wiki_markup = "{"
self.assertEqual("{|\n{", node)
node2 = Tag(wraptext("table"), wraptext("\n"), wiki_style_separator="|")
self.assertEqual("|", node2.wiki_style_separator)

def test_closing_wiki_markup(self):
"""test getter/setter for closing_wiki_markup attribute"""
node = Tag(wraptext("table"), wraptext("\n"))
self.assertIs(None, node.closing_wiki_markup)
node.wiki_markup = "{|"
self.assertEqual("{|", node.closing_wiki_markup)
node.closing_wiki_markup = "|}"
self.assertEqual("|}", node.closing_wiki_markup)
self.assertEqual("{|\n|}", node)
node.wiki_markup = "!!"
self.assertEqual("|}", node.closing_wiki_markup)
self.assertEqual("!!\n|}", node)
node.wiki_markup = False
self.assertFalse(node.closing_wiki_markup)
self.assertEqual("<table>\n</table>", node)
node2 = Tag(wraptext("table"), wraptext("\n"),
attrs=[agen("id", "foo")], wiki_markup="{|",
closing_wiki_markup="|}")
self.assertEqual("|}", node2.closing_wiki_markup)
self.assertEqual('{| id="foo"\n|}', node2)

def test_has(self):
"""test Tag.has()"""
node = Tag(wraptext("ref"), wraptext("cite"), [agen("name", "foo")])


+410 -0 tests/tokenizer/tables.mwtest

@@ -0,0 +1,410 @@
name: empty_table
label: parsing an empty table
input: "{|\n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: inline_table
label: tables with a close on the same line are not valid
input: "{||}"
output: [Text(text="{||}")]

---

name: no_table_close_simple
label: no table close on inline table
input: "{| "
output: [Text(text="{| ")]

---

name: no_table_close_newline
label: no table close with a newline
input: "{| \n "
output: [Text(text="{| \n ")]

---

name: no_table_close_inside_cell
label: no table close while inside of a cell
input: "{| \n| "
output: [Text(text="{| \n| ")]

---

name: no_table_close_inside_cell_after_newline
label: no table close while inside of a cell after a newline
input: "{| \n| \n "
output: [Text(text="{| \n| \n ")]

---

name: no_table_close_inside_cell_with_attributes
label: no table close while inside of a cell with attributes
input: "{| \n| red | test"
output: [Text(text="{| \n| red | test")]

---

name: no_table_close_inside_row
label: no table close while inside of a row
input: "{| \n|- "
output: [Text(text="{| \n|- ")]

---

name: no_table_close_inside_row_after_newline
label: no table close while inside of a row after a newline
input: "{| \n|- \n "
output: [Text(text="{| \n|- \n ")]

---

name: no_table_close_row_and_cell
label: no table close while inside a cell inside a row
input: "{| \n|- \n|"
output: [Text(text="{| \n|- \n|")]

---

name: no_table_close_attributes
label: don't parse attributes as attributes if the table doesn't exist
input: "{| border="1""
output: [Text(text="{| border=\"1\"")]

---

name: no_table_close_unclosed_attributes
label: don't parse unclosed attributes if the table doesn't exist
input: "{| border="
output: [Text(text="{| border=")]

---

name: no_table_close_row_attributes
label: don't parse row attributes as attributes if the table doesn't exist
input: "{| |- border="1""
output: [Text(text="{| |- border=\"1\"")]

---

name: no_table_close_cell
label: don't parse cells if the table doesn't close
input: "{| | border="1"| test || red | foo"
output: [Text(text="{| | border=\"1\"| test || red | foo")]

---

name: crazy_no_table_close
label: lots of opened wiki syntax without closes
input: "{{{ {{ {| <ref"
output: [Text(text="{{{ {{ {| <ref")]

---

name: leading_whitespace_table
label: handle leading whitespace for a table
input: "foo \n \t {|\n|}"
output: [Text(text="foo \n \t "), TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: whitespace_after_table
label: handle whitespace after a table close
input: "{|\n|}\n \t "
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose(), Text(text="\n \t ")]

---

name: different_whitespace_after_table
label: handle spaces after a table close
input: "{|\n|} \n "
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose(), Text(text=" \n ")]

---

name: characters_after_table
label: handle characters after a table close
input: "{|\n|} tsta"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose(), Text(text=" tsta")]

---

name: characters_after_inline_table
label: handle characters after an inline table close
input: "{| |} tsta"
output: [Text(text="{| |} tsta")]

---

name: leading_characters_table
label: don't parse as a table when leading characters are not newline or whitespace
input: "foo \n foo \t {|\n|}"
output: [Text(text="foo \n foo \t {|\n|}")]

---

name: table_row_simple
label: simple table row
input: "{|\n |- \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding=" \n"), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_row_multiple
label: multiple table rows
input: "{|\n |- \n|- \n |-\n |}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding=" \n"), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_cell_simple
label: simple table cell
input: "{|\n | foo \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_cell_inline
label: multiple inline table cells
input: "{|\n | foo || bar || test \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" foo "), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" bar "), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_cell_multiple
label: multiple table cells (non-inline)
input: "{|\n| foo \n| bar \n| test \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" bar \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_header_simple
label: simple header cell
input: "{|\n ! foo \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_header_inline
label: multiple inline header cells
input: "{|\n ! foo || bar !! test \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" foo "), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" bar "), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_header_multiple
label: multiple table header cells (non-inline)
input: "{|\n! foo \n! bar \n! test \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" bar \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: nested_cells_and_rows
label: combination of cells and rows in a table
input: "{|\n|- \n| foo \n|- \n| bar\n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding=" \n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding=" \n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" bar\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_cell_fake_close
label: looks like a table close but is not
input: "{|\n | |} \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(wiki_markup="|", padding=" "), Text(text="} \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_cell_more_fake_close
label: looks like a table close but is not
input: "{|\n || |} \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" |} \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_cell_extra_close
label: process second close as text
input: "{| \n |} \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose(), Text(text=" \n|}")]

---

name: nowiki_inside_table
label: nowiki handles pipe characters in tables
input: "{|\n | foo <nowiki>| |- {| |} || ! !!</nowiki> bar \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" foo "), TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="| |- {| |} || ! !!"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), Text(text=" bar \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_text_outside_cell
label: parse text inside table but outside of a cell
input: "{|\n bar \n | foo \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" bar \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: no_table_cell_with_leading_characters
label: fail to create a table cell when there are leading non-whitespace characters
input: "{|\n bar | foo \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" bar | foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: no_table_row_with_leading_characters
label: fail to create a table row when there are leading non-whitespace characters
input: "{|\n bar |- foo \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" bar |- foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: template_inside_table_cell
label: template within table cell
input: "{|\n |{{foo\n|bar=baz}} \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), Text(text=" \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_cell_attributes
label: parse table cell style attributes
input: "{| \n | name="foo bar"| test \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]
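For reference, this is roughly how the resulting Tag node could be inspected; a sketch assuming the feature/tables branch, with property names (wiki_markup, wiki_style_separator) taken from the tag.py changes in this commit:

import mwparserfromhell

text = '{| \n | name="foo bar"| test \n|}'
code = mwparserfromhell.parse(text)
cell = [t for t in code.filter_tags() if t.tag == "td"][0]
print(cell.wiki_markup)               # expected: "|"  (the cell marker)
print(str(cell.attributes[0].name))   # expected: "name"
print(str(cell.attributes[0].value))  # expected: "foo bar"
print(cell.wiki_style_separator)      # expected: "|"  (separator before the contents)
assert str(code) == text              # the original markup is preserved exactly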

---

name: table_cell_empty_attributes
label: parse table cell with style markers but no attributes
input: "{| \n | | test \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(wiki_markup="|", padding=" "), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_cell_with_dash
label: parse a situation in which a cell line looks like a row line
input: "{|\n ||- \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(wiki_markup="|", padding=""), Text(text="- \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_cell_attributes_quote_with_pipe
label: pipe inside an attribute quote should still be used as a style separator
input: "{| \n | name="foo|bar"| test \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), Text(text="\"foo"), TagCloseOpen(wiki_markup="|", padding=""), Text(text="bar\"| test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_cell_attributes_name_with_pipe
label: pipe inside an attribute name should still be used as a style separator
input: "{| \n | name|="foo bar" | test \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseOpen(wiki_markup="|", padding=""), Text(text="=\"foo bar\" | test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_cell_attributes_pipe_after_equals
label: a pipe directly after the equals sign in an attribute should still be used as a style separator
input: "{| \n | name=|"foo|bar"| test \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagCloseOpen(wiki_markup="|", padding=""), Text(text="\"foo|bar\"| test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_cell_attributes_templates
label: pipes inside a template in the attributes should not be treated as style separators
input: "{| \n | {{comment|template=baz}} | test \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_after_eq="", pad_first=" ", pad_before_eq=" "), TemplateOpen(), Text(text="comment"), TemplateParamSeparator(), Text(text="template"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: header_cell_attributes
label: parse header cell style attributes
input: "{| \n ! name="foo bar"| test \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: inline_cell_attributes
label: parse cell style attributes of inline cells
input: "{| \n ! name="foo bar" | test ||color="red"| markup!!foo | time \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagAttrStart(pad_after_eq="", pad_first=" ", pad_before_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(wiki_markup="|", padding=" "), Text(text=" test "), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagAttrStart(pad_first="", pad_before_eq="", pad_after_eq=""), Text(text="color"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="red"), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" markup"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagAttrStart(pad_first="", pad_before_eq=" ", pad_after_eq=""), Text(text="foo"), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" time \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_row_attributes
label: parse table row style attributes
input: "{| \n |- name="foo bar"\n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_row_attributes_crazy_whitespace
label: parse table row style attributes with different whitespace
input: "{| \t \n |- \t name="foo bar" \t \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \t \n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" \t ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(padding=" \t \n"), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_attributes
label: parse table style attributes
input: "{| name="foo bar"\n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: inline_table_attributes
label: handle attributes in inline tables
input: "{| foo="tee bar" |}"
output: [Text(text='{| foo="tee bar" |}')]
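In other words, a would-be table that opens and closes on a single line is not parsed at all; a quick hedged check, assuming the branch is installed:

import mwparserfromhell

code = mwparserfromhell.parse('{| foo="tee bar" |}')
assert not code.filter_tags()           # no table node is created
assert str(code) == '{| foo="tee bar" |}'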

---

name: table_incorrect_attributes
label: parse incorrect table style attributes
input: "{| name="foo\n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), Text(text="\"foo"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: templates_in_table_attribute
label: templates in the attributes of a table, after the start
input: "{| {{class}}="{{wikitable}}"\n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), TemplateOpen(), Text(text="class"), TemplateClose(), TagAttrEquals(), TagAttrQuote(char="\""), TemplateOpen(), Text(text="wikitable"), TemplateClose(), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: templates_in_table_attribute_2
label: templates in the attributes of a table, after the start, combined with attributes on a cell
input: "{|{{foo}} \n | name="foo bar" | test \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_first="", pad_before_eq=" ", pad_after_eq=""), TemplateOpen(), Text(text="foo"), TemplateClose(), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(wiki_markup="|", padding=" "), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: inappropriate_marker_at_line_start
label: an inappropriate marker (a closing curly brace) at the start of a line in the table
input: "{|\n}"
output: [Text(text="{|\n}")]

---

name: fake_close_near_start
label: a fake closing token at the end of the first line in the table
input: "{| class="wikitable" style="text-align: center; width=100%;|}\n|\n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="class"), TagAttrEquals(), TagAttrQuote(char='"'), Text(text="wikitable"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), Text(text="\"text-align:"), TagAttrStart(pad_first=" ", pad_before_eq=" ", pad_after_eq=""), Text(text="center;"), TagAttrStart(pad_first="", pad_before_eq="", pad_after_eq=""), Text(text="width"), TagAttrEquals(), Text(text="100%;|}"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text="\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: fake_close_near_start_2
label: a fake closing token inside a quoted attribute value on the first line of the table
input: "{| class="wikitable|}"\n|\n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="class"), TagAttrEquals(), TagAttrQuote(char='"'), Text(text="wikitable|}"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text="\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: junk_after_table_start
label: ignore more junk on the first line of the table
input: "{| class="wikitable" | foobar\n|\n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="class"), TagAttrEquals(), TagAttrQuote(char='"'), Text(text="wikitable"), TagAttrStart(pad_first=" ", pad_before_eq=" ", pad_after_eq=""), Text(text="|"), TagAttrStart(pad_first="", pad_before_eq="", pad_after_eq=""), Text(text="foobar"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text="\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: junk_after_table_row
label: ignore junk on the first line of a table row
input: "{|\n|- foo="bar" | baz\n|blerp\n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char='"'), Text(text="bar"), TagAttrStart(pad_first=" ", pad_before_eq=" ", pad_after_eq=""), Text(text="|"), TagAttrStart(pad_first="", pad_before_eq="", pad_after_eq=""), Text(text="baz"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text="blerp\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]
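All of the cases above are also exercised for lossless round-tripping (see tests/test_roundtripping.py in this commit); a minimal sketch of that property, assuming the branch is installed:

import mwparserfromhell

samples = [
    "{|\n|- \n| foo \n|- \n| bar\n|}",
    '{| \n | name="foo bar"| test \n|}',
    "{|\n bar | foo \n|}",   # no cell is created, but the text survives verbatim
]
for text in samples:
    assert str(mwparserfromhell.parse(text)) == text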

+ 7  - 0   tests/tokenizer/tags_wikimarkup.mwtest Bestand weergeven

@@ -447,6 +447,13 @@ output: [TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Tag

---

name: dt_dd_mix4
label: another example of correct dt/dd usage, with a trigger for a specific parse route
input: ";foo]:bar"
output: [TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="foo]"), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="bar")]
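A rough node-level check of the new case, assuming the branch is installed:

import mwparserfromhell

code = mwparserfromhell.parse(";foo]:bar")
tags = code.filter_tags()
assert [str(t.tag) for t in tags] == ["dt", "dd"]
assert [t.wiki_markup for t in tags] == [";", ":"]
assert str(code) == ";foo]:bar"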

---

name: ul_ol_dt_dd_mix
label: an assortment of uls, ols, dds, and dts
input: ";:#*foo\n:#*;foo\n#*;:foo\n*;:#foo"

