
Implement CTokenizer for tables

CTokenizer is completely implemented in this commit - it didn't
make much sense to me to split it up. All tests pass, and the memory
test shows no leaks on Linux.
David Winegar, 9 years ago
commit 0128b1f78a (tag: v0.4)
3 changed files with 551 additions and 62 deletions:
  1. mwparserfromhell/parser/tokenizer.c (+492, -11)
  2. mwparserfromhell/parser/tokenizer.h (+58, -50)
  3. mwparserfromhell/parser/tokenizer.py (+1, -1)

mwparserfromhell/parser/tokenizer.c (+492, -11)

@@ -2454,6 +2454,399 @@ static PyObject* Tokenizer_handle_end(Tokenizer* self, uint64_t context)
}

/*
    Parse until ``end_token`` as style attributes for a table.
*/
static PyObject* Tokenizer_parse_as_table_style(Tokenizer* self, char end_token,
                                                int break_on_table_end)
{
    TagData *data = TagData_new();
    PyObject *padding, *trash;
    Py_UNICODE this, next;
    int can_exit, table_end;

    if (!data)
        return NULL;
    data->context = TAG_ATTR_READY;

    while (1) {
        this = Tokenizer_READ(self, 0);
        next = Tokenizer_READ(self, 1);
        can_exit = (!(data->context & TAG_QUOTED) || data->context & TAG_NOTE_SPACE);
        table_end = (break_on_table_end && this == '|' && next == '}');
        if ((this == end_token && can_exit) || table_end) {
            if (data->context & (TAG_ATTR_NAME | TAG_ATTR_VALUE)) {
                if (Tokenizer_push_tag_buffer(self, data)) {
                    TagData_dealloc(data);
                    return NULL;
                }
            }
            if (Py_UNICODE_ISSPACE(this))
                Textbuffer_write(&(data->pad_first), this);
            padding = Textbuffer_render(data->pad_first);
            TagData_dealloc(data);
            if (!padding)
                return NULL;
            return padding;
        }
        else if (!this || table_end || this == end_token) {
            if (self->topstack->context & LC_TAG_ATTR) {
                if (data->context & TAG_QUOTED) {
                    // Unclosed attribute quote: reset, don't die
                    data->context = TAG_ATTR_VALUE;
                    trash = Tokenizer_pop(self);
                    Py_XDECREF(trash);
                    self->head = data->reset;
                    continue;
                }
                trash = Tokenizer_pop(self);
                Py_XDECREF(trash);
            }
            TagData_dealloc(data);
            return Tokenizer_fail_route(self);
        }
        else {
            if (Tokenizer_handle_tag_data(self, data, this) || BAD_ROUTE) {
                TagData_dealloc(data);
                return NULL;
            }
        }
        self->head++;
    }
}
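
For a sense of what this function produces, here is a rough usage sketch against the library's public Python API (not part of this commit; printed values are approximate, worth checking against the test suite):

import mwparserfromhell

code = mwparserfromhell.parse('{| class="wikitable"\n|-\n| cell\n|}')
table = code.filter_tags()[0]   # the outermost Tag node is the table
attr = table.attributes[0]      # attribute built from the style run
print(attr.name, "=", attr.value)  # roughly: class = wikitable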

/*
    Handle the start of a table.
*/
static int Tokenizer_handle_table_start(Tokenizer* self)
{
    self->head += 2;
    Py_ssize_t reset = self->head;
    PyObject *style, *open_open_kwargs, *close_open_kwargs, *open_close_kwargs,
             *padding, *newline_character, *open_wiki_markup, *close_wiki_markup;
    PyObject *table = NULL;

    if (Tokenizer_push(self, LC_TABLE_OPEN))
        return -1;
    padding = Tokenizer_parse_as_table_style(self, '\n', 1);
    if (BAD_ROUTE) {
        RESET_ROUTE();
        self->head = reset - 1;
        if (Tokenizer_emit_text(self, "{|"))
            return -1;
        return 0;
    }
    if (!padding)
        return -1;
    style = Tokenizer_pop(self);
    if (!style) {
        Py_DECREF(padding);
        return -1;
    }

    newline_character = PyUnicode_FromString("\n");
    if (!newline_character) {
        Py_DECREF(padding);
        Py_DECREF(style);
        return -1;
    }
    // continue to parse if it is NOT an inline table
    if (PyUnicode_Contains(padding, newline_character)) {
        Py_DECREF(newline_character);
        self->head++;
        table = Tokenizer_parse(self, LC_TABLE_OPEN, 1);
        if (BAD_ROUTE) {
            RESET_ROUTE();
            // offset displacement done by parse()
            self->head = reset - 1;
            if (Tokenizer_emit_text(self, "{|"))
                return -1;
            return 0;
        }
        if (!table) {
            Py_DECREF(padding);
            Py_DECREF(style);
            return -1;
        }
    } else {
        Py_DECREF(newline_character);
        // close tag
        self->head += 2;
    }

    open_open_kwargs = PyDict_New();
    if (!open_open_kwargs)
        goto fail_decref_all;
    open_wiki_markup = PyUnicode_FromString("{|");
    if (!open_wiki_markup) {
        Py_DECREF(open_open_kwargs);
        goto fail_decref_all;
    }
    PyDict_SetItemString(open_open_kwargs, "wiki_markup", open_wiki_markup);
    Py_DECREF(open_wiki_markup);
    if (Tokenizer_emit_kwargs(self, TagOpenOpen, open_open_kwargs))
        goto fail_decref_all;
    if (Tokenizer_emit_text(self, "table"))
        goto fail_decref_all;

    if (style) {
        if (Tokenizer_emit_all(self, style))
            goto fail_decref_padding_table;
        Py_DECREF(style);
    }

    close_open_kwargs = PyDict_New();
    if (!close_open_kwargs)
        goto fail_decref_padding_table;
    PyDict_SetItemString(close_open_kwargs, "padding", padding);
    Py_DECREF(padding);
    if (Tokenizer_emit_kwargs(self, TagCloseOpen, close_open_kwargs))
        goto fail_decref_table;

    if (table) {
        if (Tokenizer_emit_all(self, table))
            goto fail_decref_table;
        Py_DECREF(table);
    }

    open_close_kwargs = PyDict_New();
    if (!open_close_kwargs)
        return -1;
    close_wiki_markup = PyUnicode_FromString("|}");
    if (!close_wiki_markup) {
        Py_DECREF(open_close_kwargs);
        return -1;
    }
    PyDict_SetItemString(open_close_kwargs, "wiki_markup", close_wiki_markup);
    Py_DECREF(close_wiki_markup);
    if (Tokenizer_emit_kwargs(self, TagOpenClose, open_close_kwargs))
        return -1;
    if (Tokenizer_emit_text(self, "table"))
        return -1;
    if (Tokenizer_emit(self, TagCloseClose))
        return -1;
    // offset displacement done by _parse()
    self->head--;
    return 0;

fail_decref_all:
    Py_DECREF(style);
fail_decref_padding_table:
    Py_DECREF(padding);
fail_decref_table:
    Py_XDECREF(table);
    return -1;
}
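
Two consequences of the logic above, sketched through the public parse() entry point (assumed behavior, not verbatim output): padding that contains a newline triggers a full body parse, while a `{|` that never finds its `|}` fails the route and falls back to literal text.

import mwparserfromhell

print(mwparserfromhell.parse("{|\n|}").filter_tags())
# roughly: a single Tag node for the empty table
print(mwparserfromhell.parse("{| no closing marker"))
# the route fails, so the input is emitted as plain text, unchanged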

/*
    Return the stack in order to handle the table end.
*/
static PyObject* Tokenizer_handle_table_end(Tokenizer* self)
{
    self->head += 2;
    return Tokenizer_pop(self);
}

/*
    Parse as style until end of the line, then continue.
*/
static int Tokenizer_handle_table_row(Tokenizer* self)
{
    Py_ssize_t reset = self->head;
    self->head += 2;
    PyObject *padding, *open_kwargs, *close_kwargs, *wiki_markup;
    PyObject *style = NULL;

    // If we can't recurse, still tokenize tag but parse style attrs as text
    if (Tokenizer_CAN_RECURSE(self)) {
        if (Tokenizer_push(self, LC_TABLE_OPEN))
            return -1;
        padding = Tokenizer_parse_as_table_style(self, '\n', 0);
        if (BAD_ROUTE) {
            self->head = reset;
            return 0;
        }
        if (!padding)
            return -1;
        style = Tokenizer_pop(self);
        if (!style) {
            Py_DECREF(padding);
            return -1;
        }
    } else {
        padding = PyUnicode_FromString("");
        if (!padding)
            return -1;
    }

    open_kwargs = PyDict_New();
    if (!open_kwargs)
        goto fail_decref_all;
    wiki_markup = PyUnicode_FromString("|-");
    if (!wiki_markup) {
        Py_DECREF(open_kwargs);
        goto fail_decref_all;
    }
    PyDict_SetItemString(open_kwargs, "wiki_markup", wiki_markup);
    Py_DECREF(wiki_markup);
    if (Tokenizer_emit_kwargs(self, TagOpenOpen, open_kwargs))
        goto fail_decref_all;
    if (Tokenizer_emit_text(self, "tr"))
        goto fail_decref_all;

    if (style) {
        if (Tokenizer_emit_all(self, style))
            goto fail_decref_all;
        Py_DECREF(style);
    }

    close_kwargs = PyDict_New();
    if (!close_kwargs)
        goto fail_decref_all;
    PyDict_SetItemString(close_kwargs, "padding", padding);
    Py_DECREF(padding);
    if (Tokenizer_emit_kwargs(self, TagCloseSelfclose, close_kwargs))
        return -1;
    return 0;

fail_decref_all:
    Py_XDECREF(style);
    Py_DECREF(padding);
    return -1;
}
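
Roughly, a `|-` line becomes a self-closing "tr" tag carrying its wiki markup and padding. A sketch with the pure-Python tokenizer, which this C code mirrors (token fields abridged and approximate):

from mwparserfromhell.parser.tokenizer import Tokenizer

tokens = Tokenizer().tokenize("{|\n|-\n|}")
for token in tokens:
    print(type(token).__name__)
# roughly: TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"),
#          TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseSelfclose(padding="\n"),
#          TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()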

/*
    Parse as normal syntax unless we hit a style marker, then parse style
    as HTML attributes and the remainder as normal syntax.
*/
static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup,
                                       const char *tag, uint64_t line_context)
{
    if (!Tokenizer_CAN_RECURSE(self)) {
        if (Tokenizer_emit_text(self, markup))
            return -1;
        self->head += strlen(markup) - 1;
        return 0;
    }

    uint64_t old_context = self->topstack->context;
    uint64_t cell_context;
    Py_ssize_t reset = self->head;
    self->head += strlen(markup);
    PyObject *padding;
    PyObject *cell, *open_kwargs, *close_kwargs, *open_wiki_markup, *close_wiki_markup;
    PyObject *style = NULL;

    cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN |
                                 LC_TABLE_CELL_STYLE | line_context, 1);
    if (BAD_ROUTE) {
        self->head = reset;
        return 0;
    }
    if (!cell)
        return -1;
    cell_context = self->topstack->context;
    self->topstack->context = old_context;

    if (cell_context & LC_TABLE_CELL_STYLE) {
        Py_DECREF(cell);
        self->head = reset + strlen(markup);
        if (Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | line_context))
            return -1;
        padding = Tokenizer_parse_as_table_style(self, '|', 0);
        if (BAD_ROUTE) {
            self->head = reset;
            return 0;
        }
        if (!padding)
            return -1;
        style = Tokenizer_pop(self);
        if (!style) {
            Py_DECREF(padding);
            return -1;
        }
        // Don't parse the style separator
        self->head++;
        cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | line_context, 1);
        if (BAD_ROUTE) {
            self->head = reset;
            return 0;
        }
        if (!cell)
            return -1;
        cell_context = self->topstack->context;
        self->topstack->context = old_context;
    }
    else {
        padding = PyUnicode_FromString("");
        if (!padding) {
            Py_DECREF(cell);
            return -1;
        }
    }

    open_kwargs = PyDict_New();
    if (!open_kwargs)
        goto fail_decref_all;
    close_kwargs = PyDict_New();
    if (!close_kwargs)
        goto fail_decref_all;
    open_wiki_markup = PyUnicode_FromString(markup);
    if (!open_wiki_markup)
        goto fail_decref_all;
    PyDict_SetItemString(open_kwargs, "wiki_markup", open_wiki_markup);
    Py_DECREF(open_wiki_markup);
    if (Tokenizer_emit_kwargs(self, TagOpenOpen, open_kwargs))
        goto fail_decref_all;
    if (Tokenizer_emit_text(self, tag))
        goto fail_decref_all;

    if (style) {
        if (Tokenizer_emit_all(self, style))
            goto fail_decref_all;
        close_wiki_markup = PyUnicode_FromString("|");
        if (!close_wiki_markup)
            goto fail_decref_all;
        PyDict_SetItemString(close_kwargs, "wiki_markup", close_wiki_markup);
        Py_DECREF(close_wiki_markup);
        Py_DECREF(style);
    }

    PyDict_SetItemString(close_kwargs, "padding", padding);
    Py_DECREF(padding);
    if (Tokenizer_emit_kwargs(self, TagCloseSelfclose, close_kwargs))
        goto fail_decref_cell;
    if (Tokenizer_emit_all(self, cell))
        goto fail_decref_cell;
    Py_DECREF(cell);
    // keep header/cell line contexts
    self->topstack->context |= cell_context & (LC_TABLE_TH_LINE | LC_TABLE_TD_LINE);
    // offset displacement done by parse()
    self->head--;
    return 0;

fail_decref_all:
    Py_XDECREF(style);
    Py_DECREF(padding);
    Py_XDECREF(open_kwargs);
    Py_XDECREF(close_kwargs);
fail_decref_cell:
    Py_DECREF(cell);
    return -1;
}

/*
    Return the stack to handle the table cell end, keeping the current
    context and setting or clearing its cell-style flag.
*/
static PyObject* Tokenizer_handle_table_cell_end(Tokenizer* self, int reset_for_style)
{
    if (reset_for_style)
        self->topstack->context |= LC_TABLE_CELL_STYLE;
    else
        self->topstack->context &= ~LC_TABLE_CELL_STYLE;
    return Tokenizer_pop_keeping_context(self);
}
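
The reset_for_style flag is what makes the two-pass cell parse in Tokenizer_handle_table_cell work: the first pass bails out at a bare "|" with LC_TABLE_CELL_STYLE set, and the second pass re-reads the prefix as attributes. The visible effect, sketched with the public API (assumed output):

import mwparserfromhell

code = mwparserfromhell.parse('{|\n| style="color: red"| text\n|}')
td = [tag for tag in code.filter_tags() if str(tag.tag) == "td"][0]
print(td.attributes[0].name)  # style -- the prefix before the bare "|"
print(td.contents)            # " text" -- parsed normally after the separator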

/*
    Make sure we are not trying to write an invalid character. Return 0 if
    everything is safe, or -1 if the route must be failed.
*/
@@ -2533,6 +2926,24 @@ static int Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UNICODE d
}

/*
    Returns whether the current head has leading whitespace.
    TODO: treat comments and templates as whitespace, allow fail on non-newline spaces.
*/
static int Tokenizer_has_leading_whitespace(Tokenizer* self)
{
    int offset = 1;
    Py_UNICODE current_character;
    while (1) {
        current_character = Tokenizer_READ_BACKWARDS(self, offset);
        if (!current_character || current_character == '\n')
            return 1;
        else if (!Py_UNICODE_ISSPACE(current_character))
            return 0;
        offset++;
    }
}
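
A pure-Python mirror of this backwards scan, for clarity (a hypothetical helper, not part of the library):

def has_leading_whitespace(text, head):
    # True when everything between `head` and the previous newline
    # (or the start of the string) is whitespace.
    offset = 1
    while True:
        if head - offset < 0:
            return True
        current = text[head - offset]
        if current == "\n":
            return True
        if not current.isspace():
            return False
        offset += 1

assert has_leading_whitespace("  {|", 2)      # only spaces before the head
assert not has_leading_whitespace("x {|", 2)  # 'x' earlier on the same line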

/*
    Parse the wikicode string, using context for when to stop. If push is true,
    we will push a new context, otherwise we won't and context will be ignored.
*/
@@ -2667,24 +3078,94 @@ static PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push)
            if (temp != Py_None)
                return temp;
        }
        else if ((!last || last == '\n') && (this == '#' || this == '*' || this == ';' || this == ':')) {
            if (Tokenizer_handle_list(self))
                return NULL;
        }
        else if ((!last || last == '\n') && (this == '-' && this == next &&
                 this == Tokenizer_READ(self, 2) &&
                 this == Tokenizer_READ(self, 3))) {
            if (Tokenizer_handle_hr(self))
                return NULL;
        }
        else if ((this == '\n' || this == ':') && this_context & LC_DLTERM) {
            if (Tokenizer_handle_dl_term(self))
                return NULL;
            // kill potential table contexts
            if (this == '\n')
                self->topstack->context &= ~LC_TABLE_CELL_LINE_CONTEXTS;
        }

        // Start of table parsing
        else if (this == '{' && next == '|' && Tokenizer_has_leading_whitespace(self)) {
            if (Tokenizer_CAN_RECURSE(self)) {
                if (Tokenizer_handle_table_start(self))
                    return NULL;
            }
            else if (Tokenizer_emit_char(self, this) || Tokenizer_emit_char(self, next))
                return NULL;
            else
                self->head++;
        }
        else if (this_context & LC_TABLE_OPEN) {
            if (this == '|' && next == '|' && this_context & LC_TABLE_TD_LINE) {
                if (this_context & LC_TABLE_CELL_OPEN)
                    return Tokenizer_handle_table_cell_end(self, 0);
                else if (Tokenizer_handle_table_cell(self, "||", "td", LC_TABLE_TD_LINE))
                    return NULL;
            }
            else if (this == '|' && next == '|' && this_context & LC_TABLE_TH_LINE) {
                if (this_context & LC_TABLE_CELL_OPEN)
                    return Tokenizer_handle_table_cell_end(self, 0);
                else if (Tokenizer_handle_table_cell(self, "||", "th", LC_TABLE_TH_LINE))
                    return NULL;
            }
            else if (this == '!' && next == '!' && this_context & LC_TABLE_TH_LINE) {
                if (this_context & LC_TABLE_CELL_OPEN)
                    return Tokenizer_handle_table_cell_end(self, 0);
                else if (Tokenizer_handle_table_cell(self, "!!", "th", LC_TABLE_TH_LINE))
                    return NULL;
            }
            else if (this == '|' && this_context & LC_TABLE_CELL_STYLE) {
                return Tokenizer_handle_table_cell_end(self, 1);
            }
            // on newline, clear out cell line contexts
            else if (this == '\n' && this_context & LC_TABLE_CELL_LINE_CONTEXTS) {
                self->topstack->context &= ~LC_TABLE_CELL_LINE_CONTEXTS;
                if (Tokenizer_emit_char(self, this))
                    return NULL;
            }
            else if (Tokenizer_has_leading_whitespace(self)) {
                if (this == '|' && next == '}') {
                    if (this_context & LC_TABLE_CELL_OPEN)
                        return Tokenizer_handle_table_cell_end(self, 0);
                    else
                        return Tokenizer_handle_table_end(self);
                }
                else if (this == '|' && next == '-') {
                    if (this_context & LC_TABLE_CELL_OPEN)
                        return Tokenizer_handle_table_cell_end(self, 0);
                    else if (Tokenizer_handle_table_row(self))
                        return NULL;
                }
                else if (this == '|') {
                    if (this_context & LC_TABLE_CELL_OPEN)
                        return Tokenizer_handle_table_cell_end(self, 0);
                    else if (Tokenizer_handle_table_cell(self, "|", "td", LC_TABLE_TD_LINE))
                        return NULL;
                }
                else if (this == '!') {
                    if (this_context & LC_TABLE_CELL_OPEN)
                        return Tokenizer_handle_table_cell_end(self, 0);
                    else if (Tokenizer_handle_table_cell(self, "!", "th", LC_TABLE_TH_LINE))
                        return NULL;
                }
                else if (Tokenizer_emit_char(self, this))
                    return NULL;
            }
            else if (Tokenizer_emit_char(self, this))
                return NULL;
        }
        else if (Tokenizer_emit_char(self, this))
            return NULL;
        self->head++;
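
Putting the new dispatch together: headers, double-marker cells, and rows all map onto HTML-style tags. A sketch of the end result through the public API (tag order is document order; output assumed, not verbatim):

import mwparserfromhell

code = mwparserfromhell.parse("{|\n! h1 !! h2\n|-\n| a || b\n|}")
print([str(tag.tag) for tag in code.filter_tags()])
# roughly: ['table', 'th', 'th', 'tr', 'td', 'td']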


mwparserfromhell/parser/tokenizer.h (+58, -50)

@@ -44,9 +44,9 @@ SOFTWARE.

static const char MARKERS[] = {
    '{', '}', '[', ']', '<', '>', '|', '=', '&', '\'', '#', '*', ';', ':', '/',
    '-', '!', '\n', '\0'};

#define NUM_MARKERS 19
#define TEXTBUFFER_BLOCKSIZE 1024
#define MAX_DEPTH 40
#define MAX_CYCLES 100000
@@ -110,60 +110,68 @@ static PyObject* TagCloseClose;

/* Local contexts: */

#define LC_TEMPLATE             0x0000000000000007
#define LC_TEMPLATE_NAME        0x0000000000000001
#define LC_TEMPLATE_PARAM_KEY   0x0000000000000002
#define LC_TEMPLATE_PARAM_VALUE 0x0000000000000004

#define LC_ARGUMENT             0x0000000000000018
#define LC_ARGUMENT_NAME        0x0000000000000008
#define LC_ARGUMENT_DEFAULT     0x0000000000000010

#define LC_WIKILINK             0x0000000000000060
#define LC_WIKILINK_TITLE       0x0000000000000020
#define LC_WIKILINK_TEXT        0x0000000000000040

#define LC_EXT_LINK             0x0000000000000180
#define LC_EXT_LINK_URI         0x0000000000000080
#define LC_EXT_LINK_TITLE       0x0000000000000100

#define LC_HEADING              0x0000000000007E00
#define LC_HEADING_LEVEL_1      0x0000000000000200
#define LC_HEADING_LEVEL_2      0x0000000000000400
#define LC_HEADING_LEVEL_3      0x0000000000000800
#define LC_HEADING_LEVEL_4      0x0000000000001000
#define LC_HEADING_LEVEL_5      0x0000000000002000
#define LC_HEADING_LEVEL_6      0x0000000000004000

#define LC_TAG                  0x0000000000078000
#define LC_TAG_OPEN             0x0000000000008000
#define LC_TAG_ATTR             0x0000000000010000
#define LC_TAG_BODY             0x0000000000020000
#define LC_TAG_CLOSE            0x0000000000040000

#define LC_STYLE                0x0000000000780000
#define LC_STYLE_ITALICS        0x0000000000080000
#define LC_STYLE_BOLD           0x0000000000100000
#define LC_STYLE_PASS_AGAIN     0x0000000000200000
#define LC_STYLE_SECOND_PASS    0x0000000000400000

#define LC_DLTERM               0x0000000000800000

#define LC_SAFETY_CHECK         0x000000003F000000
#define LC_HAS_TEXT             0x0000000001000000
#define LC_FAIL_ON_TEXT         0x0000000002000000
#define LC_FAIL_NEXT            0x0000000004000000
#define LC_FAIL_ON_LBRACE       0x0000000008000000
#define LC_FAIL_ON_RBRACE       0x0000000010000000
#define LC_FAIL_ON_EQUALS       0x0000000020000000

// TODO realign all
#define LC_TABLE                    0x00000007C0000000
#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000000700000000
#define LC_TABLE_OPEN               0x0000000040000000
#define LC_TABLE_CELL_OPEN          0x0000000080000000
#define LC_TABLE_CELL_STYLE         0x0000000100000000
#define LC_TABLE_TD_LINE            0x0000000200000000
#define LC_TABLE_TH_LINE            0x0000000400000000

/* Global contexts: */

#define GL_HEADING 0x1

/* Aggregate contexts: */

#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE | LC_TABLE_OPEN)
#define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)
#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE)
#define AGG_NO_WIKILINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI)
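
The table contexts are ordinary bit flags in the now-64-bit context word, which is why every LC_* value above was widened. A small Python sketch of how they compose (values copied from the defines; not library code):

LC_TABLE_OPEN       = 0x0000000040000000
LC_TABLE_CELL_OPEN  = 0x0000000080000000
LC_TABLE_CELL_STYLE = 0x0000000100000000
LC_TABLE_TD_LINE    = 0x0000000200000000
LC_TABLE_TH_LINE    = 0x0000000400000000
LC_TABLE_CELL_LINE_CONTEXTS = (LC_TABLE_CELL_STYLE | LC_TABLE_TD_LINE |
                               LC_TABLE_TH_LINE)
assert LC_TABLE_CELL_LINE_CONTEXTS == 0x0000000700000000

context = LC_TABLE_OPEN | LC_TABLE_TD_LINE  # inside a table, on a cell line
assert context & LC_TABLE_OPEN              # takes the LC_TABLE_OPEN branch
context &= ~LC_TABLE_CELL_LINE_CONTEXTS     # what a newline does in parse()
assert context == LC_TABLE_OPEN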


mwparserfromhell/parser/tokenizer.py (+1, -1)

@@ -1134,7 +1134,7 @@ class Tokenizer(object):
        self._emit_all(cell)
        # keep header/cell line contexts
        self._context |= cell_context & (contexts.TABLE_TH_LINE | contexts.TABLE_TD_LINE)
        # offset displacement done by parse()
        self._head -= 1

    def _handle_table_cell_end(self, reset_for_style=False):

