From 0128b1f78a346dbe774800bd17b1b0f92bb9ca30 Mon Sep 17 00:00:00 2001
From: David Winegar <david.s.winegar@gmail.com>
Date: Fri, 18 Jul 2014 17:41:24 -0700
Subject: [PATCH] Implement CTokenizer for tables

CTokenizer is completely implemented in this commit - it didn't
make much sense to me to split it up. All tests passing, memory test
shows no leaks on Linux.
---
 mwparserfromhell/parser/tokenizer.c  | 503 ++++++++++++++++++++++++++++++++++-
 mwparserfromhell/parser/tokenizer.h  | 108 ++++----
 mwparserfromhell/parser/tokenizer.py |   2 +-
 3 files changed, 551 insertions(+), 62 deletions(-)

diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c
index 90f51b0..1d2964e 100644
--- a/mwparserfromhell/parser/tokenizer.c
+++ b/mwparserfromhell/parser/tokenizer.c
@@ -2454,6 +2454,399 @@ static PyObject* Tokenizer_handle_end(Tokenizer* self, uint64_t context)
 }
 
 /*
+    Parse table style attributes up to ``end_token`` (left unconsumed); when
+    ``break_on_table_end`` is set, a ``|}`` at the head also ends the style.
+    Returns the rendered padding; callers check BAD_ROUTE first, then NULL.
+*/
+static PyObject* Tokenizer_parse_as_table_style(Tokenizer* self, char end_token,
+                                                int break_on_table_end)
+{
+    TagData *data = TagData_new();
+    PyObject *padding, *trash;
+    Py_UNICODE this, next;
+    int can_exit, table_end;
+
+    if (!data)
+        return NULL;
+    data->context = TAG_ATTR_READY;
+
+    while (1) {
+        this = Tokenizer_READ(self, 0);
+        next = Tokenizer_READ(self, 1);
+        can_exit = (!(data->context & TAG_QUOTED) || data->context & TAG_NOTE_SPACE);
+        table_end = (break_on_table_end && this == '|' && next == '}');
+        if ((this == end_token && can_exit) || table_end) {
+            // Normal exit: push a name/value still in progress, return padding
+            if (data->context & (TAG_ATTR_NAME | TAG_ATTR_VALUE)) {
+                if (Tokenizer_push_tag_buffer(self, data)) {
+                    TagData_dealloc(data);
+                    return NULL;
+                }
+            }
+            if (Py_UNICODE_ISSPACE(this))
+                Textbuffer_write(&(data->pad_first), this);
+            padding = Textbuffer_render(data->pad_first);
+            TagData_dealloc(data);
+            return padding;
+        }
+        else if (!this || this == end_token) {
+            // Zero character read, or ``end_token`` while a quote is still open
+            if (self->topstack->context & LC_TAG_ATTR) {
+                if (data->context & TAG_QUOTED) {
+                    // Unclosed attribute quote: reset, don't die
+                    data->context = TAG_ATTR_VALUE;
+                    trash = Tokenizer_pop(self);
+                    Py_XDECREF(trash);
+                    self->head = data->reset;
+                    continue;
+                }
+                trash = Tokenizer_pop(self);
+                Py_XDECREF(trash);
+            }
+            TagData_dealloc(data);
+            return Tokenizer_fail_route(self);
+        }
+        else if (Tokenizer_handle_tag_data(self, data, this) || BAD_ROUTE) {
+            TagData_dealloc(data);
+            return NULL;
+        }
+        self->head++;
+    }
+}
+
+/*
+    Handle the start of a table: the ``{|`` opener, its style attributes and,
+    unless the table is inline, its body up to the closing ``|}``.
+    Emits the table tag tokens on success; if the route fails, ``{|`` is
+    emitted as plain text instead. Returns 0 in both cases and -1 on error.
+*/
+static int Tokenizer_handle_table_start(Tokenizer* self)
+{
+    Py_ssize_t reset;
+    PyObject *open_open_kwargs, *close_open_kwargs, *open_close_kwargs,
+             *newline_character, *open_wiki_markup, *close_wiki_markup;
+    // Owned references; each is reset to NULL once released or handed off
+    PyObject *padding = NULL, *style = NULL, *table = NULL;
+    int is_multiline;
+
+    self->head += 2;
+    reset = self->head;
+    if (Tokenizer_push(self, LC_TABLE_OPEN))
+        return -1;
+    padding = Tokenizer_parse_as_table_style(self, '\n', 1);
+    if (BAD_ROUTE)
+        goto not_a_table;
+    if (!padding)
+        return -1;
+    style = Tokenizer_pop(self);
+    if (!style)
+        goto fail;
+
+    newline_character = PyUnicode_FromString("\n");
+    if (!newline_character)
+        goto fail;
+    // PyUnicode_Contains() returns -1 on error, which must not read as true
+    is_multiline = PyUnicode_Contains(padding, newline_character);
+    Py_DECREF(newline_character);
+    if (is_multiline < 0)
+        goto fail;
+    // continue to parse if it is NOT an inline table
+    if (is_multiline) {
+        self->head++;
+        table = Tokenizer_parse(self, LC_TABLE_OPEN, 1);
+        if (BAD_ROUTE)
+            goto not_a_table;
+        if (!table)
+            goto fail;
+    } else {
+        // close tag
+        self->head += 2;
+    }
+
+    // Opening tag: TagOpenOpen(wiki_markup="{|"), "table", style, TagCloseOpen
+    open_open_kwargs = PyDict_New();
+    if (!open_open_kwargs)
+        goto fail;
+    open_wiki_markup = PyUnicode_FromString("{|");
+    if (!open_wiki_markup) {
+        Py_DECREF(open_open_kwargs);
+        goto fail;
+    }
+    PyDict_SetItemString(open_open_kwargs, "wiki_markup", open_wiki_markup);
+    Py_DECREF(open_wiki_markup);
+    // The kwargs dict is not released after this call on either outcome.
+    // NOTE(review): presumably Tokenizer_emit_kwargs() consumes it -- confirm
+    if (Tokenizer_emit_kwargs(self, TagOpenOpen, open_open_kwargs))
+        goto fail;
+    if (Tokenizer_emit_text(self, "table"))
+        goto fail;
+
+    if (Tokenizer_emit_all(self, style))
+        goto fail;
+    Py_CLEAR(style);
+
+    close_open_kwargs = PyDict_New();
+    if (!close_open_kwargs)
+        goto fail;
+    PyDict_SetItemString(close_open_kwargs, "padding", padding);
+    Py_CLEAR(padding);
+    if (Tokenizer_emit_kwargs(self, TagCloseOpen, close_open_kwargs))
+        goto fail;
+
+    if (table) {
+        if (Tokenizer_emit_all(self, table))
+            goto fail;
+        Py_CLEAR(table);
+    }
+
+    // Closing tag: TagOpenClose(wiki_markup="|}"), "table", TagCloseClose
+    open_close_kwargs = PyDict_New();
+    if (!open_close_kwargs)
+        goto fail;
+    close_wiki_markup = PyUnicode_FromString("|}");
+    if (!close_wiki_markup) {
+        Py_DECREF(open_close_kwargs);
+        goto fail;
+    }
+    PyDict_SetItemString(open_close_kwargs, "wiki_markup", close_wiki_markup);
+    Py_DECREF(close_wiki_markup);
+    if (Tokenizer_emit_kwargs(self, TagOpenClose, open_close_kwargs))
+        goto fail;
+    if (Tokenizer_emit_text(self, "table"))
+        goto fail;
+    if (Tokenizer_emit(self, TagCloseClose))
+        goto fail;
+    // offset displacement done by parse()
+    self->head--;
+    return 0;
+
+    not_a_table:
+    // Route failed: release what was gathered and emit the opener as text
+    RESET_ROUTE();
+    Py_XDECREF(padding);
+    Py_XDECREF(style);
+    // offset displacement done by parse()
+    self->head = reset - 1;
+    return Tokenizer_emit_text(self, "{|") ? -1 : 0;
+
+    fail:
+    // Anything already released or handed off is NULL by now
+    Py_XDECREF(padding);
+    Py_XDECREF(style);
+    Py_XDECREF(table);
+    return -1;
+}
+
+/*
+    Skip past the ``|}`` table closer and return the result of popping the stack.
+*/
+static PyObject * Tokenizer_handle_table_end(Tokenizer* self)
+{
+    self->head += 2;
+    return Tokenizer_pop(self);
+}
+
+/*
+    Handle a table row marker (``|-``): parse the rest of the line as style
+    attributes and emit them inside a self-closing ``tr`` tag. Returns 0 on
+    success or when the route fails (head is rewound), and -1 on error.
+*/
+static int Tokenizer_handle_table_row(Tokenizer* self)
+{
+    Py_ssize_t reset = self->head;
+    PyObject *padding, *open_kwargs, *close_kwargs, *wiki_markup;
+    PyObject *style = NULL;
+
+    self->head += 2;
+    // If we can't recurse, still tokenize tag but parse style attrs as text
+    if (Tokenizer_CAN_RECURSE(self)) {
+        if (Tokenizer_push(self, LC_TABLE_OPEN))
+            return -1;
+        padding = Tokenizer_parse_as_table_style(self, '\n', 0);
+        if (BAD_ROUTE) {
+            self->head = reset;
+            return 0;
+        }
+        if (!padding)
+            return -1;
+        style = Tokenizer_pop(self);
+        if (!style)
+            goto fail_decref_all;
+    } else {
+        padding = PyUnicode_FromString("");
+        if (!padding)
+            return -1;
+    }
+
+    open_kwargs = PyDict_New();
+    if (!open_kwargs)
+        goto fail_decref_all;
+    wiki_markup = PyUnicode_FromString("|-");
+    if (!wiki_markup) {
+        Py_DECREF(open_kwargs);
+        goto fail_decref_all;
+    }
+    PyDict_SetItemString(open_kwargs, "wiki_markup", wiki_markup);
+    Py_DECREF(wiki_markup);
+    if (Tokenizer_emit_kwargs(self, TagOpenOpen, open_kwargs))
+        goto fail_decref_all;
+    if (Tokenizer_emit_text(self, "tr"))
+        goto fail_decref_all;
+
+    if (style) {
+        if (Tokenizer_emit_all(self, style))
+            goto fail_decref_all;
+        Py_CLEAR(style);  // NULL it: fail_decref_all would release it again
+    }
+
+    close_kwargs = PyDict_New();
+    if (!close_kwargs)
+        goto fail_decref_all;
+    PyDict_SetItemString(close_kwargs, "padding", padding);
+    Py_DECREF(padding);
+    if (Tokenizer_emit_kwargs(self, TagCloseSelfclose, close_kwargs))
+        return -1;
+    return 0;
+
+    fail_decref_all:
+    Py_XDECREF(style);
+    Py_DECREF(padding);
+    return -1;
+}
+
+/*
+    Handle a table cell opened by ``markup`` and emitted as tag ``tag``: parse
+    as normal syntax unless we hit a style marker, then parse style as HTML
+    attributes and the remainder as normal syntax. Returns 0 (also when the
+    route fails, after rewinding the head) or -1 on error.
+*/
+static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup,
+                                       const char *tag, uint64_t line_context)
+{
+    uint64_t old_context, cell_context;
+    Py_ssize_t reset, markup_len = (Py_ssize_t) strlen(markup);
+    PyObject *open_wiki_markup, *close_wiki_markup, *cell = NULL, *padding = NULL;
+    PyObject *style = NULL, *open_kwargs = NULL, *close_kwargs = NULL;
+    int emit_failed;
+
+    if (!Tokenizer_CAN_RECURSE(self)) {
+        if (Tokenizer_emit_text(self, markup))
+            return -1;
+        self->head += markup_len - 1;
+        return 0;
+    }
+
+    old_context = self->topstack->context;
+    reset = self->head;
+    self->head += markup_len;
+    cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | LC_TABLE_CELL_STYLE | line_context, 1);
+    if (BAD_ROUTE)
+        goto route_failed;
+    if (!cell)
+        return -1;
+    cell_context = self->topstack->context;
+    self->topstack->context = old_context;
+
+    if (cell_context & LC_TABLE_CELL_STYLE) {
+        // First pass stopped at a style separator: redo the start as attrs
+        Py_CLEAR(cell);
+        self->head = reset + markup_len;
+        if (Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | line_context))
+            return -1;
+        padding = Tokenizer_parse_as_table_style(self, '|', 0);
+        if (BAD_ROUTE)
+            goto route_failed;
+        if (!padding)
+            return -1;
+        style = Tokenizer_pop(self);
+        if (!style)
+            goto fail;
+        // Don't parse the style separator
+        self->head++;
+        cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | line_context, 1);
+        if (BAD_ROUTE)
+            goto route_failed;
+        if (!cell)
+            goto fail;
+        cell_context = self->topstack->context;
+        self->topstack->context = old_context;
+    } else {
+        padding = PyUnicode_FromString("");
+        if (!padding)
+            goto fail;
+    }
+
+    open_kwargs = PyDict_New();
+    if (!open_kwargs)
+        goto fail;
+    close_kwargs = PyDict_New();
+    if (!close_kwargs)
+        goto fail;
+    open_wiki_markup = PyUnicode_FromString(markup);
+    if (!open_wiki_markup)
+        goto fail;
+    PyDict_SetItemString(open_kwargs, "wiki_markup", open_wiki_markup);
+    Py_DECREF(open_wiki_markup);
+    // NOTE(review): like the sibling table handlers, this assumes that
+    // Tokenizer_emit_kwargs() takes over kwargs even when it fails -- confirm
+    emit_failed = Tokenizer_emit_kwargs(self, TagOpenOpen, open_kwargs);
+    open_kwargs = NULL;
+    if (emit_failed || Tokenizer_emit_text(self, tag))
+        goto fail;
+
+    if (style) {
+        if (Tokenizer_emit_all(self, style))
+            goto fail;
+        close_wiki_markup = PyUnicode_FromString("|");
+        if (!close_wiki_markup)
+            goto fail;
+        PyDict_SetItemString(close_kwargs, "wiki_markup", close_wiki_markup);
+        Py_DECREF(close_wiki_markup);
+        Py_CLEAR(style);
+    }
+
+    PyDict_SetItemString(close_kwargs, "padding", padding);
+    Py_CLEAR(padding);
+    emit_failed = Tokenizer_emit_kwargs(self, TagCloseSelfclose, close_kwargs);
+    close_kwargs = NULL;
+    if (emit_failed || Tokenizer_emit_all(self, cell))
+        goto fail;
+    Py_CLEAR(cell);
+    // keep header/cell line contexts
+    self->topstack->context |= cell_context & (LC_TABLE_TH_LINE | LC_TABLE_TD_LINE);
+    // offset displacement done by parse()
+    self->head--;
+    return 0;
+
+    route_failed:  // BAD_ROUTE is not reset here; release what we hold, rewind
+    Py_XDECREF(padding);
+    Py_XDECREF(style);
+    self->head = reset;
+    return 0;
+
+    fail:
+    Py_XDECREF(cell);
+    Py_XDECREF(padding);
+    Py_XDECREF(style);
+    Py_XDECREF(open_kwargs);
+    Py_XDECREF(close_kwargs);
+    return -1;
+}
+
+/*
+    Set or clear LC_TABLE_CELL_STYLE in the current context according to
+    ``reset_for_style``, then return Tokenizer_pop_keeping_context().
+*/
+static PyObject* Tokenizer_handle_table_cell_end(Tokenizer* self, int reset_for_style)
+{
+    if (reset_for_style)
+        self->topstack->context |= LC_TABLE_CELL_STYLE;
+    else
+        self->topstack->context &= ~LC_TABLE_CELL_STYLE;
+    return Tokenizer_pop_keeping_context(self);
+}
+
+/*
     Make sure we are not trying to write an invalid character. Return 0 if
     everything is safe, or -1 if the route must be failed.
 */
@@ -2533,6 +2926,24 @@ static int Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UNICODE d
 }
 
 /*
+    Return 1 if only whitespace precedes the head on its line, 0 otherwise.
+    TODO: treat comments and templates as whitespace, allow fail on non-newline spaces.
+*/
+static int Tokenizer_has_leading_whitespace(Tokenizer* self)
+{
+    int distance;
+    Py_UNICODE ch;
+
+    for (distance = 1; ; distance++) {
+        ch = Tokenizer_READ_BACKWARDS(self, distance);
+        if (!ch || ch == '\n')
+            return 1;
+        if (!Py_UNICODE_ISSPACE(ch))
+            return 0;
+    }
+}
+
+/*
     Parse the wikicode string, using context for when to stop. If push is true,
     we will push a new context, otherwise we won't and context will be ignored.
 */
@@ -2667,24 +3078,94 @@ static PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push)
             if (temp != Py_None)
                 return temp;
         }
-        else if (!last || last == '\n') {
-            if (this == '#' || this == '*' || this == ';' || this == ':') {
-                if (Tokenizer_handle_list(self))
+        else if ((!last || last == '\n') && (this == '#' || this == '*' || this == ';' || this == ':')) {
+            if (Tokenizer_handle_list(self))
+                return NULL;
+        }
+        else if ((!last || last == '\n') && (this == '-' && this == next &&
+                 this == Tokenizer_READ(self, 2) &&
+                 this == Tokenizer_READ(self, 3))) {
+            if (Tokenizer_handle_hr(self))
+                return NULL;
+        }
+        else if ((this == '\n' || this == ':') && this_context & LC_DLTERM) {
+            if (Tokenizer_handle_dl_term(self))
+                return NULL;
+            // kill potential table contexts
+            if (this == '\n')
+                self->topstack->context &= ~LC_TABLE_CELL_LINE_CONTEXTS;
+        }
+
+        // Start of table parsing
+        else if (this == '{' && next == '|' && Tokenizer_has_leading_whitespace(self)) {
+            if (Tokenizer_CAN_RECURSE(self)) {
+                if (Tokenizer_handle_table_start(self))
+                    return NULL;
+            }
+            else if (Tokenizer_emit_char(self, this) || Tokenizer_emit_char(self, next))
+                return NULL;
+            else
+                self->head++;
+        }
+        else if (this_context & LC_TABLE_OPEN) {
+            if (this == '|' && next == '|' && this_context & LC_TABLE_TD_LINE) {
+                if (this_context & LC_TABLE_CELL_OPEN)
+                    return Tokenizer_handle_table_cell_end(self, 0);
+                else if (Tokenizer_handle_table_cell(self, "||", "td", LC_TABLE_TD_LINE))
+                    return NULL;
+            }
+            else if (this == '|' && next == '|' && this_context & LC_TABLE_TH_LINE) {
+                if (this_context & LC_TABLE_CELL_OPEN)
+                    return Tokenizer_handle_table_cell_end(self, 0);
+                else if (Tokenizer_handle_table_cell(self, "||", "th", LC_TABLE_TH_LINE))
                     return NULL;
             }
-            else if (this == '-' && this == next &&
-                     this == Tokenizer_READ(self, 2) &&
-                     this == Tokenizer_READ(self, 3)) {
-                if (Tokenizer_handle_hr(self))
+            else if (this == '!' && next == '!' && this_context & LC_TABLE_TH_LINE) {
+                if (this_context & LC_TABLE_CELL_OPEN)
+                    return Tokenizer_handle_table_cell_end(self, 0);
+                else if (Tokenizer_handle_table_cell(self, "!!", "th", LC_TABLE_TH_LINE))
+                    return NULL;
+            }
+            else if (this == '|' && this_context & LC_TABLE_CELL_STYLE) {
+                return Tokenizer_handle_table_cell_end(self, 1);
+            }
+            // on newline, clear out cell line contexts
+            else if (this == '\n' && this_context & LC_TABLE_CELL_LINE_CONTEXTS) {
+                self->topstack->context &= ~LC_TABLE_CELL_LINE_CONTEXTS;
+                if (Tokenizer_emit_char(self, this))
+                    return NULL;
+            }
+            else if (Tokenizer_has_leading_whitespace(self)) {
+                if (this == '|' && next == '}') {
+                    if (this_context & LC_TABLE_CELL_OPEN)
+                        return Tokenizer_handle_table_cell_end(self, 0);
+                    else
+                        return Tokenizer_handle_table_end(self);
+                }
+                else if (this == '|' && next == '-') {
+                    if (this_context & LC_TABLE_CELL_OPEN)
+                        return Tokenizer_handle_table_cell_end(self, 0);
+                    else if (Tokenizer_handle_table_row(self))
+                        return NULL;
+                }
+                else if (this == '|') {
+                    if (this_context & LC_TABLE_CELL_OPEN)
+                        return Tokenizer_handle_table_cell_end(self, 0);
+                    else if (Tokenizer_handle_table_cell(self, "|", "td", LC_TABLE_TD_LINE))
+                        return NULL;
+                }
+                else if (this == '!') {
+                    if (this_context & LC_TABLE_CELL_OPEN)
+                        return Tokenizer_handle_table_cell_end(self, 0);
+                    else if (Tokenizer_handle_table_cell(self, "!", "th", LC_TABLE_TH_LINE))
+                        return NULL;
+                }
+                else if (Tokenizer_emit_char(self, this))
                     return NULL;
             }
             else if (Tokenizer_emit_char(self, this))
                 return NULL;
         }
-        else if ((this == '\n' || this == ':') && this_context & LC_DLTERM) {
-            if (Tokenizer_handle_dl_term(self))
-                return NULL;
-        }
         else if (Tokenizer_emit_char(self, this))
             return NULL;
         self->head++;
diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h
index e9b1a92..de7b7d4 100644
--- a/mwparserfromhell/parser/tokenizer.h
+++ b/mwparserfromhell/parser/tokenizer.h
@@ -44,9 +44,9 @@ SOFTWARE.
 
 static const char MARKERS[] = {
     '{', '}', '[', ']', '<', '>', '|', '=', '&', '\'', '#', '*', ';', ':', '/',
-    '-', '\n', '\0'};
+    '-', '!', '\n', '\0'};
 
-#define NUM_MARKERS 18
+#define NUM_MARKERS 19
 #define TEXTBUFFER_BLOCKSIZE 1024
 #define MAX_DEPTH 40
 #define MAX_CYCLES 100000
@@ -110,60 +110,68 @@ static PyObject* TagCloseClose;
 
 /* Local contexts: */
 
-#define LC_TEMPLATE             0x00000007
-#define LC_TEMPLATE_NAME        0x00000001
-#define LC_TEMPLATE_PARAM_KEY   0x00000002
-#define LC_TEMPLATE_PARAM_VALUE 0x00000004
-
-#define LC_ARGUMENT             0x00000018
-#define LC_ARGUMENT_NAME        0x00000008
-#define LC_ARGUMENT_DEFAULT     0x00000010
-
-#define LC_WIKILINK             0x00000060
-#define LC_WIKILINK_TITLE       0x00000020
-#define LC_WIKILINK_TEXT        0x00000040
-
-#define LC_EXT_LINK             0x00000180
-#define LC_EXT_LINK_URI         0x00000080
-#define LC_EXT_LINK_TITLE       0x00000100
-
-#define LC_HEADING              0x00007E00
-#define LC_HEADING_LEVEL_1      0x00000200
-#define LC_HEADING_LEVEL_2      0x00000400
-#define LC_HEADING_LEVEL_3      0x00000800
-#define LC_HEADING_LEVEL_4      0x00001000
-#define LC_HEADING_LEVEL_5      0x00002000
-#define LC_HEADING_LEVEL_6      0x00004000
-
-#define LC_TAG                  0x00078000
-#define LC_TAG_OPEN             0x00008000
-#define LC_TAG_ATTR             0x00010000
-#define LC_TAG_BODY             0x00020000
-#define LC_TAG_CLOSE            0x00040000
-
-#define LC_STYLE                0x00780000
-#define LC_STYLE_ITALICS        0x00080000
-#define LC_STYLE_BOLD           0x00100000
-#define LC_STYLE_PASS_AGAIN     0x00200000
-#define LC_STYLE_SECOND_PASS    0x00400000
-
-#define LC_DLTERM               0x00800000
-
-#define LC_SAFETY_CHECK         0x3F000000
-#define LC_HAS_TEXT             0x01000000
-#define LC_FAIL_ON_TEXT         0x02000000
-#define LC_FAIL_NEXT            0x04000000
-#define LC_FAIL_ON_LBRACE       0x08000000
-#define LC_FAIL_ON_RBRACE       0x10000000
-#define LC_FAIL_ON_EQUALS       0x20000000
-
+#define LC_TEMPLATE                 0x0000000000000007
+#define LC_TEMPLATE_NAME            0x0000000000000001
+#define LC_TEMPLATE_PARAM_KEY       0x0000000000000002
+#define LC_TEMPLATE_PARAM_VALUE     0x0000000000000004
+
+#define LC_ARGUMENT                 0x0000000000000018
+#define LC_ARGUMENT_NAME            0x0000000000000008
+#define LC_ARGUMENT_DEFAULT         0x0000000000000010
+
+#define LC_WIKILINK                 0x0000000000000060
+#define LC_WIKILINK_TITLE           0x0000000000000020
+#define LC_WIKILINK_TEXT            0x0000000000000040
+
+#define LC_EXT_LINK                 0x0000000000000180
+#define LC_EXT_LINK_URI             0x0000000000000080
+#define LC_EXT_LINK_TITLE           0x0000000000000100
+
+#define LC_HEADING                  0x0000000000007E00
+#define LC_HEADING_LEVEL_1          0x0000000000000200
+#define LC_HEADING_LEVEL_2          0x0000000000000400
+#define LC_HEADING_LEVEL_3          0x0000000000000800
+#define LC_HEADING_LEVEL_4          0x0000000000001000
+#define LC_HEADING_LEVEL_5          0x0000000000002000
+#define LC_HEADING_LEVEL_6          0x0000000000004000
+
+#define LC_TAG                      0x0000000000078000
+#define LC_TAG_OPEN                 0x0000000000008000
+#define LC_TAG_ATTR                 0x0000000000010000
+#define LC_TAG_BODY                 0x0000000000020000
+#define LC_TAG_CLOSE                0x0000000000040000
+
+#define LC_STYLE                    0x0000000000780000
+#define LC_STYLE_ITALICS            0x0000000000080000
+#define LC_STYLE_BOLD               0x0000000000100000
+#define LC_STYLE_PASS_AGAIN         0x0000000000200000
+#define LC_STYLE_SECOND_PASS        0x0000000000400000
+
+#define LC_DLTERM                   0x0000000000800000
+
+#define LC_SAFETY_CHECK             0x000000003F000000
+#define LC_HAS_TEXT                 0x0000000001000000
+#define LC_FAIL_ON_TEXT             0x0000000002000000
+#define LC_FAIL_NEXT                0x0000000004000000
+#define LC_FAIL_ON_LBRACE           0x0000000008000000
+#define LC_FAIL_ON_RBRACE           0x0000000010000000
+#define LC_FAIL_ON_EQUALS           0x0000000020000000
+
+// Table contexts. TODO realign all
+#define LC_TABLE                    0x00000007C0000000  // union of the five flags below
+#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000000700000000  // CELL_STYLE | TD_LINE | TH_LINE
+#define LC_TABLE_OPEN               0x0000000040000000
+#define LC_TABLE_CELL_OPEN          0x0000000080000000  // NOTE(review): unsigned int where int is 32 bits, so ~ of it would clear bits 32+ -- confirm
+#define LC_TABLE_CELL_STYLE         0x0000000100000000
+#define LC_TABLE_TD_LINE            0x0000000200000000
+#define LC_TABLE_TH_LINE            0x0000000400000000
 /* Global contexts: */
 
 #define GL_HEADING 0x1
 
 /* Aggregate contexts: */
 
-#define AGG_FAIL         (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE)
+#define AGG_FAIL         (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE | LC_TABLE_OPEN)
 #define AGG_UNSAFE       (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)
 #define AGG_DOUBLE       (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE)
 #define AGG_NO_WIKILINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI)
diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py
index 9e22b28..e8f21c0 100644
--- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py
@@ -1134,7 +1134,7 @@ class Tokenizer(object):
         self._emit_all(cell)
         # keep header/cell line contexts
         self._context |= cell_context & (contexts.TABLE_TH_LINE | contexts.TABLE_TD_LINE)
-        # offset displacement done by _parse()
+        # offset displacement done by parse()
         self._head -= 1
 
     def _handle_table_cell_end(self, reset_for_style=False):