From 0128b1f78a346dbe774800bd17b1b0f92bb9ca30 Mon Sep 17 00:00:00 2001 From: David Winegar Date: Fri, 18 Jul 2014 17:41:24 -0700 Subject: [PATCH] Implement CTokenizer for tables CTokenizer is completely implemented in this commit - it didn't make much sense to me to split it up. All tests passing, memory test shows no leaks on Linux. --- mwparserfromhell/parser/tokenizer.c | 503 ++++++++++++++++++++++++++++++++++- mwparserfromhell/parser/tokenizer.h | 108 ++++---- mwparserfromhell/parser/tokenizer.py | 2 +- 3 files changed, 551 insertions(+), 62 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 90f51b0..1d2964e 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -2454,6 +2454,399 @@ static PyObject* Tokenizer_handle_end(Tokenizer* self, uint64_t context) } /* + Parse until ``end_token`` as style attributes for a table. +*/ +static PyObject* Tokenizer_parse_as_table_style(Tokenizer* self, char end_token, + int break_on_table_end) +{ + TagData *data = TagData_new(); + PyObject *padding, *trash; + Py_UNICODE this, next; + int can_exit, table_end; + + if (!data) + return NULL; + data->context = TAG_ATTR_READY; + + while (1) { + this = Tokenizer_READ(self, 0); + next = Tokenizer_READ(self, 1); + can_exit = (!(data->context & TAG_QUOTED) || data->context & TAG_NOTE_SPACE); + table_end = (break_on_table_end && this == '|' && next == '}'); + if ((this == end_token && can_exit) || table_end) { + if (data->context & (TAG_ATTR_NAME | TAG_ATTR_VALUE)) { + if (Tokenizer_push_tag_buffer(self, data)) { + TagData_dealloc(data); + return NULL; + } + } + if (Py_UNICODE_ISSPACE(this)) + Textbuffer_write(&(data->pad_first), this); + padding = Textbuffer_render(data->pad_first); + TagData_dealloc(data); + if (!padding) + return NULL; + return padding; + } + else if (!this || table_end || this == end_token) { + if (self->topstack->context & LC_TAG_ATTR) { + if (data->context & TAG_QUOTED) { + // Unclosed attribute quote: reset, don't die + data->context = TAG_ATTR_VALUE; + trash = Tokenizer_pop(self); + Py_XDECREF(trash); + self->head = data->reset; + continue; + } + trash = Tokenizer_pop(self); + Py_XDECREF(trash); + } + TagData_dealloc(data); + return Tokenizer_fail_route(self); + } + else { + if (Tokenizer_handle_tag_data(self, data, this) || BAD_ROUTE) { + TagData_dealloc(data); + return NULL; + } + } + self->head++; + } +} + +/* + Handle the start of a table. +*/ +static int Tokenizer_handle_table_start(Tokenizer* self) +{ + self->head += 2; + Py_ssize_t reset = self->head; + PyObject *style, *open_open_kwargs, *close_open_kwargs, *open_close_kwargs, + *padding, *newline_character, *open_wiki_markup, *close_wiki_markup; + PyObject *table = NULL; + + if(Tokenizer_push(self, LC_TABLE_OPEN)) + return -1; + padding = Tokenizer_parse_as_table_style(self, '\n', 1); + if (BAD_ROUTE) { + RESET_ROUTE(); + self->head = reset - 1; + if (Tokenizer_emit_text(self, "{|")) + return -1; + return 0; + } + if (!padding) + return -1; + style = Tokenizer_pop(self); + if (!style) { + Py_DECREF(padding); + return -1; + } + + newline_character = PyUnicode_FromString("\n"); + if (!newline_character) { + Py_DECREF(padding); + Py_DECREF(style); + return -1; + } + // continue to parse if it is NOT an inline table + if (PyUnicode_Contains(padding, newline_character)) { + Py_DECREF(newline_character); + self->head++; + table = Tokenizer_parse(self, LC_TABLE_OPEN, 1); + if (BAD_ROUTE) { + RESET_ROUTE(); + // offset displacement done by parse() + self->head = reset - 1; + if (Tokenizer_emit_text(self, "{|")) + return -1; + return 0; + } + if (!table) { + Py_DECREF(padding); + Py_DECREF(style); + return -1; + } + } else { + Py_DECREF(newline_character); + // close tag + self->head += 2; + } + + open_open_kwargs = PyDict_New(); + if (!open_open_kwargs) + goto fail_decref_all; + open_wiki_markup = PyUnicode_FromString("{|"); + if (!open_wiki_markup) { + Py_DECREF(open_open_kwargs); + goto fail_decref_all; + } + PyDict_SetItemString(open_open_kwargs, "wiki_markup", open_wiki_markup); + Py_DECREF(open_wiki_markup); + if (Tokenizer_emit_kwargs(self, TagOpenOpen, open_open_kwargs)) + goto fail_decref_all; + if (Tokenizer_emit_text(self, "table")) + goto fail_decref_all; + + if (style) { + if (Tokenizer_emit_all(self, style)) + goto fail_decref_padding_table; + Py_DECREF(style); + } + + close_open_kwargs = PyDict_New(); + if (!close_open_kwargs) + goto fail_decref_padding_table; + PyDict_SetItemString(close_open_kwargs, "padding", padding); + Py_DECREF(padding); + if (Tokenizer_emit_kwargs(self, TagCloseOpen, close_open_kwargs)) + goto fail_decref_table; + + if (table) { + if (Tokenizer_emit_all(self, table)) + goto fail_decref_table; + Py_DECREF(table); + } + + open_close_kwargs = PyDict_New(); + if (!open_close_kwargs) + return -1; + close_wiki_markup = PyUnicode_FromString("|}"); + if (!close_wiki_markup) { + Py_DECREF(open_close_kwargs); + return -1; + } + PyDict_SetItemString(open_close_kwargs, "wiki_markup", close_wiki_markup); + Py_DECREF(close_wiki_markup); + if (Tokenizer_emit_kwargs(self, TagOpenClose, open_close_kwargs)) + return -1; + if (Tokenizer_emit_text(self, "table")) + return -1; + if (Tokenizer_emit(self, TagCloseClose)) + return -1; + // offset displacement done by _parse() + self->head--; + return 0; + + fail_decref_all: + Py_DECREF(style); + fail_decref_padding_table: + Py_DECREF(padding); + fail_decref_table: + Py_XDECREF(table); + return -1; +} + +/* + Return the stack in order to handle the table end. +*/ +static PyObject * Tokenizer_handle_table_end(Tokenizer* self) +{ + self->head += 2; + return Tokenizer_pop(self); +} + +/* + Parse as style until end of the line, then continue. +*/ +static int Tokenizer_handle_table_row(Tokenizer* self) +{ + Py_ssize_t reset = self->head; + self->head += 2; + PyObject *padding, *open_kwargs, *close_kwargs, *wiki_markup; + PyObject *style = NULL; + + // If we can't recurse, still tokenize tag but parse style attrs as text + if (Tokenizer_CAN_RECURSE(self)) { + if(Tokenizer_push(self, LC_TABLE_OPEN)) + return -1; + padding = Tokenizer_parse_as_table_style(self, '\n', 0); + if (BAD_ROUTE) { + self->head = reset; + return 0; + } + if (!padding) + return -1; + style = Tokenizer_pop(self); + if (!style) { + Py_DECREF(padding); + return -1; + } + } else { + padding = PyUnicode_FromString(""); + if (!padding) + return -1; + } + + open_kwargs = PyDict_New(); + if (!open_kwargs) + goto fail_decref_all; + wiki_markup = PyUnicode_FromString("|-"); + if (!wiki_markup) { + Py_DECREF(open_kwargs); + goto fail_decref_all; + } + PyDict_SetItemString(open_kwargs, "wiki_markup", wiki_markup); + Py_DECREF(wiki_markup); + if (Tokenizer_emit_kwargs(self, TagOpenOpen, open_kwargs)) + goto fail_decref_all; + if (Tokenizer_emit_text(self, "tr")) + goto fail_decref_all; + + if (style) { + if (Tokenizer_emit_all(self, style)) + goto fail_decref_all; + Py_DECREF(style); + } + + close_kwargs = PyDict_New(); + if (!close_kwargs) + goto fail_decref_all; + PyDict_SetItemString(close_kwargs, "padding", padding); + Py_DECREF(padding); + if (Tokenizer_emit_kwargs(self, TagCloseSelfclose, close_kwargs)) + return -1; + return 0; + + fail_decref_all: + Py_XDECREF(style); + Py_DECREF(padding); + return -1; +} + +/* + Parse as normal syntax unless we hit a style marker, then parse style + as HTML attributes and the remainder as normal syntax. +*/ +static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, + const char *tag, uint64_t line_context) +{ + if (!Tokenizer_CAN_RECURSE(self)) { + if (Tokenizer_emit_text(self, markup)) + return -1; + self->head += strlen(markup) - 1; + return 0; + } + + uint64_t old_context = self->topstack->context; + uint64_t cell_context; + Py_ssize_t reset = self->head; + self->head += strlen(markup); + PyObject *padding; + PyObject *cell, *open_kwargs, *close_kwargs, *open_wiki_markup, *close_wiki_markup; + PyObject *style = NULL; + + cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | LC_TABLE_CELL_STYLE | line_context, 1); + if (BAD_ROUTE) { + self->head = reset; + return 0; + } + if (!cell) + return -1; + cell_context = self->topstack->context; + self->topstack->context = old_context; + + if (cell_context & LC_TABLE_CELL_STYLE) { + Py_DECREF(cell); + self->head = reset + strlen(markup); + if(Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | line_context)) + return -1; + padding = Tokenizer_parse_as_table_style(self, '|', 0); + if (BAD_ROUTE) { + self->head = reset; + return 0; + } + if (!padding) + return -1; + style = Tokenizer_pop(self); + if (!style) { + Py_DECREF(padding); + return -1; + } + // Don't parse the style separator + self->head++; + cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | line_context, 1); + if (BAD_ROUTE) { + self->head = reset; + return 0; + } + if (!cell) + return -1; + cell_context = self->topstack->context; + self->topstack->context = old_context; + } + else { + padding = PyUnicode_FromString(""); + if (!padding) { + Py_DECREF(cell); + return -1; + } + } + + open_kwargs = PyDict_New(); + if (!open_kwargs) + goto fail_decref_all; + close_kwargs = PyDict_New(); + if (!close_kwargs) + goto fail_decref_all; + open_wiki_markup = PyUnicode_FromString(markup); + if (!open_wiki_markup) + goto fail_decref_all; + PyDict_SetItemString(open_kwargs, "wiki_markup", open_wiki_markup); + Py_DECREF(open_wiki_markup); + if (Tokenizer_emit_kwargs(self, TagOpenOpen, open_kwargs)) + goto fail_decref_all; + if (Tokenizer_emit_text(self, tag)) + goto fail_decref_all; + + if (style) { + if (Tokenizer_emit_all(self, style)) + goto fail_decref_all; + close_wiki_markup = PyUnicode_FromString("|"); + if (!close_wiki_markup) + goto fail_decref_all; + PyDict_SetItemString(close_kwargs, "wiki_markup", close_wiki_markup); + Py_DECREF(close_wiki_markup); + Py_DECREF(style); + } + + PyDict_SetItemString(close_kwargs, "padding", padding); + Py_DECREF(padding); + if (Tokenizer_emit_kwargs(self, TagCloseSelfclose, close_kwargs)) + goto fail_decref_cell; + if (Tokenizer_emit_all(self, cell)) + goto fail_decref_cell; + Py_DECREF(cell); + // keep header/cell line contexts + self->topstack->context |= cell_context & (LC_TABLE_TH_LINE | LC_TABLE_TD_LINE); + // offset displacement done by parse() + self->head--; + return 0; + + fail_decref_all: + Py_XDECREF(style); + Py_DECREF(padding); + Py_XDECREF(open_kwargs); + Py_XDECREF(close_kwargs); + fail_decref_cell: + Py_DECREF(cell); + return -1; +} + +/* + Returns the context, stack, and whether to reset the cell for style + in a tuple. +*/ +static PyObject* Tokenizer_handle_table_cell_end(Tokenizer* self, int reset_for_style) +{ + if (reset_for_style) + self->topstack->context |= LC_TABLE_CELL_STYLE; + else + self->topstack->context &= ~LC_TABLE_CELL_STYLE; + return Tokenizer_pop_keeping_context(self); +} + +/* Make sure we are not trying to write an invalid character. Return 0 if everything is safe, or -1 if the route must be failed. */ @@ -2533,6 +2926,24 @@ static int Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UNICODE d } /* + Returns whether the current head has leading whitespace. + TODO: treat comments and templates as whitespace, allow fail on non-newline spaces. +*/ +static int Tokenizer_has_leading_whitespace(Tokenizer* self) +{ + int offset = 1; + Py_UNICODE current_character; + while (1) { + current_character = Tokenizer_READ_BACKWARDS(self, offset); + if (!current_character || current_character == '\n') + return 1; + else if (!Py_UNICODE_ISSPACE(current_character)) + return 0; + offset++; + } +} + +/* Parse the wikicode string, using context for when to stop. If push is true, we will push a new context, otherwise we won't and context will be ignored. */ @@ -2667,24 +3078,94 @@ static PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) if (temp != Py_None) return temp; } - else if (!last || last == '\n') { - if (this == '#' || this == '*' || this == ';' || this == ':') { - if (Tokenizer_handle_list(self)) + else if ((!last || last == '\n') && (this == '#' || this == '*' || this == ';' || this == ':')) { + if (Tokenizer_handle_list(self)) + return NULL; + } + else if ((!last || last == '\n') && (this == '-' && this == next && + this == Tokenizer_READ(self, 2) && + this == Tokenizer_READ(self, 3))) { + if (Tokenizer_handle_hr(self)) + return NULL; + } + else if ((this == '\n' || this == ':') && this_context & LC_DLTERM) { + if (Tokenizer_handle_dl_term(self)) + return NULL; + // kill potential table contexts + if (this == '\n') + self->topstack->context &= ~LC_TABLE_CELL_LINE_CONTEXTS; + } + + // Start of table parsing + else if (this == '{' && next == '|' && Tokenizer_has_leading_whitespace(self)) { + if (Tokenizer_CAN_RECURSE(self)) { + if (Tokenizer_handle_table_start(self)) + return NULL; + } + else if (Tokenizer_emit_char(self, this) || Tokenizer_emit_char(self, next)) + return NULL; + else + self->head++; + } + else if (this_context & LC_TABLE_OPEN) { + if (this == '|' && next == '|' && this_context & LC_TABLE_TD_LINE) { + if (this_context & LC_TABLE_CELL_OPEN) + return Tokenizer_handle_table_cell_end(self, 0); + else if (Tokenizer_handle_table_cell(self, "||", "td", LC_TABLE_TD_LINE)) + return NULL; + } + else if (this == '|' && next == '|' && this_context & LC_TABLE_TH_LINE) { + if (this_context & LC_TABLE_CELL_OPEN) + return Tokenizer_handle_table_cell_end(self, 0); + else if (Tokenizer_handle_table_cell(self, "||", "th", LC_TABLE_TH_LINE)) return NULL; } - else if (this == '-' && this == next && - this == Tokenizer_READ(self, 2) && - this == Tokenizer_READ(self, 3)) { - if (Tokenizer_handle_hr(self)) + else if (this == '!' && next == '!' && this_context & LC_TABLE_TH_LINE) { + if (this_context & LC_TABLE_CELL_OPEN) + return Tokenizer_handle_table_cell_end(self, 0); + else if (Tokenizer_handle_table_cell(self, "!!", "th", LC_TABLE_TH_LINE)) + return NULL; + } + else if (this == '|' && this_context & LC_TABLE_CELL_STYLE) { + return Tokenizer_handle_table_cell_end(self, 1); + } + // on newline, clear out cell line contexts + else if (this == '\n' && this_context & LC_TABLE_CELL_LINE_CONTEXTS) { + self->topstack->context &= ~LC_TABLE_CELL_LINE_CONTEXTS; + if (Tokenizer_emit_char(self, this)) + return NULL; + } + else if (Tokenizer_has_leading_whitespace(self)) { + if (this == '|' && next == '}') { + if (this_context & LC_TABLE_CELL_OPEN) + return Tokenizer_handle_table_cell_end(self, 0); + else + return Tokenizer_handle_table_end(self); + } + else if (this == '|' && next == '-') { + if (this_context & LC_TABLE_CELL_OPEN) + return Tokenizer_handle_table_cell_end(self, 0); + else if (Tokenizer_handle_table_row(self)) + return NULL; + } + else if (this == '|') { + if (this_context & LC_TABLE_CELL_OPEN) + return Tokenizer_handle_table_cell_end(self, 0); + else if (Tokenizer_handle_table_cell(self, "|", "td", LC_TABLE_TD_LINE)) + return NULL; + } + else if (this == '!') { + if (this_context & LC_TABLE_CELL_OPEN) + return Tokenizer_handle_table_cell_end(self, 0); + else if (Tokenizer_handle_table_cell(self, "!", "th", LC_TABLE_TH_LINE)) + return NULL; + } + else if (Tokenizer_emit_char(self, this)) return NULL; } else if (Tokenizer_emit_char(self, this)) return NULL; } - else if ((this == '\n' || this == ':') && this_context & LC_DLTERM) { - if (Tokenizer_handle_dl_term(self)) - return NULL; - } else if (Tokenizer_emit_char(self, this)) return NULL; self->head++; diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index e9b1a92..de7b7d4 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -44,9 +44,9 @@ SOFTWARE. static const char MARKERS[] = { '{', '}', '[', ']', '<', '>', '|', '=', '&', '\'', '#', '*', ';', ':', '/', - '-', '\n', '\0'}; + '-', '!', '\n', '\0'}; -#define NUM_MARKERS 18 +#define NUM_MARKERS 19 #define TEXTBUFFER_BLOCKSIZE 1024 #define MAX_DEPTH 40 #define MAX_CYCLES 100000 @@ -110,60 +110,68 @@ static PyObject* TagCloseClose; /* Local contexts: */ -#define LC_TEMPLATE 0x00000007 -#define LC_TEMPLATE_NAME 0x00000001 -#define LC_TEMPLATE_PARAM_KEY 0x00000002 -#define LC_TEMPLATE_PARAM_VALUE 0x00000004 - -#define LC_ARGUMENT 0x00000018 -#define LC_ARGUMENT_NAME 0x00000008 -#define LC_ARGUMENT_DEFAULT 0x00000010 - -#define LC_WIKILINK 0x00000060 -#define LC_WIKILINK_TITLE 0x00000020 -#define LC_WIKILINK_TEXT 0x00000040 - -#define LC_EXT_LINK 0x00000180 -#define LC_EXT_LINK_URI 0x00000080 -#define LC_EXT_LINK_TITLE 0x00000100 - -#define LC_HEADING 0x00007E00 -#define LC_HEADING_LEVEL_1 0x00000200 -#define LC_HEADING_LEVEL_2 0x00000400 -#define LC_HEADING_LEVEL_3 0x00000800 -#define LC_HEADING_LEVEL_4 0x00001000 -#define LC_HEADING_LEVEL_5 0x00002000 -#define LC_HEADING_LEVEL_6 0x00004000 - -#define LC_TAG 0x00078000 -#define LC_TAG_OPEN 0x00008000 -#define LC_TAG_ATTR 0x00010000 -#define LC_TAG_BODY 0x00020000 -#define LC_TAG_CLOSE 0x00040000 - -#define LC_STYLE 0x00780000 -#define LC_STYLE_ITALICS 0x00080000 -#define LC_STYLE_BOLD 0x00100000 -#define LC_STYLE_PASS_AGAIN 0x00200000 -#define LC_STYLE_SECOND_PASS 0x00400000 - -#define LC_DLTERM 0x00800000 - -#define LC_SAFETY_CHECK 0x3F000000 -#define LC_HAS_TEXT 0x01000000 -#define LC_FAIL_ON_TEXT 0x02000000 -#define LC_FAIL_NEXT 0x04000000 -#define LC_FAIL_ON_LBRACE 0x08000000 -#define LC_FAIL_ON_RBRACE 0x10000000 -#define LC_FAIL_ON_EQUALS 0x20000000 - +#define LC_TEMPLATE 0x0000000000000007 +#define LC_TEMPLATE_NAME 0x0000000000000001 +#define LC_TEMPLATE_PARAM_KEY 0x0000000000000002 +#define LC_TEMPLATE_PARAM_VALUE 0x0000000000000004 + +#define LC_ARGUMENT 0x0000000000000018 +#define LC_ARGUMENT_NAME 0x0000000000000008 +#define LC_ARGUMENT_DEFAULT 0x0000000000000010 + +#define LC_WIKILINK 0x0000000000000060 +#define LC_WIKILINK_TITLE 0x0000000000000020 +#define LC_WIKILINK_TEXT 0x0000000000000040 + +#define LC_EXT_LINK 0x0000000000000180 +#define LC_EXT_LINK_URI 0x0000000000000080 +#define LC_EXT_LINK_TITLE 0x0000000000000100 + +#define LC_HEADING 0x0000000000007E00 +#define LC_HEADING_LEVEL_1 0x0000000000000200 +#define LC_HEADING_LEVEL_2 0x0000000000000400 +#define LC_HEADING_LEVEL_3 0x0000000000000800 +#define LC_HEADING_LEVEL_4 0x0000000000001000 +#define LC_HEADING_LEVEL_5 0x0000000000002000 +#define LC_HEADING_LEVEL_6 0x0000000000004000 + +#define LC_TAG 0x0000000000078000 +#define LC_TAG_OPEN 0x0000000000008000 +#define LC_TAG_ATTR 0x0000000000010000 +#define LC_TAG_BODY 0x0000000000020000 +#define LC_TAG_CLOSE 0x0000000000040000 + +#define LC_STYLE 0x0000000000780000 +#define LC_STYLE_ITALICS 0x0000000000080000 +#define LC_STYLE_BOLD 0x0000000000100000 +#define LC_STYLE_PASS_AGAIN 0x0000000000200000 +#define LC_STYLE_SECOND_PASS 0x0000000000400000 + +#define LC_DLTERM 0x0000000000800000 + +#define LC_SAFETY_CHECK 0x000000003F000000 +#define LC_HAS_TEXT 0x0000000001000000 +#define LC_FAIL_ON_TEXT 0x0000000002000000 +#define LC_FAIL_NEXT 0x0000000004000000 +#define LC_FAIL_ON_LBRACE 0x0000000008000000 +#define LC_FAIL_ON_RBRACE 0x0000000010000000 +#define LC_FAIL_ON_EQUALS 0x0000000020000000 + +// TODO realign all +#define LC_TABLE 0x00000007C0000000 +#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000000700000000 +#define LC_TABLE_OPEN 0x0000000040000000 +#define LC_TABLE_CELL_OPEN 0x0000000080000000 +#define LC_TABLE_CELL_STYLE 0x0000000100000000 +#define LC_TABLE_TD_LINE 0x0000000200000000 +#define LC_TABLE_TH_LINE 0x0000000400000000 /* Global contexts: */ #define GL_HEADING 0x1 /* Aggregate contexts: */ -#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE) +#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE | LC_TABLE_OPEN) #define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) #define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE) #define AGG_NO_WIKILINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 9e22b28..e8f21c0 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1134,7 +1134,7 @@ class Tokenizer(object): self._emit_all(cell) # keep header/cell line contexts self._context |= cell_context & (contexts.TABLE_TH_LINE | contexts.TABLE_TD_LINE) - # offset displacement done by _parse() + # offset displacement done by parse() self._head -= 1 def _handle_table_cell_end(self, reset_for_style=False):