@@ -171,7 +171,7 @@ TABLE_ROW_OPEN = 1 << 33
 TABLE_TD_LINE = 1 << 34
 TABLE_TH_LINE = 1 << 35
 TABLE_CELL_LINE_CONTEXTS = TABLE_TD_LINE + TABLE_TH_LINE + TABLE_CELL_STYLE
-TABLE = (TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE + + TABLE_ROW_OPEN +
+TABLE = (TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE + TABLE_ROW_OPEN +
         TABLE_TD_LINE + TABLE_TH_LINE)
 
 # Global contexts:
@@ -184,6 +184,6 @@ FAIL = (TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK_TITLE + HEADING + TAG +
         STYLE + TABLE)
 UNSAFE = (TEMPLATE_NAME + WIKILINK_TITLE + EXT_LINK_TITLE +
           TEMPLATE_PARAM_KEY + ARGUMENT_NAME + TAG_CLOSE)
-DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE
+DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE + TABLE_ROW_OPEN
 NO_WIKILINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK_URI
 NO_EXT_LINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK
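
Note on the DOUBLE change: these contexts are disjoint bit flags, and DOUBLE
aggregates the ones where hitting the end of input must discard two levels of
the token stack instead of one. Adding TABLE_ROW_OPEN means an unclosed "|-"
row drops the row's stack and then fails back through its parent table. A
minimal sketch of the flag arithmetic, with illustrative bit positions rather
than the real module:

    TEMPLATE_PARAM_KEY = 1 << 5   # illustrative positions only
    TAG_CLOSE = 1 << 10
    TABLE_ROW_OPEN = 1 << 33
    DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE + TABLE_ROW_OPEN

    def pops_at_end_of_input(context):
        # One extra pop from the DOUBLE rule; the fail route then
        # discards the remaining stack.
        return 2 if context & DOUBLE else 1

    assert pops_at_end_of_input(TABLE_ROW_OPEN) == 2
    assert pops_at_end_of_input(1 << 34) == 1  # TABLE_TD_LINE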
@@ -2510,10 +2510,9 @@ Tokenizer_emit_table_tag(Tokenizer* self, const char* open_open_markup,
 }
 
 /*
-    Parse until ``end_token`` as style attributes for a table.
+    Handle style attributes for a table until an ending token.
 */
-static PyObject*
-Tokenizer_parse_as_table_style(Tokenizer* self, char end_token)
+static PyObject* Tokenizer_handle_table_style(Tokenizer* self, char end_token)
 {
     TagData *data = TagData_new();
     PyObject *padding, *trash;
@@ -2569,9 +2568,9 @@ Tokenizer_parse_as_table_style(Tokenizer* self, char end_token)
 }
 
 /*
-    Handle the start of a table.
+    Parse a wikicode table by starting with the first line.
 */
-static int Tokenizer_handle_table_start(Tokenizer* self)
+static int Tokenizer_parse_table(Tokenizer* self)
 {
     Py_ssize_t reset = self->head + 1;
     PyObject *style, *padding;
@@ -2580,7 +2579,7 @@ static int Tokenizer_handle_table_start(Tokenizer* self)
 
     if(Tokenizer_push(self, LC_TABLE_OPEN))
         return -1;
-    padding = Tokenizer_parse_as_table_style(self, '\n');
+    padding = Tokenizer_handle_table_style(self, '\n');
     if (BAD_ROUTE) {
         RESET_ROUTE();
         self->head = reset;
@@ -2622,20 +2621,10 @@ static int Tokenizer_handle_table_start(Tokenizer* self)
 }
 
 /*
-    Return the stack in order to handle the table end.
-*/
-static PyObject* Tokenizer_handle_table_end(Tokenizer* self)
-{
-    self->head += 2;
-    return Tokenizer_pop(self);
-}
-
-/*
     Parse as style until end of the line, then continue.
 */
 static int Tokenizer_handle_table_row(Tokenizer* self)
 {
-    Py_ssize_t reset = self->head;
     PyObject *padding, *style, *row, *trash;
 
     self->head += 2;
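
Note: the deletions in this file share one theme. The table helpers no longer
catch BadRoute, rewind, and bail out on their own: Tokenizer_parse_table saves
`reset` before pushing LC_TABLE_OPEN and owns the rewind for the whole table,
so a failure anywhere inside a row or cell simply propagates. A toy model of
that flow (hypothetical stripped-down names, not the real API):

    class BadRoute(Exception):
        pass

    class MiniTokenizer:
        def __init__(self, text):
            self.head, self.text, self.out = 0, text, []

        def parse_rows_and_cells(self):
            raise BadRoute()  # pretend the table never closes

        def parse_table(self):
            reset = self.head + 1
            try:
                self.parse_rows_and_cells()  # BadRoute propagates to here
            except BadRoute:
                self.head = reset            # the single rewind point
                self.out.append("{|")        # a failed table degrades to text

    tok = MiniTokenizer("{| \n|- \n|")
    tok.parse_table()
    assert tok.out == ["{|"]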
@@ -2648,11 +2637,10 @@ static int Tokenizer_handle_table_row(Tokenizer* self)
 
     if(Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_ROW_OPEN))
         return -1;
-    padding = Tokenizer_parse_as_table_style(self, '\n');
+    padding = Tokenizer_handle_table_style(self, '\n');
     if (BAD_ROUTE) {
         trash = Tokenizer_pop(self);
         Py_XDECREF(trash);
-        self->head = reset;
         return 0;
     }
     if (!padding)
@@ -2666,14 +2654,6 @@ static int Tokenizer_handle_table_row(Tokenizer* self)
     // Don't parse the style separator
     self->head++;
     row = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_ROW_OPEN, 1);
-    if (BAD_ROUTE) {
-        trash = Tokenizer_pop(self);
-        Py_XDECREF(trash);
-        Py_DECREF(padding);
-        Py_DECREF(style);
-        self->head = reset;
-        return 0;
-    }
     if (!row) {
         Py_DECREF(padding);
         Py_DECREF(style);
@@ -2688,14 +2668,6 @@ static int Tokenizer_handle_table_row(Tokenizer* self)
 }
 
 /*
-    Return the stack in order to handle the table row end.
-*/
-static PyObject* Tokenizer_handle_table_row_end(Tokenizer* self)
-{
-    return Tokenizer_pop(self);
-}
-
-/*
     Parse as normal syntax unless we hit a style marker, then parse style
     as HTML attributes and the remainder as normal syntax.
 */
@@ -2705,11 +2677,10 @@ Tokenizer_handle_table_cell(Tokenizer* self, const char *markup,
 {
     uint64_t old_context = self->topstack->context;
     uint64_t cell_context;
-    Py_ssize_t reset = self->head;
-    PyObject *padding, *cell, *trash;
-    PyObject *style = NULL;
+    PyObject *padding, *cell, *style = NULL;
     const char *close_open_markup = NULL;
     self->head += strlen(markup);
+    Py_ssize_t reset = self->head;
 
     if (!Tokenizer_CAN_RECURSE(self)) {
         if (Tokenizer_emit_text(self, markup))
@@ -2720,12 +2691,6 @@ Tokenizer_handle_table_cell(Tokenizer* self, const char *markup,
 
     cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN |
                            LC_TABLE_CELL_STYLE | line_context, 1);
-    if (BAD_ROUTE) {
-        trash = Tokenizer_pop(self);
-        Py_XDECREF(trash);
-        self->head = reset;
-        return 0;
-    }
    if (!cell)
        return -1;
    cell_context = self->topstack->context;
@@ -2733,11 +2698,11 @@ Tokenizer_handle_table_cell(Tokenizer* self, const char *markup,
 
     if (cell_context & LC_TABLE_CELL_STYLE) {
         Py_DECREF(cell);
-        self->head = reset + strlen(markup);
+        self->head = reset;
         if(Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN |
                           line_context))
             return -1;
-        padding = Tokenizer_parse_as_table_style(self, '|');
+        padding = Tokenizer_handle_table_style(self, '|');
         if (!padding)
             return -1;
         style = Tokenizer_pop(self);
@@ -2749,14 +2714,6 @@ Tokenizer_handle_table_cell(Tokenizer* self, const char *markup,
         self->head++;
         cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN |
                                line_context, 1);
-        if (BAD_ROUTE) {
-            Py_DECREF(padding);
-            Py_DECREF(style);
-            trash = Tokenizer_pop(self);
-            Py_XDECREF(trash);
-            self->head = reset;
-            return 0;
-        }
         if (!cell) {
             Py_DECREF(padding);
             Py_DECREF(style);
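
For context, the style branch above covers cells like the one below: the first
Tokenizer_parse call bails out when it reaches the bare "|" separator (setting
LC_TABLE_CELL_STYLE), the head is rewound to `reset`, which now sits just past
the cell markup, and the text before the separator is re-read as HTML
attributes. Standard wikitext exercising this path:

    {| class="wikitable"
    | style="color: red;" | cell contents
    |}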
@@ -2801,6 +2758,23 @@ Tokenizer_handle_table_cell_end(Tokenizer* self, int reset_for_style)
 }
 
 /*
+    Return the stack in order to handle the table row end.
+*/
+static PyObject* Tokenizer_handle_table_row_end(Tokenizer* self)
+{
+    return Tokenizer_pop(self);
+}
+
+/*
+    Return the stack in order to handle the table end.
+*/
+static PyObject* Tokenizer_handle_table_end(Tokenizer* self)
+{
+    self->head += 2;
+    return Tokenizer_pop(self);
+}
+
+/*
     Handle the end of the stream of wikitext.
 */
 static PyObject* Tokenizer_handle_end(Tokenizer* self, uint64_t context)
@@ -2819,9 +2793,16 @@ static PyObject* Tokenizer_handle_end(Tokenizer* self, uint64_t context)
             if (single)
                 return Tokenizer_handle_single_tag_end(self);
         }
-        else if (context & AGG_DOUBLE) {
-            trash = Tokenizer_pop(self);
-            Py_XDECREF(trash);
+        else {
+            if (context & LC_TABLE_CELL_OPEN) {
+                trash = Tokenizer_pop(self);
+                Py_XDECREF(trash);
+                context = self->topstack->context;
+            }
+            if (context & AGG_DOUBLE) {
+                trash = Tokenizer_pop(self);
+                Py_XDECREF(trash);
+            }
         }
         return Tokenizer_fail_route(self);
     }
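
Note: the reworked branch handles end-of-input inside an open cell by
discarding the cell's stack first, which exposes the row context so that the
AGG_DOUBLE check can discard the row as well before the fail route unwinds to
the table. The Python mirror later in this diff reads:

    if self._context & contexts.TABLE_CELL_OPEN:
        self._pop()
    if self._context & contexts.DOUBLE:
        self._pop()
    self._fail_route()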
@@ -3082,7 +3063,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push)
         // Start of table parsing
         else if (this == '{' && next == '|' && Tokenizer_has_leading_whitespace(self)) {
             if (Tokenizer_CAN_RECURSE(self)) {
-                if (Tokenizer_handle_table_start(self))
+                if (Tokenizer_parse_table(self))
                     return NULL;
             }
             else if (Tokenizer_emit_char(self, this) || Tokenizer_emit_char(self, next))
@@ -3197,7 +3178,7 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
     self->skip_style_tags = skip_style_tags;
     tokens = Tokenizer_parse(self, context, 1);
 
-    if (!tokens && !PyErr_Occurred()) {
+    if ((!tokens && !PyErr_Occurred()) || self->topstack) {
         if (!ParserError) {
             if (load_exceptions())
                 return NULL;
@@ -3206,6 +3187,9 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
         RESET_ROUTE();
         PyErr_SetString(ParserError, "C tokenizer exited with BAD_ROUTE");
     }
+    else if (self->topstack)
+        PyErr_SetString(ParserError,
+                        "C tokenizer exited with non-empty token stack");
     else
         PyErr_SetString(ParserError, "C tokenizer exited unexpectedly");
     return NULL;
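
Note: the added `self->topstack` condition is a leak detector: if any handler
pushes a stack without popping it, tokenization now fails loudly instead of
returning tokens from a corrupted state. Roughly, the invariant being enforced
(a sketch, not the exact C):

    class ParserError(Exception):
        pass

    def finish(tokens, stacks):
        # A leftover stack means some handler pushed without popping.
        if stacks:
            raise ParserError("tokenizer exited with non-empty token stack")
        if tokens is None:
            raise ParserError("tokenizer exited unexpectedly")
        return tokens

    assert finish(["token"], []) == ["token"]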
@@ -175,7 +175,7 @@ static PyObject* TagCloseClose;
 
 #define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE | LC_TABLE_OPEN)
 #define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)
-#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE)
+#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE | LC_TABLE_ROW_OPEN)
 #define AGG_NO_WIKILINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI)
 #define AGG_NO_EXT_LINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK)
@@ -1009,8 +1009,8 @@ class Tokenizer(object):
         self._emit_text(tag)
         self._emit(tokens.TagCloseClose())
 
-    def _parse_as_table_style(self, end_token):
-        """Parse until ``end_token`` as style attributes for a table."""
+    def _handle_table_style(self, end_token):
+        """Handle style attributes for a table until ``end_token``."""
         data = _TagOpenData()
         data.context = _TagOpenData.CX_ATTR_READY
         while True:
@@ -1037,14 +1037,13 @@ class Tokenizer(object):
                 self._handle_tag_data(data, this)
             self._head += 1
 
-    def _handle_table_start(self):
-        """Handle the start of a table."""
+    def _parse_table(self):
+        """Parse a wikicode table by starting with the first line."""
         reset = self._head + 1
         self._head += 2
         self._push(contexts.TABLE_OPEN)
         try:
-            padding = self._parse_as_table_style("\n")
+            padding = self._handle_table_style("\n")
         except BadRoute:
             self._head = reset
             self._emit_text("{|")
@@ -1063,14 +1062,8 @@ class Tokenizer(object):
         # Offset displacement done by _parse():
         self._head -= 1
 
-    def _handle_table_end(self):
-        """Return the stack in order to handle the table end."""
-        self._head += 2
-        return self._pop()
-
     def _handle_table_row(self):
         """Parse as style until end of the line, then continue."""
-        reset = self._head
         self._head += 2
         if not self._can_recurse():
             self._emit_text("|-")
@@ -1079,67 +1072,47 @@ class Tokenizer(object):
 
         self._push(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN)
         try:
-            padding = self._parse_as_table_style("\n")
+            padding = self._handle_table_style("\n")
         except BadRoute:
-            self._head = reset
             self._pop()
             raise
         style = self._pop()
+
         # Don't parse the style separator:
         self._head += 1
-        try:
-            row = self._parse(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN)
-        except BadRoute:
-            self._head = reset
-            self._pop()
-            raise
+        row = self._parse(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN)
+
         self._emit_table_tag("|-", "tr", style, padding, None, row, "")
         # Offset displacement done by parse():
         self._head -= 1
 
-    def _handle_table_row_end(self):
-        """Return the stack in order to handle the table row end."""
-        return self._pop()
-
     def _handle_table_cell(self, markup, tag, line_context):
         """Parse as normal syntax unless we hit a style marker, then parse
         style as HTML attributes and the remainder as normal syntax."""
         old_context = self._context
-        reset = self._head
-        reset_for_style, padding, style = False, "", None
+        padding, style = "", None
         self._head += len(markup)
+        reset = self._head
         if not self._can_recurse():
             self._emit_text(markup)
             self._head -= 1
             return
-        try:
-            cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN |
-                               line_context | contexts.TABLE_CELL_STYLE)
-        except BadRoute:
-            self._head = reset
-            self._pop()
-            raise
+
+        cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN |
+                           line_context | contexts.TABLE_CELL_STYLE)
         cell_context = self._context
         self._context = old_context
         reset_for_style = cell_context & contexts.TABLE_CELL_STYLE
         if reset_for_style:
-            self._head = reset + len(markup)
+            self._head = reset
             self._push(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN |
                        line_context)
-            padding = self._parse_as_table_style("|")
+            padding = self._handle_table_style("|")
             style = self._pop()
             # Don't parse the style separator:
            self._head += 1
-            try:
-                cell = self._parse(contexts.TABLE_OPEN |
-                                   contexts.TABLE_CELL_OPEN | line_context)
-            except BadRoute:
-                self._head = reset
-                ret = self._pop()
-                raise
+            cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN |
+                               line_context)
             cell_context = self._context
             self._context = old_context
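
Note: moving `reset` to after the head advance is what lets the style branch
rewind with a plain `self._head = reset`. The two formulations are equivalent;
a quick check with a hypothetical head position:

    markup, head = "||", 42      # hypothetical cell markup and position
    old_reset = head             # old code: saved before the advance
    head += len(markup)
    new_reset = head             # new code: saved after the advance
    assert old_reset + len(markup) == new_reset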
@@ -1161,12 +1134,23 @@ class Tokenizer(object):
             self._context &= ~contexts.TABLE_CELL_STYLE
         return self._pop(keep_context=True)
 
+    def _handle_table_row_end(self):
+        """Return the stack in order to handle the table row end."""
+        return self._pop()
+
+    def _handle_table_end(self):
+        """Return the stack in order to handle the table end."""
+        self._head += 2
+        return self._pop()
+
     def _handle_end(self):
         """Handle the end of the stream of wikitext."""
         if self._context & contexts.FAIL:
             if self._context & contexts.TAG_BODY:
                 if is_single(self._stack[1].text):
                     return self._handle_single_tag_end()
+            if self._context & contexts.TABLE_CELL_OPEN:
+                self._pop()
             if self._context & contexts.DOUBLE:
                 self._pop()
         self._fail_route()
@@ -1327,19 +1311,19 @@ class Tokenizer(object):
         elif this == "{" and next == "|" and (self._read(-1) in ("\n", self.START) or
                 (self._read(-2) in ("\n", self.START) and self._read(-1).isspace())):
             if self._can_recurse():
-                self._handle_table_start()
+                self._parse_table()
             else:
                 self._emit_text("{|")
         elif self._context & contexts.TABLE_OPEN:
-            if this == "|" and next == "|" and self._context & contexts.TABLE_TD_LINE:
+            if this == next == "|" and self._context & contexts.TABLE_TD_LINE:
                 if self._context & contexts.TABLE_CELL_OPEN:
                     return self._handle_table_cell_end()
                 self._handle_table_cell("||", "td", contexts.TABLE_TD_LINE)
-            elif this == "|" and next == "|" and self._context & contexts.TABLE_TH_LINE:
+            elif this == next == "|" and self._context & contexts.TABLE_TH_LINE:
                 if self._context & contexts.TABLE_CELL_OPEN:
                     return self._handle_table_cell_end()
                 self._handle_table_cell("||", "th", contexts.TABLE_TH_LINE)
-            elif this == "!" and next == "!" and self._context & contexts.TABLE_TH_LINE:
+            elif this == next == "!" and self._context & contexts.TABLE_TH_LINE:
                 if self._context & contexts.TABLE_CELL_OPEN:
                     return self._handle_table_cell_end()
                 self._handle_table_cell("!!", "th", contexts.TABLE_TH_LINE)
@@ -1387,6 +1371,10 @@ class Tokenizer(object):
         self._text = [segment for segment in split if segment]
         self._head = self._global = self._depth = self._cycles = 0
         try:
-            return self._parse(context)
+            tokens = self._parse(context)
         except BadRoute:  # pragma: no cover (untestable/exceptional case)
             raise ParserError("Python tokenizer exited with BadRoute")
+        if self._stacks:  # pragma: no cover (untestable/exceptional case)
+            err = "Python tokenizer exited with non-empty token stack"
+            raise ParserError(err)
+        return tokens
@@ -61,6 +61,13 @@ output: [Text(text="{| \n|- \n ")]
 
 ---
 
+name: no_table_close_row_and_cell
+label: no table close while inside a cell inside a row
+input: "{| \n|- \n|"
+output: [Text(text="{| \n|- \n|")]
+
+---
+
 name: no_table_close_attributes
 label: don't parse attributes as attributes if the table doesn't exist
 input: "{| border="1""
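
The new case can also be checked from the library's public entry point; an
unclosed cell inside an unclosed row now round-trips as plain text (a quick
interactive check, assuming a build of mwparserfromhell with this patch):

    import mwparserfromhell

    code = mwparserfromhell.parse("{| \n|- \n|")
    assert str(code) == "{| \n|- \n|"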