The tests were not correctly exercising the cases where a table has no closing tag. Fixed the tests, then fixed the tokenizers so the corrected tests pass. Also refactored pytokenizer to match the ctokenizer more closely by keeping only the `_parse` methods inside the try blocks and no other code. (tags/v0.4)
@@ -2636,8 +2636,9 @@ static int Tokenizer_handle_table_start(Tokenizer* self) | |||
self->head++; | |||
table = Tokenizer_parse(self, LC_TABLE_OPEN, 1); | |||
if (BAD_ROUTE) { | |||
Py_DECREF(padding); | |||
Py_DECREF(style); | |||
RESET_ROUTE(); | |||
// offset displacement done by parse() | |||
self->head = reset; | |||
if (Tokenizer_emit_text(self, "{|")) | |||
return -1; | |||
@@ -2676,7 +2677,7 @@ static PyObject * Tokenizer_handle_table_end(Tokenizer* self) | |||
static int Tokenizer_handle_table_row(Tokenizer* self) | |||
{ | |||
Py_ssize_t reset = self->head; | |||
PyObject *padding, *style, *row; | |||
PyObject *padding, *style, *row, *trash; | |||
self->head += 2; | |||
if (!Tokenizer_CAN_RECURSE(self)) { | |||
@@ -2690,6 +2691,8 @@ static int Tokenizer_handle_table_row(Tokenizer* self) | |||
return -1; | |||
padding = Tokenizer_parse_as_table_style(self, '\n', 0); | |||
if (BAD_ROUTE) { | |||
trash = Tokenizer_pop(self); | |||
Py_XDECREF(trash); | |||
self->head = reset; | |||
return 0; | |||
} | |||
@@ -2704,6 +2707,8 @@ static int Tokenizer_handle_table_row(Tokenizer* self) | |||
self->head++; | |||
row = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_ROW_OPEN, 1); | |||
if (BAD_ROUTE) { | |||
trash = Tokenizer_pop(self); | |||
Py_XDECREF(trash); | |||
Py_DECREF(padding); | |||
Py_DECREF(style); | |||
self->head = reset; | |||
@@ -2712,7 +2717,6 @@ static int Tokenizer_handle_table_row(Tokenizer* self) | |||
if (!row) { | |||
Py_DECREF(padding); | |||
Py_DECREF(style); | |||
Py_DECREF(row); | |||
return -1; | |||
} | |||
@@ -2741,7 +2745,7 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, | |||
uint64_t old_context = self->topstack->context; | |||
uint64_t cell_context; | |||
Py_ssize_t reset = self->head; | |||
PyObject *padding, *cell; | |||
PyObject *padding, *cell, *trash; | |||
PyObject *style = NULL; | |||
const char *close_open_markup = NULL; | |||
self->head += strlen(markup); | |||
@@ -2755,6 +2759,8 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, | |||
cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | LC_TABLE_CELL_STYLE | line_context, 1); | |||
if (BAD_ROUTE) { | |||
trash = Tokenizer_pop(self); | |||
Py_XDECREF(trash); | |||
self->head = reset; | |||
return 0; | |||
} | |||
@@ -2770,6 +2776,8 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, | |||
return -1; | |||
padding = Tokenizer_parse_as_table_style(self, '|', 0); | |||
if (BAD_ROUTE) { | |||
trash = Tokenizer_pop(self); | |||
Py_XDECREF(trash); | |||
self->head = reset; | |||
return 0; | |||
} | |||
@@ -2784,11 +2792,18 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, | |||
self->head++; | |||
cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | line_context, 1); | |||
if (BAD_ROUTE) { | |||
Py_DECREF(padding); | |||
Py_DECREF(style); | |||
trash = Tokenizer_pop(self); | |||
Py_XDECREF(trash); | |||
self->head = reset; | |||
return 0; | |||
} | |||
if (!cell) | |||
if (!cell) { | |||
Py_DECREF(padding); | |||
Py_DECREF(style); | |||
return -1; | |||
} | |||
cell_context = self->topstack->context; | |||
self->topstack->context = old_context; | |||
} | |||
@@ -3148,6 +3163,9 @@ static PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) | |||
} | |||
else if (Tokenizer_emit_char(self, this)) | |||
return NULL; | |||
// Raise BadRoute to table start | |||
if (BAD_ROUTE) | |||
return NULL; | |||
} | |||
else if (Tokenizer_emit_char(self, this)) | |||
return NULL; | |||
@@ -1053,24 +1053,30 @@ class Tokenizer(object): | |||
reset = self._head + 1 | |||
style, table = None, None | |||
self._head += 2 | |||
self._push(contexts.TABLE_OPEN) | |||
try: | |||
self._push(contexts.TABLE_OPEN) | |||
padding = self._parse_as_table_style("\n", break_on_table_end=True) | |||
style = self._pop() | |||
# continue to parse if it is NOT an inline table | |||
if "\n" in padding: | |||
self._head += 1 | |||
table = self._parse(contexts.TABLE_OPEN) | |||
else: | |||
# close tag | |||
self._head += 2 | |||
except BadRoute: | |||
# offset displacement done by _parse() | |||
self._head = reset | |||
self._emit_text("{|") | |||
return | |||
style = self._pop() | |||
# continue to parse if it is NOT an inline table | |||
if "\n" in padding: | |||
self._head += 1 | |||
try: | |||
table = self._parse(contexts.TABLE_OPEN) | |||
except BadRoute: | |||
self._head = reset | |||
self._emit_text("{|") | |||
return | |||
else: | |||
self._emit_table_tag("{|", "table", style, padding, None, table, "|}") | |||
self._head -= 1 | |||
# close tag | |||
self._head += 2 | |||
self._emit_table_tag("{|", "table", style, padding, None, table, "|}") | |||
# offset displacement done by _parse() | |||
self._head -= 1 | |||
def _handle_table_end(self): | |||
"""Return the stack in order to handle the table end.""" | |||
@@ -1087,15 +1093,21 @@ class Tokenizer(object): | |||
self._head -= 1 | |||
return | |||
self._push(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN) | |||
try: | |||
self._push(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN) | |||
padding = self._parse_as_table_style("\n") | |||
style = self._pop() | |||
# don't parse the style separator | |||
self._head += 1 | |||
except BadRoute: | |||
self._head = reset | |||
self._pop() | |||
raise | |||
style = self._pop() | |||
# don't parse the style separator | |||
self._head += 1 | |||
try: | |||
row = self._parse(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN) | |||
except BadRoute: | |||
self._head = reset | |||
self._pop() | |||
raise | |||
self._emit_table_tag("|-", "tr", style, padding, None, row, "") | |||
# offset displacement done by parse() | |||
@@ -1119,26 +1131,34 @@ class Tokenizer(object): | |||
try: | |||
cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context | contexts.TABLE_CELL_STYLE) | |||
cell_context = self._context | |||
self._context = old_context | |||
reset_for_style = cell_context & contexts.TABLE_CELL_STYLE | |||
except BadRoute: | |||
self._head = reset | |||
self._pop() | |||
raise | |||
cell_context = self._context | |||
self._context = old_context | |||
reset_for_style = cell_context & contexts.TABLE_CELL_STYLE | |||
if reset_for_style: | |||
self._head = reset + len(markup) | |||
self._push(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context) | |||
try: | |||
self._push(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context) | |||
padding = self._parse_as_table_style("|") | |||
style = self._pop() | |||
# Don't parse the style separator | |||
self._head += 1 | |||
except BadRoute: | |||
self._head = reset | |||
self._pop() | |||
raise | |||
style = self._pop() | |||
# Don't parse the style separator | |||
self._head += 1 | |||
try: | |||
cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context) | |||
cell_context = self._context | |||
self._context = old_context | |||
except BadRoute: | |||
self._head = reset | |||
ret = self._pop() | |||
raise | |||
cell_context = self._context | |||
self._context = old_context | |||
close_open_markup = "|" if reset_for_style else None | |||
self._emit_table_tag(markup, tag, style, padding, close_open_markup, cell, "") | |||
# keep header/cell line contexts | |||
@@ -13,23 +13,51 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding | |||
--- | |||
name: no_table_close_simple | |||
label: Handle case when there is no table close. | |||
label: No table close on inline table | |||
input: "{| " | |||
output: [Text(text="{| ")] | |||
--- | |||
name: no_table_close_newline | |||
label: No table close with a newline | |||
input: "{| \n " | |||
output: [Text(text="{| \n ")] | |||
--- | |||
name: no_table_close_inside_cell | |||
label: Handle case when there is no table close while inside of a cell. | |||
input: "{| | " | |||
output: [Text(text="{| | ")] | |||
label: No table close while inside of a cell | |||
input: "{| \n| " | |||
output: [Text(text="{| \n| ")] | |||
--- | |||
name: no_table_close_inside_cell_after_newline | |||
label: No table close while inside of a cell after a newline | |||
input: "{| \n| \n " | |||
output: [Text(text="{| \n| \n ")] | |||
--- | |||
name: no_table_close_inside_cell_with_attributes | |||
label: No table close while inside of a cell with attributes | |||
input: "{| \n| red | test" | |||
output: [Text(text="{| \n| red | test")] | |||
--- | |||
name: no_table_close_inside_row | |||
label: Handle case when there is no table close while inside of a row. | |||
input: "{| |- " | |||
output: [Text(text="{| |- ")] | |||
label: No table close while inside of a row | |||
input: "{| \n|- " | |||
output: [Text(text="{| \n|- ")] | |||
--- | |||
name: no_table_close_inside_row_after_newline | |||
label: No table close while inside of a row after a newline | |||
input: "{| \n|- \n " | |||
output: [Text(text="{| \n|- \n ")] | |||
--- | |||
@@ -40,6 +68,13 @@ output: [Text(text="{| border=\"1\"")] | |||
--- | |||
name: no_table_close_unclosed_attributes | |||
label: Don't parse unclosed attributes if the table doesn't exist. | |||
input: "{| border=" | |||
output: [Text(text="{| border=")] | |||
--- | |||
name: no_table_close_row_attributes | |||
label: Don't parse row attributes as attributes if the table doesn't exist. | |||
input: "{| |- border="1"" | |||