The tests were not correctly exercising the cases where a table has no closing tag. Fixed the tests, then fixed the tokenizers so that the corrected tests pass. Also refactored pytokenizer to match ctokenizer more closely: the try blocks now hold only the `_parse` method calls and no other code. (tags/v0.4)
@@ -2636,8 +2636,9 @@ static int Tokenizer_handle_table_start(Tokenizer* self) | |||||
self->head++; | self->head++; | ||||
table = Tokenizer_parse(self, LC_TABLE_OPEN, 1); | table = Tokenizer_parse(self, LC_TABLE_OPEN, 1); | ||||
if (BAD_ROUTE) { | if (BAD_ROUTE) { | ||||
Py_DECREF(padding); | |||||
Py_DECREF(style); | |||||
RESET_ROUTE(); | RESET_ROUTE(); | ||||
// offset displacement done by parse() | |||||
self->head = reset; | self->head = reset; | ||||
if (Tokenizer_emit_text(self, "{|")) | if (Tokenizer_emit_text(self, "{|")) | ||||
return -1; | return -1; | ||||
@@ -2676,7 +2677,7 @@ static PyObject * Tokenizer_handle_table_end(Tokenizer* self) | |||||
static int Tokenizer_handle_table_row(Tokenizer* self) | static int Tokenizer_handle_table_row(Tokenizer* self) | ||||
{ | { | ||||
Py_ssize_t reset = self->head; | Py_ssize_t reset = self->head; | ||||
PyObject *padding, *style, *row; | |||||
PyObject *padding, *style, *row, *trash; | |||||
self->head += 2; | self->head += 2; | ||||
if (!Tokenizer_CAN_RECURSE(self)) { | if (!Tokenizer_CAN_RECURSE(self)) { | ||||
@@ -2690,6 +2691,8 @@ static int Tokenizer_handle_table_row(Tokenizer* self) | |||||
return -1; | return -1; | ||||
padding = Tokenizer_parse_as_table_style(self, '\n', 0); | padding = Tokenizer_parse_as_table_style(self, '\n', 0); | ||||
if (BAD_ROUTE) { | if (BAD_ROUTE) { | ||||
trash = Tokenizer_pop(self); | |||||
Py_XDECREF(trash); | |||||
self->head = reset; | self->head = reset; | ||||
return 0; | return 0; | ||||
} | } | ||||
@@ -2704,6 +2707,8 @@ static int Tokenizer_handle_table_row(Tokenizer* self) | |||||
self->head++; | self->head++; | ||||
row = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_ROW_OPEN, 1); | row = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_ROW_OPEN, 1); | ||||
if (BAD_ROUTE) { | if (BAD_ROUTE) { | ||||
trash = Tokenizer_pop(self); | |||||
Py_XDECREF(trash); | |||||
Py_DECREF(padding); | Py_DECREF(padding); | ||||
Py_DECREF(style); | Py_DECREF(style); | ||||
self->head = reset; | self->head = reset; | ||||
@@ -2712,7 +2717,6 @@ static int Tokenizer_handle_table_row(Tokenizer* self) | |||||
if (!row) { | if (!row) { | ||||
Py_DECREF(padding); | Py_DECREF(padding); | ||||
Py_DECREF(style); | Py_DECREF(style); | ||||
Py_DECREF(row); | |||||
return -1; | return -1; | ||||
} | } | ||||
@@ -2741,7 +2745,7 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, | |||||
uint64_t old_context = self->topstack->context; | uint64_t old_context = self->topstack->context; | ||||
uint64_t cell_context; | uint64_t cell_context; | ||||
Py_ssize_t reset = self->head; | Py_ssize_t reset = self->head; | ||||
PyObject *padding, *cell; | |||||
PyObject *padding, *cell, *trash; | |||||
PyObject *style = NULL; | PyObject *style = NULL; | ||||
const char *close_open_markup = NULL; | const char *close_open_markup = NULL; | ||||
self->head += strlen(markup); | self->head += strlen(markup); | ||||
@@ -2755,6 +2759,8 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, | |||||
cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | LC_TABLE_CELL_STYLE | line_context, 1); | cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | LC_TABLE_CELL_STYLE | line_context, 1); | ||||
if (BAD_ROUTE) { | if (BAD_ROUTE) { | ||||
trash = Tokenizer_pop(self); | |||||
Py_XDECREF(trash); | |||||
self->head = reset; | self->head = reset; | ||||
return 0; | return 0; | ||||
} | } | ||||
@@ -2770,6 +2776,8 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, | |||||
return -1; | return -1; | ||||
padding = Tokenizer_parse_as_table_style(self, '|', 0); | padding = Tokenizer_parse_as_table_style(self, '|', 0); | ||||
if (BAD_ROUTE) { | if (BAD_ROUTE) { | ||||
trash = Tokenizer_pop(self); | |||||
Py_XDECREF(trash); | |||||
self->head = reset; | self->head = reset; | ||||
return 0; | return 0; | ||||
} | } | ||||
@@ -2784,11 +2792,18 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup, | |||||
self->head++; | self->head++; | ||||
cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | line_context, 1); | cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | line_context, 1); | ||||
if (BAD_ROUTE) { | if (BAD_ROUTE) { | ||||
Py_DECREF(padding); | |||||
Py_DECREF(style); | |||||
trash = Tokenizer_pop(self); | |||||
Py_XDECREF(trash); | |||||
self->head = reset; | self->head = reset; | ||||
return 0; | return 0; | ||||
} | } | ||||
if (!cell) | |||||
if (!cell) { | |||||
Py_DECREF(padding); | |||||
Py_DECREF(style); | |||||
return -1; | return -1; | ||||
} | |||||
cell_context = self->topstack->context; | cell_context = self->topstack->context; | ||||
self->topstack->context = old_context; | self->topstack->context = old_context; | ||||
} | } | ||||
@@ -3148,6 +3163,9 @@ static PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) | |||||
} | } | ||||
else if (Tokenizer_emit_char(self, this)) | else if (Tokenizer_emit_char(self, this)) | ||||
return NULL; | return NULL; | ||||
// Raise BadRoute to table start | |||||
if (BAD_ROUTE) | |||||
return NULL; | |||||
} | } | ||||
else if (Tokenizer_emit_char(self, this)) | else if (Tokenizer_emit_char(self, this)) | ||||
return NULL; | return NULL; | ||||
@@ -1053,24 +1053,30 @@ class Tokenizer(object): | |||||
reset = self._head + 1 | reset = self._head + 1 | ||||
style, table = None, None | style, table = None, None | ||||
self._head += 2 | self._head += 2 | ||||
self._push(contexts.TABLE_OPEN) | |||||
try: | try: | ||||
self._push(contexts.TABLE_OPEN) | |||||
padding = self._parse_as_table_style("\n", break_on_table_end=True) | padding = self._parse_as_table_style("\n", break_on_table_end=True) | ||||
style = self._pop() | |||||
# continue to parse if it is NOT an inline table | |||||
if "\n" in padding: | |||||
self._head += 1 | |||||
table = self._parse(contexts.TABLE_OPEN) | |||||
else: | |||||
# close tag | |||||
self._head += 2 | |||||
except BadRoute: | except BadRoute: | ||||
# offset displacement done by _parse() | |||||
self._head = reset | self._head = reset | ||||
self._emit_text("{|") | self._emit_text("{|") | ||||
return | |||||
style = self._pop() | |||||
# continue to parse if it is NOT an inline table | |||||
if "\n" in padding: | |||||
self._head += 1 | |||||
try: | |||||
table = self._parse(contexts.TABLE_OPEN) | |||||
except BadRoute: | |||||
self._head = reset | |||||
self._emit_text("{|") | |||||
return | |||||
else: | else: | ||||
self._emit_table_tag("{|", "table", style, padding, None, table, "|}") | |||||
self._head -= 1 | |||||
# close tag | |||||
self._head += 2 | |||||
self._emit_table_tag("{|", "table", style, padding, None, table, "|}") | |||||
# offset displacement done by _parse() | |||||
self._head -= 1 | |||||
def _handle_table_end(self): | def _handle_table_end(self): | ||||
"""Return the stack in order to handle the table end.""" | """Return the stack in order to handle the table end.""" | ||||
@@ -1087,15 +1093,21 @@ class Tokenizer(object): | |||||
self._head -= 1 | self._head -= 1 | ||||
return | return | ||||
self._push(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN) | |||||
try: | try: | ||||
self._push(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN) | |||||
padding = self._parse_as_table_style("\n") | padding = self._parse_as_table_style("\n") | ||||
style = self._pop() | |||||
# don't parse the style separator | |||||
self._head += 1 | |||||
except BadRoute: | |||||
self._head = reset | |||||
self._pop() | |||||
raise | |||||
style = self._pop() | |||||
# don't parse the style separator | |||||
self._head += 1 | |||||
try: | |||||
row = self._parse(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN) | row = self._parse(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN) | ||||
except BadRoute: | except BadRoute: | ||||
self._head = reset | self._head = reset | ||||
self._pop() | |||||
raise | raise | ||||
self._emit_table_tag("|-", "tr", style, padding, None, row, "") | self._emit_table_tag("|-", "tr", style, padding, None, row, "") | ||||
# offset displacement done by parse() | # offset displacement done by parse() | ||||
@@ -1119,26 +1131,34 @@ class Tokenizer(object): | |||||
try: | try: | ||||
cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context | contexts.TABLE_CELL_STYLE) | cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context | contexts.TABLE_CELL_STYLE) | ||||
cell_context = self._context | |||||
self._context = old_context | |||||
reset_for_style = cell_context & contexts.TABLE_CELL_STYLE | |||||
except BadRoute: | except BadRoute: | ||||
self._head = reset | self._head = reset | ||||
self._pop() | |||||
raise | raise | ||||
cell_context = self._context | |||||
self._context = old_context | |||||
reset_for_style = cell_context & contexts.TABLE_CELL_STYLE | |||||
if reset_for_style: | if reset_for_style: | ||||
self._head = reset + len(markup) | self._head = reset + len(markup) | ||||
self._push(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context) | |||||
try: | try: | ||||
self._push(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context) | |||||
padding = self._parse_as_table_style("|") | padding = self._parse_as_table_style("|") | ||||
style = self._pop() | |||||
# Don't parse the style separator | |||||
self._head += 1 | |||||
except BadRoute: | |||||
self._head = reset | |||||
self._pop() | |||||
raise | |||||
style = self._pop() | |||||
# Don't parse the style separator | |||||
self._head += 1 | |||||
try: | |||||
cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context) | cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context) | ||||
cell_context = self._context | |||||
self._context = old_context | |||||
except BadRoute: | except BadRoute: | ||||
self._head = reset | self._head = reset | ||||
ret = self._pop() | |||||
raise | raise | ||||
cell_context = self._context | |||||
self._context = old_context | |||||
close_open_markup = "|" if reset_for_style else None | close_open_markup = "|" if reset_for_style else None | ||||
self._emit_table_tag(markup, tag, style, padding, close_open_markup, cell, "") | self._emit_table_tag(markup, tag, style, padding, close_open_markup, cell, "") | ||||
# keep header/cell line contexts | # keep header/cell line contexts | ||||
@@ -13,23 +13,51 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding | |||||
--- | --- | ||||
name: no_table_close_simple | name: no_table_close_simple | ||||
label: Handle case when there is no table close. | |||||
label: No table close on inline table | |||||
input: "{| " | input: "{| " | ||||
output: [Text(text="{| ")] | output: [Text(text="{| ")] | ||||
--- | --- | ||||
name: no_table_close_newline | |||||
label: No table close with a newline | |||||
input: "{| \n " | |||||
output: [Text(text="{| \n ")] | |||||
--- | |||||
name: no_table_close_inside_cell | name: no_table_close_inside_cell | ||||
label: Handle case when there is no table close while inside of a cell. | |||||
input: "{| | " | |||||
output: [Text(text="{| | ")] | |||||
label: No table close while inside of a cell | |||||
input: "{| \n| " | |||||
output: [Text(text="{| \n| ")] | |||||
--- | |||||
name: no_table_close_inside_cell_after_newline | |||||
label: No table close while inside of a cell after a newline | |||||
input: "{| \n| \n " | |||||
output: [Text(text="{| \n| \n ")] | |||||
--- | |||||
name: no_table_close_inside_cell_with_attributes | |||||
label: No table close while inside of a cell with attributes | |||||
input: "{| \n| red | test" | |||||
output: [Text(text="{| \n| red | test")] | |||||
--- | --- | ||||
name: no_table_close_inside_row | name: no_table_close_inside_row | ||||
label: Handle case when there is no table close while inside of a row. | |||||
input: "{| |- " | |||||
output: [Text(text="{| |- ")] | |||||
label: No table close while inside of a row | |||||
input: "{| \n|- " | |||||
output: [Text(text="{| \n|- ")] | |||||
--- | |||||
name: no_table_close_inside_row_after_newline | |||||
label: No table close while inside of a row after a newline | |||||
input: "{| \n|- \n " | |||||
output: [Text(text="{| \n|- \n ")] | |||||
--- | --- | ||||
@@ -40,6 +68,13 @@ output: [Text(text="{| border=\"1\"")] | |||||
--- | --- | ||||
name: no_table_close_unclosed_attributes | |||||
label: Don't parse unclosed attributes if the table doesn't exist. | |||||
input: "{| border=" | |||||
output: [Text(text="{| border=")] | |||||
--- | |||||
name: no_table_close_row_attributes | name: no_table_close_row_attributes | ||||
label: Don't parse row attributes as attributes if the table doesn't exist. | label: Don't parse row attributes as attributes if the table doesn't exist. | ||||
input: "{| |- border="1"" | input: "{| |- border="1"" | ||||