diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c
index c062404..c902c3d 100644
--- a/mwparserfromhell/parser/tokenizer.c
+++ b/mwparserfromhell/parser/tokenizer.c
@@ -2601,17 +2601,17 @@ static PyObject* Tokenizer_parse_as_table_style(Tokenizer* self, char end_token,
  */
 static int Tokenizer_handle_table_start(Tokenizer* self)
 {
-    self->head += 2;
-    Py_ssize_t reset = self->head;
+    Py_ssize_t reset = self->head + 1;
     PyObject *style, *padding, *newline_character;
     PyObject *table = NULL;
+    self->head += 2;
 
     if(Tokenizer_push(self, LC_TABLE_OPEN))
         return -1;
     padding = Tokenizer_parse_as_table_style(self, '\n', 1);
     if (BAD_ROUTE) {
         RESET_ROUTE();
-        self->head = reset - 1;
+        self->head = reset;
         if (Tokenizer_emit_text(self, "{|"))
             return -1;
         return 0;
@@ -2638,7 +2638,7 @@ static int Tokenizer_handle_table_start(Tokenizer* self)
     if (BAD_ROUTE) {
         RESET_ROUTE();
         // offset displacement done by parse()
-        self->head = reset - 1;
+        self->head = reset;
         if (Tokenizer_emit_text(self, "{|"))
             return -1;
         return 0;
@@ -2675,17 +2675,17 @@ static PyObject * Tokenizer_handle_table_end(Tokenizer* self)
  */
 static int Tokenizer_handle_table_row(Tokenizer* self)
 {
+    Py_ssize_t reset = self->head;
+    PyObject *padding, *style, *row;
+    self->head += 2;
+
     if (!Tokenizer_CAN_RECURSE(self)) {
         if (Tokenizer_emit_text(self, "|-"))
             return -1;
-        self->head += 1;
+        self->head -= 1;
         return 0;
     }
 
-    Py_ssize_t reset = self->head;
-    self->head += 2;
-    PyObject *padding, *style, *row;
-
     if(Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_ROW_OPEN))
         return -1;
     padding = Tokenizer_parse_as_table_style(self, '\n', 0);
@@ -2738,20 +2738,20 @@ static PyObject* Tokenizer_handle_table_row_end(Tokenizer* self)
 static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup,
                                        const char *tag, uint64_t line_context)
 {
-    if (!Tokenizer_CAN_RECURSE(self)) {
-        if (Tokenizer_emit_text(self, markup))
-            return -1;
-        self->head += strlen(markup) - 1;
-        return 0;
-    }
-
     uint64_t old_context = self->topstack->context;
     uint64_t cell_context;
     Py_ssize_t reset = self->head;
-    self->head += strlen(markup);
     PyObject *padding, *cell;
     PyObject *style = NULL;
     const char *close_open_markup = NULL;
+    self->head += strlen(markup);
+
+    if (!Tokenizer_CAN_RECURSE(self)) {
+        if (Tokenizer_emit_text(self, markup))
+            return -1;
+        self->head--;
+        return 0;
+    }
 
     cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | LC_TABLE_CELL_STYLE | line_context, 1);
     if (BAD_ROUTE) {
diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py
index 6ae6050..59f2156 100644
--- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py
@@ -1050,9 +1050,9 @@ class Tokenizer(object):
 
     def _handle_table_start(self):
         """Handle the start of a table."""
-        self._head += 2
-        reset = self._head
+        reset = self._head + 1
         style, table = None, None
+        self._head += 2
         try:
             self._push(contexts.TABLE_OPEN)
             padding = self._parse_as_table_style("\n", break_on_table_end=True)
@@ -1066,7 +1066,7 @@
                 self._head += 2
         except BadRoute:
             # offset displacement done by _parse()
-            self._head = reset - 1
+            self._head = reset
             self._emit_text("{|")
         else:
             self._emit_table_tag("{|", "table", style, padding, None, table, "|}")
@@ -1079,14 +1079,14 @@
 
     def _handle_table_row(self):
         """Parse as style until end of the line, then continue."""
+        reset = self._head
+        style, padding = None, ""
+        self._head += 2
         if not self._can_recurse():
             self._emit_text("|-")
-            self._head += 1
+            self._head -= 1
             return
-        reset = self._head
-        self._head += 2
-        style, padding = None, ""
 
         try:
             self._push(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN)
             padding = self._parse_as_table_style("\n")
@@ -1108,15 +1108,15 @@
     def _handle_table_cell(self, markup, tag, line_context):
         """Parse as normal syntax unless we hit a style marker, then
         parse style as HTML attributes and the remainder as normal syntax."""
+        old_context = self._context
+        reset = self._head
+        reset_for_style, padding, style = False, "", None
+        self._head += len(markup)
         if not self._can_recurse():
             self._emit_text(markup)
-            self._head += len(markup) - 1
+            self._head -= 1
             return
 
-        old_context = self._context
-        reset = self._head
-        self._head += len(markup)
-        reset_for_style, padding, style = False, "", None
         try:
             cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context | contexts.TABLE_CELL_STYLE)
             cell_context = self._context
@@ -1149,8 +1149,6 @@
     def _handle_table_cell_end(self, reset_for_style=False):
         """Returns the current context, with the TABLE_CELL_STYLE flag set if
         it is necessary to reset and parse style attributes."""
-        if self._context & (contexts.FAIL & ~contexts.TABLE):
-            raise BadRoute
         if reset_for_style:
             self._context |= contexts.TABLE_CELL_STYLE
         else: