diff --git a/CHANGELOG b/CHANGELOG index 9ebbed9..976f520 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,8 @@ v0.5 (unreleased): -- +- Fixed parsing bugs involving: + - wikitables nested in templates; + - wikitable error recovery when unable to recurse. v0.4.3 (released October 29, 2015): diff --git a/docs/changelog.rst b/docs/changelog.rst index 6d57561..5b4a909 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -7,7 +7,10 @@ v0.5 Unreleased (`changes `__): -- +- Fixed parsing bugs involving: + + - wikitables nested in templates; + - wikitable error recovery when unable to recurse. v0.4.3 ------ diff --git a/mwparserfromhell/parser/ctokenizer/tok_parse.c b/mwparserfromhell/parser/ctokenizer/tok_parse.c index 5833d01..521640d 100644 --- a/mwparserfromhell/parser/ctokenizer/tok_parse.c +++ b/mwparserfromhell/parser/ctokenizer/tok_parse.c @@ -2190,7 +2190,7 @@ static PyObject* Tokenizer_handle_table_style(Tokenizer* self, Unicode end_token */ static int Tokenizer_parse_table(Tokenizer* self) { - Py_ssize_t reset = self->head + 1; + Py_ssize_t reset = self->head; PyObject *style, *padding; PyObject *table = NULL; self->head += 2; @@ -2201,7 +2201,7 @@ static int Tokenizer_parse_table(Tokenizer* self) if (BAD_ROUTE) { RESET_ROUTE(); self->head = reset; - if (Tokenizer_emit_text(self, "{|")) + if (Tokenizer_emit_char(self, '{')) return -1; return 0; } @@ -2220,7 +2220,7 @@ static int Tokenizer_parse_table(Tokenizer* self) Py_DECREF(padding); Py_DECREF(style); self->head = reset; - if (Tokenizer_emit_text(self, "{|")) + if (Tokenizer_emit_char(self, '{')) return -1; return 0; } @@ -2689,10 +2689,8 @@ PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) if (Tokenizer_parse_table(self)) return NULL; } - else if (Tokenizer_emit_char(self, this) || Tokenizer_emit_char(self, next)) + else if (Tokenizer_emit_char(self, this)) return NULL; - else - self->head++; } else if (this_context & LC_TABLE_OPEN) { if (this == '|' && next == '|' && this_context & LC_TABLE_TD_LINE) { diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 3a1c775..dddb6bc 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1074,14 +1074,14 @@ class Tokenizer(object): def _parse_table(self): """Parse a wikicode table by starting with the first line.""" - reset = self._head + 1 + reset = self._head self._head += 2 self._push(contexts.TABLE_OPEN) try: padding = self._handle_table_style("\n") except BadRoute: self._head = reset - self._emit_text("{|") + self._emit_text("{") return style = self._pop() @@ -1090,7 +1090,7 @@ class Tokenizer(object): table = self._parse(contexts.TABLE_OPEN) except BadRoute: self._head = reset - self._emit_text("{|") + self._emit_text("{") return self._emit_table_tag("{|", "table", style, padding, None, table, "|}") @@ -1352,7 +1352,7 @@ class Tokenizer(object): if self._can_recurse(): self._parse_table() else: - self._emit_text("{|") + self._emit_text("{") elif self._context & contexts.TABLE_OPEN: if this == next == "|" and self._context & contexts.TABLE_TD_LINE: if self._context & contexts.TABLE_CELL_OPEN: diff --git a/tests/tokenizer/integration.mwtest b/tests/tokenizer/integration.mwtest index 5b8ff25..831f4d0 100644 --- a/tests/tokenizer/integration.mwtest +++ b/tests/tokenizer/integration.mwtest @@ -332,3 +332,17 @@ name: wikilink_to_external_link_fallback_2 label: an external link enclosed in an extra pair of brackets (see issue #120) input: "[[http://example.com]]" output: [Text(text="["), ExternalLinkOpen(brackets=True), Text(text="http://example.com"), ExternalLinkClose(), Text(text="]")] + +--- + +name: tables_in_templates +label: catch error handling mistakes when wikitables are inside templates +input: "{{hello|test\n{|\n|} }}" +output: [TemplateOpen(), Text(text="hello"), TemplateParamSeparator(), Text(text="test\n"), TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose(), Text(text=" "), TemplateClose()] + +--- + +name: tables_in_templates_2 +label: catch error handling mistakes when wikitables are inside templates +input: "{{hello|test\n{|\n| }}" +output: [TemplateOpen(), Text(text="hello"), TemplateParamSeparator(), Text(text="test\n{"), TemplateParamSeparator(), Text(text="\n"), TemplateParamSeparator(), Text(text=" "), TemplateClose()]