From 477513171700dddfdb91c3541f7f43368bd4a30f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic <ben.kurtovic@gmail.com> Date: Sat, 30 Mar 2019 22:49:53 -0400 Subject: [PATCH] Fix not memoizing bad routes after failing inside a table (fixes #206) --- CHANGELOG | 1 + docs/changelog.rst | 2 ++ mwparserfromhell/parser/ctokenizer/tok_parse.c | 1 + mwparserfromhell/parser/tokenizer.py | 1 + tests/tokenizer/tables.mwtest | 14 ++++++++++++++ 5 files changed, 19 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index c27e826..14ec205 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,6 +3,7 @@ v0.6 (unreleased): - Fixed manual construction of Node objects, previously unsupported. (#214) - Fixed Wikicode transformation methods (replace(), remove(), etc.) when passed an empty section as an argument. (#212) +- Fixed the parser getting stuck inside malformed tables. (#206) v0.5.2 (released November 1, 2018): diff --git a/docs/changelog.rst b/docs/changelog.rst index e12f74e..2a222e3 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -12,6 +12,8 @@ Unreleased - Fixed :class:`.Wikicode` transformation methods (:meth:`.Wikicode.replace`, :meth:`.Wikicode.remove`, etc.) when passed an empty section as an argument. (`#212 <https://github.com/earwig/mwparserfromhell/issues/212>`_) +- Fixed the parser getting stuck inside malformed tables.
+ (`#206 <https://github.com/earwig/mwparserfromhell/issues/206>`_) v0.5.2 ------ diff --git a/mwparserfromhell/parser/ctokenizer/tok_parse.c b/mwparserfromhell/parser/ctokenizer/tok_parse.c index 3a2cda9..6d34cfb 100644 --- a/mwparserfromhell/parser/ctokenizer/tok_parse.c +++ b/mwparserfromhell/parser/ctokenizer/tok_parse.c @@ -2254,6 +2254,7 @@ static int Tokenizer_parse_table(Tokenizer* self) Py_DECREF(padding); Py_DECREF(style); while (!Tokenizer_IS_CURRENT_STACK(self, restore_point)) { + Tokenizer_memoize_bad_route(self); trash = Tokenizer_pop(self); Py_XDECREF(trash); } diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index a9a02cc..a68c132 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1133,6 +1133,7 @@ class Tokenizer(object): table = self._parse(contexts.TABLE_OPEN) except BadRoute: while self._stack_ident != restore_point: + self._memoize_bad_route() self._pop() self._head = reset self._emit_text("{") diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index 16012cf..b8e92cf 100644 --- a/tests/tokenizer/tables.mwtest +++ b/tests/tokenizer/tables.mwtest @@ -408,3 +408,17 @@ name: junk_after_table_row label: ignore junk on the first line of a table row input: "{|\n|- foo="bar" | baz\n|blerp\n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char='"'), Text(text="bar"), TagAttrStart(pad_first=" ", pad_before_eq=" ", pad_after_eq=""), Text(text="|"), TagAttrStart(pad_first="", pad_before_eq="", pad_after_eq=""), Text(text="baz"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text="blerp\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), 
TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] + +--- + +name: incomplete_nested_open_only +label: many nested incomplete tables: table open only +input: "{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|" +output: [Text(text="{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|")] + +--- + +name: incomplete_nested_open_and_row +label: many nested incomplete tables: table open and row separator (see issue #206) +input: "{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-" +output: [Text(text="{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-")]