From b3c98efd22bd7e49e68480bbf492bc62314f981e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 6 Dec 2019 01:06:13 -0500 Subject: [PATCH] Fix a parsing bug involving deeply nested style tags (fixes #224) --- CHANGELOG | 1 + docs/changelog.rst | 2 ++ mwparserfromhell/parser/ctokenizer/tok_parse.c | 5 +++++ mwparserfromhell/parser/tokenizer.py | 6 +++++- tests/tokenizer/integration.mwtest | 7 +++++++ 5 files changed, 20 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index d95b07c..dee81fb 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,7 @@ v0.6 (unreleased): - Updated Wikicode.matches() to recognize underscores as being equivalent to spaces. (#216) +- Fixed a rare parsing bug involving deeply nested style tags. (#224) v0.5.4 (released May 15, 2019): diff --git a/docs/changelog.rst b/docs/changelog.rst index c46e8f1..216c46e 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -9,6 +9,8 @@ Unreleased - Updated Wikicode.matches() to recognize underscores as being equivalent to spaces. (`#216 `_) +- Fixed a rare parsing bug involving deeply nested style tags. + (`#224 `_) v0.5.4 ------ diff --git a/mwparserfromhell/parser/ctokenizer/tok_parse.c b/mwparserfromhell/parser/ctokenizer/tok_parse.c index c32e48c..deac6c5 100644 --- a/mwparserfromhell/parser/ctokenizer/tok_parse.c +++ b/mwparserfromhell/parser/ctokenizer/tok_parse.c @@ -1807,6 +1807,11 @@ static int Tokenizer_parse_italics(Tokenizer* self) if (BAD_ROUTE_CONTEXT & LC_STYLE_PASS_AGAIN) { context = LC_STYLE_ITALICS | LC_STYLE_SECOND_PASS; stack = Tokenizer_parse(self, context, 1); + if (BAD_ROUTE) { + RESET_ROUTE(); + self->head = reset; + return Tokenizer_emit_text(self, "''"); + } } else return Tokenizer_emit_text(self, "''"); diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 7b2f3ce..f44360e 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -931,7 +931,11 @@ class Tokenizer(object): self._head = reset if route.context & contexts.STYLE_PASS_AGAIN: new_ctx = contexts.STYLE_ITALICS | contexts.STYLE_SECOND_PASS - stack = self._parse(new_ctx) + try: + stack = self._parse(new_ctx) + except BadRoute: + self._head = reset + return self._emit_text("''") else: return self._emit_text("''") self._emit_style_tag("i", "''", stack) diff --git a/tests/tokenizer/integration.mwtest b/tests/tokenizer/integration.mwtest index 7137c50..7ab51c6 100644 --- a/tests/tokenizer/integration.mwtest +++ b/tests/tokenizer/integration.mwtest @@ -353,3 +353,10 @@ name: many_invalid_nested_tags label: many unending nested tags that should be treated as plain text, followed by valid wikitext (see issues #42, #183) input: "[[{{x}}" output: [Text(text="[["), TemplateOpen(), Text(text="x"), TemplateClose()] + +--- + +name: nested_templates_and_style_tags +label: many nested templates and style tags, testing edge case behavior and error recovery near the recursion depth limit (see issue #224) +input: "{{a|'''}}{{b|1='''c''}}{{d|1='''e''}}{{f|1='''g''}}{{h|1='''i''}}{{j|1='''k''}}{{l|1='''m''}}{{n|1='''o''}}{{p|1='''q''}}{{r|1=''s'''}}{{t|1='''u''}}{{v|1='''w''x'''y'''}}\n{|\n|-\n|'''\n|}" +output: [TemplateOpen(), Text(text="a"), TemplateParamSeparator(), Text(text="'''"), TemplateClose(), TemplateOpen(), Text(text="b"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="c"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="d"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="e"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="f"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="g"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="h"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="i"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="j"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="k"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="l"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="m"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="n"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="o''}}"), TemplateOpen(), Text(text="p"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="q"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="r"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="''s'''"), TemplateClose(), TemplateOpen(), Text(text="t"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="u"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), Text(text="{{v|1="), TagOpenClose(), Text(text="b"), TagCloseClose(), Text(text="w''x"), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="y"), TagOpenClose(), Text(text="b"), TagCloseClose(), TemplateClose(), Text(text="\n"), TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text="'''\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]