Browse Source

Fix a parsing bug involving deeply nested style tags (fixes #224)

pull/194/merge
Ben Kurtovic 2 months ago
parent
commit
b3c98efd22
5 changed files with 20 additions and 1 deletion
  1. +1
    -0
      CHANGELOG
  2. +2
    -0
      docs/changelog.rst
  3. +5
    -0
      mwparserfromhell/parser/ctokenizer/tok_parse.c
  4. +5
    -1
      mwparserfromhell/parser/tokenizer.py
  5. +7
    -0
      tests/tokenizer/integration.mwtest

+ 1
- 0
CHANGELOG View File

@@ -2,6 +2,7 @@ v0.6 (unreleased):

- Updated Wikicode.matches() to recognize underscores as being equivalent
to spaces. (#216)
- Fixed a rare parsing bug involving deeply nested style tags. (#224)

v0.5.4 (released May 15, 2019):


+ 2
- 0
docs/changelog.rst View File

@@ -9,6 +9,8 @@ Unreleased

- Updated Wikicode.matches() to recognize underscores as being equivalent
to spaces. (`#216 <https://github.com/earwig/mwparserfromhell/issues/216>`_)
- Fixed a rare parsing bug involving deeply nested style tags.
(`#224 <https://github.com/earwig/mwparserfromhell/issues/224>`_)

v0.5.4
------

+ 5
- 0
mwparserfromhell/parser/ctokenizer/tok_parse.c View File

@@ -1807,6 +1807,11 @@ static int Tokenizer_parse_italics(Tokenizer* self)
if (BAD_ROUTE_CONTEXT & LC_STYLE_PASS_AGAIN) {
context = LC_STYLE_ITALICS | LC_STYLE_SECOND_PASS;
stack = Tokenizer_parse(self, context, 1);
+ if (BAD_ROUTE) {
+     RESET_ROUTE();
+     self->head = reset;
+     return Tokenizer_emit_text(self, "''");
+ }
}
else
return Tokenizer_emit_text(self, "''");

+ 5
- 1
mwparserfromhell/parser/tokenizer.py View File

@@ -931,7 +931,11 @@ class Tokenizer(object):
self._head = reset
if route.context & contexts.STYLE_PASS_AGAIN:
new_ctx = contexts.STYLE_ITALICS | contexts.STYLE_SECOND_PASS
- stack = self._parse(new_ctx)
+ try:
+     stack = self._parse(new_ctx)
+ except BadRoute:
+     self._head = reset
+     return self._emit_text("''")
else:
return self._emit_text("''")
self._emit_style_tag("i", "''", stack)

+ 7
- 0
tests/tokenizer/integration.mwtest View File

@@ -353,3 +353,10 @@ name: many_invalid_nested_tags
label: many unending nested tags that should be treated as plain text, followed by valid wikitext (see issues #42, #183)
input: "<b><b><b><b><b><b><b><b><b><b><b><b><b><b><b><b><b><b>[[{{x}}"
output: [Text(text="<b><b><b><b><b><b><b><b><b><b><b><b><b><b><b><b><b><b>[["), TemplateOpen(), Text(text="x"), TemplateClose()]

---

name: nested_templates_and_style_tags
label: many nested templates and style tags, testing edge case behavior and error recovery near the recursion depth limit (see issue #224)
input: "{{a|'''}}{{b|1='''c''}}{{d|1='''e''}}{{f|1='''g''}}{{h|1='''i''}}{{j|1='''k''}}{{l|1='''m''}}{{n|1='''o''}}{{p|1='''q''}}{{r|1=''s'''}}{{t|1='''u''}}{{v|1='''w''x'''y'''}}\n{|\n|-\n|'''\n|}"
output: [TemplateOpen(), Text(text="a"), TemplateParamSeparator(), Text(text="'''"), TemplateClose(), TemplateOpen(), Text(text="b"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="c"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="d"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="e"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="f"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="g"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="h"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="i"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="j"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="k"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="l"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="m"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="n"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="o''}}"), TemplateOpen(), Text(text="p"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), 
TagCloseOpen(), Text(text="q"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="r"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="''s'''"), TemplateClose(), TemplateOpen(), Text(text="t"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="u"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), Text(text="{{v|1="), TagOpenClose(), Text(text="b"), TagCloseClose(), Text(text="w''x"), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="y"), TagOpenClose(), Text(text="b"), TagCloseClose(), TemplateClose(), Text(text="\n"), TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text="'''\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

Loading…
Cancel
Save