Ver código fonte

Fix parsing of nested wikilinks

tags/v0.6.1
Ben Kurtovic 3 anos atrás
pai
commit
fac60dee48
6 arquivos alterados com 32 adições e 16 exclusões
  1. +1
    -0
      CHANGELOG
  2. +1
    -0
      docs/changelog.rst
  3. +3
    -0
      src/mwparserfromhell/parser/ctokenizer/tok_parse.c
  4. +2
    -0
      src/mwparserfromhell/parser/tokenizer.py
  5. +7
    -0
      tests/tokenizer/integration.mwtest
  6. +18
    -16
      tests/tokenizer/wikilinks.mwtest

+ 1
- 0
CHANGELOG Ver arquivo

@@ -1,6 +1,7 @@
v0.7 (unreleased):

- Improved parsing of external links. (#232)
- Fixed parsing of nested wikilinks.
- Ported tests to pytest. (#237)
- Moved mwparserfromhell package to src/ dir.



+ 1
- 0
docs/changelog.rst Ver arquivo

@@ -9,6 +9,7 @@ Unreleased

- Improved parsing of external links.
(`#232 <https://github.com/earwig/mwparserfromhell/issues/232>`_)
- Fixed parsing of nested wikilinks.
- Ported tests to pytest.
(`#237 <https://github.com/earwig/mwparserfromhell/issues/237>`_)
- Moved mwparserfromhell package to src/ dir.


+ 3
- 0
src/mwparserfromhell/parser/ctokenizer/tok_parse.c Ver arquivo

@@ -2728,6 +2728,9 @@ PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push)
return NULL;
}
else if (this == next && next == '[' && Tokenizer_CAN_RECURSE(self)) {
if (this_context & LC_WIKILINK_TEXT) {
return Tokenizer_fail_route(self);
}
if (!(this_context & AGG_NO_WIKILINKS)) {
if (Tokenizer_parse_wikilink(self))
return NULL;


+ 2
- 0
src/mwparserfromhell/parser/tokenizer.py Ver arquivo

@@ -1353,6 +1353,8 @@ class Tokenizer:
return self._handle_argument_end()
self._emit_text("}")
elif this == nxt == "[" and self._can_recurse():
if self._context & contexts.WIKILINK_TEXT:
self._fail_route()
if not self._context & contexts.NO_WIKILINKS:
self._parse_wikilink()
else:


+ 7
- 0
tests/tokenizer/integration.mwtest Ver arquivo

@@ -359,3 +359,10 @@ name: nested_templates_and_style_tags
label: many nested templates and style tags, testing edge case behavior and error recovery near the recursion depth limit (see issue #224)
input: "{{a|'''}}{{b|1='''c''}}{{d|1='''e''}}{{f|1='''g''}}{{h|1='''i''}}{{j|1='''k''}}{{l|1='''m''}}{{n|1='''o''}}{{p|1='''q''}}{{r|1=''s'''}}{{t|1='''u''}}{{v|1='''w''x'''y'''}}\n{|\n|-\n|'''\n|}"
output: [TemplateOpen(), Text(text="a"), TemplateParamSeparator(), Text(text="'''"), TemplateClose(), TemplateOpen(), Text(text="b"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="c"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="d"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="e"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="f"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="g"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="h"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="i"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="j"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="k"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="l"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="m"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="n"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="o''}}"), TemplateOpen(), Text(text="p"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="q"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="r"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="''s'''"), TemplateClose(), TemplateOpen(), Text(text="t"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="u"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), Text(text="{{v|1="), TagOpenClose(), Text(text="b"), TagCloseClose(), Text(text="w''x"), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="y"), TagOpenClose(), Text(text="b"), TagCloseClose(), TemplateClose(), Text(text="\n"), TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text="'''\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: wikilink_nested_with_nowiki
label: wikilinks nested within the text of another, but surrounded by nowiki tags
input: [[foo|bar<nowiki>[[baz]][[qux]]</nowiki>]]
output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="bar"), TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="[[baz]][[qux]]"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), WikilinkClose()]

+ 18
- 16
tests/tokenizer/wikilinks.mwtest Ver arquivo

@@ -54,20 +54,6 @@ output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="bar[b

---

name: nested
label: a wikilink nested within another
input: "[[foo|[[bar]]]]"
output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), WikilinkOpen(), Text(text="bar"), WikilinkClose(), WikilinkClose()]


name: nested_padding
label: a wikilink nested within another, separated by other data
input: "[[foo|a[[b]]c]]"
output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="a"), WikilinkOpen(), Text(text="b"), WikilinkClose(), Text(text="c"), WikilinkClose()]


name: invalid_newline
label: invalid wikilink: newline as only content
input: "[[\n]]"
@@ -103,15 +89,29 @@ output: [Text(text="[[foo[bar]]")]

---

name: invalid_nested
label: invalid wikilink: trying to nest in the wrong context
name: invalid_nested_text
label: invalid wikilink: nested within the text of another
input: "[[foo|[[bar]]]]"
output: [Text(text="[[foo|"), WikilinkOpen(), Text(text="bar"), WikilinkClose(), Text(text="]]")]

---

name: invalid_nested_text_2
label: invalid wikilink: a wikilink nested within the text of another, with additional content
input: "[[foo|a[[b]]c]]"
output: [Text(text="[[foo|a"), WikilinkOpen(), Text(text="b"), WikilinkClose(), Text(text="c]]")]

---

name: invalid_nested_title
label: invalid wikilink: nested within the title of another
input: "[[foo[[bar]]]]"
output: [Text(text="[[foo"), WikilinkOpen(), Text(text="bar"), WikilinkClose(), Text(text="]]")]

---

name: invalid_nested_padding
label: invalid wikilink: trying to nest in the wrong context, with a text param
name: invalid_nested_title_and_text
label: invalid wikilink: nested within the title of another, with a text param
input: "[[foo[[bar]]|baz]]"
output: [Text(text="[[foo"), WikilinkOpen(), Text(text="bar"), WikilinkClose(), Text(text="|baz]]")]



Carregando…
Cancelar
Salvar