diff --git a/CHANGELOG b/CHANGELOG index 81fed0a..bb6efc2 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,7 +1,10 @@ -v0.7 (unreleased): +v0.6.4 (unreleased): - Dropped support for end-of-life Python 3.5. - Added support for Python 3.10. (#278) +- Fixed a regression in v0.6.2 that broke parsing of nested wikilinks in file + captions. For now, the parser will interpret nested wikilinks in normal links + as well, even though this differs from MediaWiki. (#270) v0.6.3 (released September 2, 2021): diff --git a/docs/changelog.rst b/docs/changelog.rst index fabb244..b619763 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,14 +1,19 @@ Changelog ========= -v0.7 ----- +v0.6.4 +------ Unreleased (`changes `__): +- Dropped support for end-of-life Python 3.5. - Added support for Python 3.10. (`#278 `_) +- Fixed a regression in v0.6.2 that broke parsing of nested wikilinks in file + captions. For now, the parser will interpret nested wikilinks in normal links + as well, even though this differs from MediaWiki. + (`#270 `_) v0.6.3 ------ diff --git a/src/mwparserfromhell/parser/ctokenizer/tok_parse.c b/src/mwparserfromhell/parser/ctokenizer/tok_parse.c index f1d036f..3ee62fd 100644 --- a/src/mwparserfromhell/parser/ctokenizer/tok_parse.c +++ b/src/mwparserfromhell/parser/ctokenizer/tok_parse.c @@ -51,7 +51,8 @@ static int Tokenizer_parse_tag(Tokenizer *); /* Determine whether the given code point is a marker. 
*/ -static int is_marker(Py_UCS4 this) +static int +is_marker(Py_UCS4 this) { int i; @@ -2929,9 +2930,10 @@ Tokenizer_parse(Tokenizer *self, uint64_t context, int push) return NULL; } } else if (this == next && next == '[' && Tokenizer_CAN_RECURSE(self)) { - if (this_context & LC_WIKILINK_TEXT) { - return Tokenizer_fail_route(self); - } + // TODO: Only do this if not in a file context: + // if (this_context & LC_WIKILINK_TEXT) { + // return Tokenizer_fail_route(self); + // } if (!(this_context & AGG_NO_WIKILINKS)) { if (Tokenizer_parse_wikilink(self)) { return NULL; diff --git a/src/mwparserfromhell/parser/tokenizer.py b/src/mwparserfromhell/parser/tokenizer.py index 44bad01..dcdfcee 100644 --- a/src/mwparserfromhell/parser/tokenizer.py +++ b/src/mwparserfromhell/parser/tokenizer.py @@ -1406,8 +1406,9 @@ class Tokenizer: return self._handle_argument_end() self._emit_text("}") elif this == nxt == "[" and self._can_recurse(): - if self._context & contexts.WIKILINK_TEXT: - self._fail_route() + # TODO: Only do this if not in a file context: + # if self._context & contexts.WIKILINK_TEXT: + # self._fail_route() if not self._context & contexts.NO_WIKILINKS: self._parse_wikilink() else: diff --git a/tests/tokenizer/wikilinks.mwtest b/tests/tokenizer/wikilinks.mwtest index 1dab688..34651d7 100644 --- a/tests/tokenizer/wikilinks.mwtest +++ b/tests/tokenizer/wikilinks.mwtest @@ -54,6 +54,20 @@ output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="bar[b --- +name: nested +label: a wikilink nested within another +input: "[[file:foo|[[bar]]]]" +output: [WikilinkOpen(), Text(text="file:foo"), WikilinkSeparator(), WikilinkOpen(), Text(text="bar"), WikilinkClose(), WikilinkClose()] + +--- + +name: nested_padding +label: a wikilink nested within another, separated by other data +input: "[[file:foo|a[[b]]c]]" +output: [WikilinkOpen(), Text(text="file:foo"), WikilinkSeparator(), Text(text="a"), WikilinkOpen(), Text(text="b"), WikilinkClose(), Text(text="c"), 
WikilinkClose()] + +--- + name: invalid_newline label: invalid wikilink: newline as only content input: "[[\n]]" @@ -89,20 +103,6 @@ output: [Text(text="[[foo[bar]]")] --- -name: invalid_nested_text -label: invalid wikilink: nested within the text of another -input: "[[foo|[[bar]]]]" -output: [Text(text="[[foo|"), WikilinkOpen(), Text(text="bar"), WikilinkClose(), Text(text="]]")] - ---- - -name: invalid_nested_text_2 -label: invalid wikilink: a wikilink nested within the text of another, with additional content -input: "[[foo|a[[b]]c]]" -output: [Text(text="[[foo|a"), WikilinkOpen(), Text(text="b"), WikilinkClose(), Text(text="c]]")] - ---- - name: invalid_nested_title label: invalid wikilink: nested within the title of another input: "[[foo[[bar]]]]"