From 2155638b919a96b3b5d7c94a4ba3820e3b5aa166 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 14 Feb 2022 02:20:06 -0500 Subject: [PATCH] Fix regression in parsing nested wikilinks in file captions This regression seems more severe than the bug the commit was attempting to fix (incorrect parsing of nested wikilinks in normal links), so that bug is reintroduced until localization-aware parsing that allows us to detect file links is added. This commit partially reverts fac60dee48eb346f839c0145138151dbc5b7d999. --- CHANGELOG | 5 +++- docs/changelog.rst | 9 +++++-- src/mwparserfromhell/parser/ctokenizer/tok_parse.c | 10 ++++---- src/mwparserfromhell/parser/tokenizer.py | 5 ++-- tests/tokenizer/wikilinks.mwtest | 28 +++++++++++----------- 5 files changed, 34 insertions(+), 23 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 81fed0a..bb6efc2 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,7 +1,10 @@ -v0.7 (unreleased): +v0.6.4 (unreleased): - Dropped support for end-of-life Python 3.5. - Added support for Python 3.10. (#278) +- Fixed a regression in v0.6.2 that broke parsing of nested wikilinks in file + captions. For now, the parser will interpret nested wikilinks in normal links + as well, even though this differs from MediaWiki. (#270) v0.6.3 (released September 2, 2021): diff --git a/docs/changelog.rst b/docs/changelog.rst index fabb244..b619763 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,14 +1,19 @@ Changelog ========= -v0.7 ----- +v0.6.4 +------ Unreleased (`changes `__): +- Dropped support for end-of-life Python 3.5. - Added support for Python 3.10. (`#278 `_) +- Fixed a regression in v0.6.2 that broke parsing of nested wikilinks in file + captions. For now, the parser will interpret nested wikilinks in normal links + as well, even though this differs from MediaWiki. 
+ (`#270 `_) v0.6.3 ------ diff --git a/src/mwparserfromhell/parser/ctokenizer/tok_parse.c b/src/mwparserfromhell/parser/ctokenizer/tok_parse.c index f1d036f..3ee62fd 100644 --- a/src/mwparserfromhell/parser/ctokenizer/tok_parse.c +++ b/src/mwparserfromhell/parser/ctokenizer/tok_parse.c @@ -51,7 +51,8 @@ static int Tokenizer_parse_tag(Tokenizer *); /* Determine whether the given code point is a marker. */ -static int is_marker(Py_UCS4 this) +static int +is_marker(Py_UCS4 this) { int i; @@ -2929,9 +2930,10 @@ Tokenizer_parse(Tokenizer *self, uint64_t context, int push) return NULL; } } else if (this == next && next == '[' && Tokenizer_CAN_RECURSE(self)) { - if (this_context & LC_WIKILINK_TEXT) { - return Tokenizer_fail_route(self); - } + // TODO: Only do this if not in a file context: + // if (this_context & LC_WIKILINK_TEXT) { + // return Tokenizer_fail_route(self); + // } if (!(this_context & AGG_NO_WIKILINKS)) { if (Tokenizer_parse_wikilink(self)) { return NULL; diff --git a/src/mwparserfromhell/parser/tokenizer.py b/src/mwparserfromhell/parser/tokenizer.py index 44bad01..dcdfcee 100644 --- a/src/mwparserfromhell/parser/tokenizer.py +++ b/src/mwparserfromhell/parser/tokenizer.py @@ -1406,8 +1406,9 @@ class Tokenizer: return self._handle_argument_end() self._emit_text("}") elif this == nxt == "[" and self._can_recurse(): - if self._context & contexts.WIKILINK_TEXT: - self._fail_route() + # TODO: Only do this if not in a file context: + # if self._context & contexts.WIKILINK_TEXT: + # self._fail_route() if not self._context & contexts.NO_WIKILINKS: self._parse_wikilink() else: diff --git a/tests/tokenizer/wikilinks.mwtest b/tests/tokenizer/wikilinks.mwtest index 1dab688..34651d7 100644 --- a/tests/tokenizer/wikilinks.mwtest +++ b/tests/tokenizer/wikilinks.mwtest @@ -54,6 +54,20 @@ output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="bar[b --- +name: nested +label: a wikilink nested within another +input: "[[file:foo|[[bar]]]]" +output: 
[WikilinkOpen(), Text(text="file:foo"), WikilinkSeparator(), WikilinkOpen(), Text(text="bar"), WikilinkClose(), WikilinkClose()] + +--- + +name: nested_padding +label: a wikilink nested within another, separated by other data +input: "[[file:foo|a[[b]]c]]" +output: [WikilinkOpen(), Text(text="file:foo"), WikilinkSeparator(), Text(text="a"), WikilinkOpen(), Text(text="b"), WikilinkClose(), Text(text="c"), WikilinkClose()] + +--- + name: invalid_newline label: invalid wikilink: newline as only content input: "[[\n]]" @@ -89,20 +103,6 @@ output: [Text(text="[[foo[bar]]")] --- -name: invalid_nested_text -label: invalid wikilink: nested within the text of another -input: "[[foo|[[bar]]]]" -output: [Text(text="[[foo|"), WikilinkOpen(), Text(text="bar"), WikilinkClose(), Text(text="]]")] - ---- - -name: invalid_nested_text_2 -label: invalid wikilink: a wikilink nested within the text of another, with additional content -input: "[[foo|a[[b]]c]]" -output: [Text(text="[[foo|a"), WikilinkOpen(), Text(text="b"), WikilinkClose(), Text(text="c]]")] - ---- - name: invalid_nested_title label: invalid wikilink: nested within the title of another input: "[[foo[[bar]]]]"