From 90061b6844407a7671501d3060d9617a18d6e59b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Mon, 21 Dec 2020 10:13:26 +0100 Subject: [PATCH] Fix parsing of section headings inside templates (#233) Fixes #198 Co-authored-by: Ben Kurtovic --- mwparserfromhell/parser/ctokenizer/tok_parse.c | 8 ++++++-- mwparserfromhell/parser/tokenizer.py | 7 +++++-- tests/tokenizer/templates.mwtest | 28 ++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/mwparserfromhell/parser/ctokenizer/tok_parse.c b/mwparserfromhell/parser/ctokenizer/tok_parse.c index be7018b..e73b3ef 100644 --- a/mwparserfromhell/parser/ctokenizer/tok_parse.c +++ b/mwparserfromhell/parser/ctokenizer/tok_parse.c @@ -2628,7 +2628,11 @@ PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) return NULL; } else if (this == '=' && this_context & LC_TEMPLATE_PARAM_KEY) { - if (Tokenizer_handle_template_param_value(self)) + if (!(self->global & GL_HEADING) && (!last || last == '\n') && next == '=') { + if (Tokenizer_parse_heading(self)) + return NULL; + } + else if (Tokenizer_handle_template_param_value(self)) return NULL; } else if (this == next && next == '}' && this_context & LC_TEMPLATE) @@ -2668,7 +2672,7 @@ PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) } else if (this == ']' && this_context & LC_EXT_LINK_TITLE) return Tokenizer_pop(self); - else if (this == '=' && !(self->global & GL_HEADING)) { + else if (this == '=' && !(self->global & GL_HEADING) && !(this_context & LC_TEMPLATE)) { if (!last || last == '\n') { if (Tokenizer_parse_heading(self)) return NULL; diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index d4e6c8c..ab61f92 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1326,7 +1326,10 @@ class Tokenizer: elif this == "|" and self._context & contexts.TEMPLATE: self._handle_template_param() elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY: - self._handle_template_param_value() + if not self._global & contexts.GL_HEADING and self._read(-1) in ("\n", self.START) and nxt == "=": + self._parse_heading() + else: + self._handle_template_param_value() elif this == nxt == "}" and self._context & contexts.TEMPLATE: return self._handle_template_end() elif this == "|" and self._context & contexts.ARGUMENT_NAME: @@ -1350,7 +1353,7 @@ class Tokenizer: self._parse_external_link(False) elif this == "]" and self._context & contexts.EXT_LINK_TITLE: return self._pop() - elif this == "=" and not self._global & contexts.GL_HEADING: + elif this == "=" and not self._global & contexts.GL_HEADING and not self._context & contexts.TEMPLATE: if self._read(-1) in ("\n", self.START): self._parse_heading() else: diff --git a/tests/tokenizer/templates.mwtest b/tests/tokenizer/templates.mwtest index 8d30069..72ba9c7 100644 --- a/tests/tokenizer/templates.mwtest +++ b/tests/tokenizer/templates.mwtest @@ -695,3 +695,31 @@ name: recursion_opens_and_closes label: test potentially dangerous recursion: template openings and closings input: "{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}" output: [Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose()] + +--- + +name: invalid_section_level_1 +label: level 1 headings inside a template are always invalid +input: "{{foo|bar\n=baz=\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateParamEquals(), Text(text="baz=\n"), TemplateClose()] + +--- + +name: section_level_2 +label: valid level 2 heading inside a template +input: "{{foo|bar\n==baz==\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), HeadingStart(level=2), Text(text="baz"), HeadingEnd(), Text(text="\n"), TemplateClose()] + +--- + +name: invalid_section_level_2 +label: invalid level 2 heading inside a template +input: "{{foo|bar==baz==\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="=baz==\n"), TemplateClose()] + +--- + +name: section_level_2_after_template_parameter +label: level 2 heading inside a template after a parameter +input: "{{foo|bar=\n==baz==\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="\n==baz==\n"), TemplateClose()]