From 6954480263b537c775c960f9b64e3a9cd4706481 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 10 Jul 2014 20:17:45 -0400 Subject: [PATCH] Fix template parsing when comments are inside the name (fixes #59). --- mwparserfromhell/parser/tokenizer.c | 17 ++++++++++++++--- mwparserfromhell/parser/tokenizer.py | 8 ++++++++ tests/tokenizer/integration.mwtest | 14 ++++++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index d62b965..814ad50 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1553,6 +1553,12 @@ static int Tokenizer_parse_comment(Tokenizer* self) return -1; Py_DECREF(comment); self->head += 2; + if (self->topstack->context & LC_FAIL_NEXT) { + /* _verify_safe() sets this flag while parsing a template name + when it encounters what might be a comment -- we must unset + it to let _verify_safe() know it was correct: */ + self->topstack->context ^= LC_FAIL_NEXT; + } return 0; } if (Tokenizer_emit_char(self, this)) @@ -2478,8 +2484,13 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) return 0; if (context & LC_HAS_TEXT) { if (context & LC_FAIL_ON_TEXT) { - if (!Py_UNICODE_ISSPACE(data)) + if (!Py_UNICODE_ISSPACE(data)) { + if (data == '<' && Tokenizer_READ(self, 1) == '!') { + self->topstack->context |= LC_FAIL_NEXT; + return 0; + } return -1; + } } else { if (data == '\n') @@ -2496,8 +2507,8 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) } } else if (context & LC_FAIL_ON_LBRACE) { - if (data == '{' || (Tokenizer_READ(self, -1) == '{' && - Tokenizer_READ(self, -2) == '{')) { + if (data == '{' || (Tokenizer_READ_BACKWARDS(self, 1) == '{' && + Tokenizer_READ_BACKWARDS(self, 2) == '{')) { if (context & LC_TEMPLATE) self->topstack->context |= LC_FAIL_ON_EQUALS; else diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py 
index d867234..44f0d60 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -608,6 +608,11 @@ class Tokenizer(object): self._emit(tokens.CommentEnd()) self._emit_all(self._pop()) self._head += 2 + if self._context & contexts.FAIL_NEXT: + # _verify_safe() sets this flag while parsing a template + # name when it encounters what might be a comment -- we + # must unset it to let _verify_safe() know it was correct: + self._context ^= contexts.FAIL_NEXT return self._emit_text(this) self._head += 1 @@ -1021,6 +1026,9 @@ class Tokenizer(object): if context & contexts.HAS_TEXT: if context & contexts.FAIL_ON_TEXT: if this is self.END or not this.isspace(): + if this == "<" and self._read(1) == "!": + self._context |= contexts.FAIL_NEXT + return True return False else: if this == "\n": diff --git a/tests/tokenizer/integration.mwtest b/tests/tokenizer/integration.mwtest index ef6d5c5..1019175 100644 --- a/tests/tokenizer/integration.mwtest +++ b/tests/tokenizer/integration.mwtest @@ -227,3 +227,17 @@ name: newline_and_comment_in_template_name_5 label: a template name containing a newline followed by a comment input: "{{foobar\n<!-- comment -->\ninvalid|key=value}}" output: [Text(text="{{foobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), Text(text="\ninvalid|key=value}}")] + +--- + +name: newline_and_comment_in_template_name_6 +label: a template name containing a newline followed by a comment +input: "{{foobar\n