Fix template parsing when comments are inside the name (fixes #59).

10 years ago · 6954480263
--- a/mwparserfromhell/parser/tokenizer.c
+++ b/mwparserfromhell/parser/tokenizer.c
@@ -1553,6 +1553,12 @@ static int Tokenizer_parse_comment(Tokenizer* self)
                return -1;
            Py_DECREF(comment);
            self->head += 2;
            if (self->topstack->context & LC_FAIL_NEXT) {
                /* _verify_safe() sets this flag while parsing a template name
                   when it encounters what might be a comment -- we must unset
                   it to let _verify_safe() know it was correct: */
                self->topstack->context ^= LC_FAIL_NEXT;
            }
            return 0;
        }
        if (Tokenizer_emit_char(self, this))
@@ -2478,8 +2484,13 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data)
            return 0;
        if (context & LC_HAS_TEXT) {
            if (context & LC_FAIL_ON_TEXT) {
                if (!Py_UNICODE_ISSPACE(data))
                if (!Py_UNICODE_ISSPACE(data)) {
                    if (data == '<' && Tokenizer_READ(self, 1) == '!') {
                        self->topstack->context |= LC_FAIL_NEXT;
                        return 0;
                    }
                    return -1;
                }
            }
            else {
                if (data == '\n')
@@ -2496,8 +2507,8 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data)
            }
        }
        else if (context & LC_FAIL_ON_LBRACE) {
            if (data == '{' || (Tokenizer_READ(self, -1) == '{' &&
                                 Tokenizer_READ(self, -2) == '{')) {
            if (data == '{' || (Tokenizer_READ_BACKWARDS(self, 1) == '{' &&
                                 Tokenizer_READ_BACKWARDS(self, 2) == '{')) {
                if (context & LC_TEMPLATE)
                    self->topstack->context |= LC_FAIL_ON_EQUALS;
                else
--- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py
@@ -608,6 +608,11 @@ class Tokenizer(object):
                self._emit(tokens.CommentEnd())
                self._emit_all(self._pop())
                self._head += 2
                if self._context & contexts.FAIL_NEXT:
                    # _verify_safe() sets this flag while parsing a template
                    # name when it encounters what might be a comment -- we
                    # must unset it to let _verify_safe() know it was correct:
                    self._context ^= contexts.FAIL_NEXT
                return
            self._emit_text(this)
            self._head += 1
@@ -1021,6 +1026,9 @@ class Tokenizer(object):
            if context & contexts.HAS_TEXT:
                if context & contexts.FAIL_ON_TEXT:
                    if this is self.END or not this.isspace():
                        if this == "<" and self._read(1) == "!":
                            self._context |= contexts.FAIL_NEXT
                            return True
                        return False
                else:
                    if this == "\n":
--- a/tests/tokenizer/integration.mwtest
+++ b/tests/tokenizer/integration.mwtest
@@ -227,3 +227,17 @@ name:   newline_and_comment_in_template_name_5
 label:  a template name containing a newline followed by a comment
 input:  "{{foobar\n<!-- comment -->\ninvalid|key=value}}"
 output: [Text(text="{{foobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), Text(text="\ninvalid|key=value}}")]

 ---

 name:   newline_and_comment_in_template_name_6
 label:  a template name containing a newline followed by a comment start that is never closed
 input:  "{{foobar\n<!--|key=value}}"
 output: [Text(text="{{foobar\n<!--|key=value}}")]

 ---

 name:   newline_and_comment_in_template_name_7
 label:  a template name containing a newline followed by an incomplete comment start
 input:  "{{foobar\n<!|key=value}}"
 output: [Text(text="{{foobar\n<!|key=value}}")]