Browse Source

Fix template parsing when comments are inside the name (fixes #59).

tags/v0.4
Ben Kurtovic 9 years ago
parent
commit
6954480263
3 changed files with 36 additions and 3 deletions
  1. +14
    -3
      mwparserfromhell/parser/tokenizer.c
  2. +8
    -0
      mwparserfromhell/parser/tokenizer.py
  3. +14
    -0
      tests/tokenizer/integration.mwtest

+ 14
- 3
mwparserfromhell/parser/tokenizer.c View File

@@ -1553,6 +1553,12 @@ static int Tokenizer_parse_comment(Tokenizer* self)
return -1;
Py_DECREF(comment);
self->head += 2;
if (self->topstack->context & LC_FAIL_NEXT) {
/* _verify_safe() sets this flag while parsing a template name
when it encounters what might be a comment -- we must unset
it to let _verify_safe() know it was correct: */
self->topstack->context ^= LC_FAIL_NEXT;
}
return 0;
}
if (Tokenizer_emit_char(self, this))
@@ -2478,8 +2484,13 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data)
return 0;
if (context & LC_HAS_TEXT) {
if (context & LC_FAIL_ON_TEXT) {
if (!Py_UNICODE_ISSPACE(data))
if (!Py_UNICODE_ISSPACE(data)) {
if (data == '<' && Tokenizer_READ(self, 1) == '!') {
self->topstack->context |= LC_FAIL_NEXT;
return 0;
}
return -1;
}
}
else {
if (data == '\n')
@@ -2496,8 +2507,8 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data)
}
}
else if (context & LC_FAIL_ON_LBRACE) {
if (data == '{' || (Tokenizer_READ(self, -1) == '{' &&
Tokenizer_READ(self, -2) == '{')) {
if (data == '{' || (Tokenizer_READ_BACKWARDS(self, 1) == '{' &&
Tokenizer_READ_BACKWARDS(self, 2) == '{')) {
if (context & LC_TEMPLATE)
self->topstack->context |= LC_FAIL_ON_EQUALS;
else


+ 8
- 0
mwparserfromhell/parser/tokenizer.py View File

@@ -608,6 +608,11 @@ class Tokenizer(object):
self._emit(tokens.CommentEnd())
self._emit_all(self._pop())
self._head += 2
if self._context & contexts.FAIL_NEXT:
# _verify_safe() sets this flag while parsing a template
# name when it encounters what might be a comment -- we
# must unset it to let _verify_safe() know it was correct:
self._context ^= contexts.FAIL_NEXT
return
self._emit_text(this)
self._head += 1
@@ -1021,6 +1026,9 @@ class Tokenizer(object):
if context & contexts.HAS_TEXT:
if context & contexts.FAIL_ON_TEXT:
if this is self.END or not this.isspace():
if this == "<" and self._read(1) == "!":
self._context |= contexts.FAIL_NEXT
return True
return False
else:
if this == "\n":


+ 14
- 0
tests/tokenizer/integration.mwtest View File

@@ -227,3 +227,17 @@ name: newline_and_comment_in_template_name_5
label: a template name containing a newline followed by a comment
input: "{{foobar\n<!-- comment -->\ninvalid|key=value}}"
output: [Text(text="{{foobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), Text(text="\ninvalid|key=value}}")]

---

name: newline_and_comment_in_template_name_6
label: a template name containing a newline followed by an unterminated comment start
input: "{{foobar\n<!--|key=value}}"
output: [Text(text="{{foobar\n<!--|key=value}}")]

---

name: newline_and_comment_in_template_name_7
label: a template name containing a newline followed by an incomplete comment opener
input: "{{foobar\n<!|key=value}}"
output: [Text(text="{{foobar\n<!|key=value}}")]

Loading…
Cancel
Save