Browse Source

Support tag nesting properly; unit tests; recursion checks for tags.

tags/v0.3
Ben Kurtovic 11 years ago
parent
commit
dd6bb1637d
2 changed files with 39 additions and 5 deletions
  1. +11
    -5
      mwparserfromhell/parser/tokenizer.py
  2. +28
    -0
      tests/tokenizer/tags.mwtest

+ 11
- 5
mwparserfromhell/parser/tokenizer.py View File

@@ -447,7 +447,8 @@ class Tokenizer(object):
self._write(tokens.TagOpenOpen(showtag=True))
while True:
this, next = self._read(), self._read(1)
-can_exit = not data.context & data.CX_QUOTED or data.context & data.CX_NEED_SPACE
+can_exit = (not data.context & (data.CX_QUOTED | data.CX_NAME) or
+data.context & data.CX_NEED_SPACE)
if this not in self.MARKERS:
for chunk in self.tag_splitter.split(this):
if self._handle_tag_chunk(data, chunk):
@@ -488,8 +489,8 @@ class Tokenizer(object):
if not chunk:
return
if data.context & data.CX_NAME:
-if chunk != chunk.lstrip(): # Tags cannot start with whitespace
-self._fail_route()
+if chunk in self.MARKERS or chunk.isspace():
+self._fail_route() # Tags must start with text (not a space)
self._write_text(chunk)
data.context = data.CX_NEED_SPACE
elif data.context & data.CX_NEED_SPACE:
@@ -563,6 +564,8 @@ class Tokenizer(object):
self._parse_template_or_argument()
elif chunk == next == "[":
self._parse_wikilink()
+elif chunk == "<":
+self._parse_tag()
else:
self._write_text(chunk)

@@ -735,10 +738,13 @@ class Tokenizer(object):
self._parse_comment()
else:
self._write_text(this)
-elif this == "<" and next != "/" and not self._context & contexts.TAG_CLOSE:
-self._parse_tag()
elif this == "<" and next == "/" and self._context & contexts.TAG_BODY:
self._handle_tag_open_close()
+elif this == "<":
+if not self._context & contexts.TAG_CLOSE and self._can_recurse():
+self._parse_tag()
+else:
+self._write_text("<")
elif this == ">" and self._context & contexts.TAG_CLOSE:
return self._handle_tag_close_close()
else:


+ 28
- 0
tests/tokenizer/tags.mwtest View File

@@ -96,6 +96,34 @@ output: [TagOpenOpen(showtag=True), Text(text="ref"), TagAttrStart(pad_first=" "

---

name: nested_tag
label: a tag nested within the attributes of another
input: "<ref name=<span style="color: red;">foo</span>>citation</ref>"
output: [TagOpenOpen(showtag=True), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagOpenOpen(showtag=True), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()]

---

name: nested_tag_quoted
label: a tag nested within the attributes of another, quoted
input: "<ref name="<span style="color: red;">foo</span>">citation</ref>"
output: [TagOpenOpen(showtag=True), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), TagOpenOpen(showtag=True), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()]

---

name: nested_troll_tag
label: a bogus tag that appears to be nested within the attributes of another
input: "<ref name=</ ><//>>citation</ref>"
output: [Text(text="<ref name=</ ><//>>citation</ref>")]

---

name: nested_troll_tag_quoted
label: a bogus tag that appears to be nested within the attributes of another, quoted
input: "<ref name="</ ><//>">citation</ref>"
output: [TagOpenOpen(showtag=True), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="</ ><//>"), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()]

---

name: invalid_space_begin_open
label: invalid tag: a space at the beginning of the open tag
input: "< ref>test</ref>"


Loading…
Cancel
Save