
Fix six failing tests; add three more (all passing).

tags/v0.3
Ben Kurtovic, 11 years ago
commit ce27d5d385

2 changed files with 39 additions and 15 deletions:
  1. mwparserfromhell/parser/tokenizer.py (+18, -15)
  2. tests/tokenizer/tags.mwtest (+21, -0)

mwparserfromhell/parser/tokenizer.py (+18, -15)

@@ -21,6 +21,7 @@
 # SOFTWARE.
 
 from __future__ import unicode_literals
+from itertools import takewhile
 from math import log
 import re
 
@@ -416,19 +417,6 @@ class Tokenizer(object):
         else:
             self._write_all(tokens)
 
-    def _get_tag_from_stack(self, stack=None):
-        """Return the tag based on the text in *stack*.
-
-        If *stack* is ``None``, we will use the current, topmost one.
-        """
-        if stack is None:
-            stack = self._stack
-        self._push_textbuffer()
-        if not stack:
-            self._fail_route()  # Tag has an empty name?
-        text = [tok for tok in stack if isinstance(tok, tokens.Text)]
-        return "".join([token.text for token in text]).rstrip().lower()
-
     def _actually_close_tag_opening(self):
         """Handle cleanup at the end of an opening tag.
 
@@ -557,14 +545,27 @@ class Tokenizer(object):
         while chunks:
             self._actually_handle_chunk(chunks, True)
 
+    def _get_tag_from_stack(self, stack=None):
+        """Return the tag based on the text in *stack*."""
+        if not stack:
+            sentinels = (tokens.TagAttrStart, tokens.TagCloseOpen)
+            func = lambda tok: not isinstance(tok, sentinels)
+            stack = takewhile(func, self._stack)
+        text = [tok.text for tok in stack if isinstance(tok, tokens.Text)]
+        return "".join(text).rstrip().lower()
+
     def _handle_tag_close_open(self):
         """Handle the ending of an open tag (``<foo>``)."""
         padding = self._actually_close_tag_opening()
+        if not self._get_tag_from_stack():  # Tags cannot be blank
+            self._fail_route()
         self._write(tokens.TagCloseOpen(padding=padding))
 
     def _handle_tag_selfclose(self):
         """Handle the ending of a tag that closes itself (``<foo />``)."""
         padding = self._actually_close_tag_opening()
+        if not self._get_tag_from_stack():  # Tags cannot be blank
+            self._fail_route()
         self._write(tokens.TagCloseSelfclose(padding=padding))
         self._head += 1
         return self._pop()
 
@@ -578,8 +579,10 @@ class Tokenizer(object):
     def _handle_tag_close_close(self):
         """Handle the ending of a closing tag (``</foo>``)."""
         closing = self._pop()
-        if self._get_tag_from_stack(closing) != self._get_tag_from_stack():
-            # Closing and opening tags are not the same, so fail this route:
+        close_tag = self._get_tag_from_stack(closing)
+        open_tag = self._get_tag_from_stack()
+        if not close_tag or close_tag != open_tag:
+            # Closing and opening tags are empty or unequal, so fail this tag:
             self._fail_route()
         self._write_all(closing)
         self._write(tokens.TagCloseClose())

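The rewritten _get_tag_from_stack scans the current stack only up to the first TagAttrStart or TagCloseOpen token, so attribute text no longer bleeds into the tag name, and an empty result now signals a blank name that the handlers reject. Below is a minimal, self-contained sketch of that takewhile pattern; the token classes are stand-ins for illustration, not the real ones from mwparserfromhell.parser.tokens.

from itertools import takewhile

# Stand-in token classes for illustration; the real ones live in
# mwparserfromhell.parser.tokens.
class Text:
    def __init__(self, text):
        self.text = text

class TagAttrStart:
    pass

class TagCloseOpen:
    pass

def tag_name(stack):
    """Join the Text tokens that appear before any attribute/close sentinel."""
    sentinels = (TagAttrStart, TagCloseOpen)
    head = takewhile(lambda tok: not isinstance(tok, sentinels), stack)
    text = [tok.text for tok in head if isinstance(tok, Text)]
    return "".join(text).rstrip().lower()

# A stack for something like "<ref name=foo>": scanning stops at TagAttrStart.
stack = [Text("ref"), TagAttrStart(), Text("name"), Text("foo"), TagCloseOpen()]
print(tag_name(stack))  # -> "ref"
print(tag_name([]))     # -> "" (a blank name, which the handlers above now reject)
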
tests/tokenizer/tags.mwtest (+21, -0)

@@ -233,3 +233,24 @@ name: incomplete_open_text_wrong_close
 label: incomplete tags: an open tag, text, wrong close
 input: "junk <ref>bar</span>"
 output: [Text(text="junk <ref>bar</span>")]
+
+---
+
+name: incomplete_no_tag_name_open
+label: incomplete tags: no tag name within brackets; just an open
+input: "junk <>"
+output: [Text(text="junk <>")]
+
+---
+
+name: incomplete_no_tag_name_selfclosing
+label: incomplete tags: no tag name within brackets; self-closing
+input: "junk < />"
+output: [Text(text="junk < />")]
+
+---
+
+name: incomplete_no_tag_name_open_close
+label: incomplete tags: no tag name within brackets; open and close
+input: "junk <></>"
+output: [Text(text="junk <></>")]

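The three new cases pin down the fallback behaviour: angle brackets with no tag name never become tag tokens and are emitted verbatim as text. A quick way to exercise the same behaviour through the public API, assuming a build that includes this fix:

import mwparserfromhell

for text in ("junk <>", "junk < />", "junk <></>"):
    code = mwparserfromhell.parse(text)
    # No Tag nodes are produced; the input round-trips unchanged as plain text.
    assert str(code) == text
    assert not code.filter_tags()
print("blank-tag inputs round-trip as text")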