From ce27d5d385a4adc14e136b33471216038dfc70a1 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 29 Jun 2013 00:33:41 -0400 Subject: [PATCH] Fix six failing tests; add three more (all passing). --- mwparserfromhell/parser/tokenizer.py | 33 ++++++++++++++++++--------------- tests/tokenizer/tags.mwtest | 21 +++++++++++++++++++++ 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index e7fdb0e..93e9a8d 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -21,6 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals +from itertools import takewhile from math import log import re @@ -416,19 +417,6 @@ class Tokenizer(object): else: self._write_all(tokens) - def _get_tag_from_stack(self, stack=None): - """Return the tag based on the text in *stack*. - - If *stack* is ``None``, we will use the current, topmost one. - """ - if stack is None: - stack = self._stack - self._push_textbuffer() - if not stack: - self._fail_route() # Tag has an empty name? - text = [tok for tok in stack if isinstance(tok, tokens.Text)] - return "".join([token.text for token in text]).rstrip().lower() - def _actually_close_tag_opening(self): """Handle cleanup at the end of a opening tag. 
@@ -557,14 +545,27 @@ class Tokenizer(object): while chunks: self._actually_handle_chunk(chunks, True) + def _get_tag_from_stack(self, stack=None): + """Return the tag based on the text in *stack*.""" + if not stack: + sentinels = (tokens.TagAttrStart, tokens.TagCloseOpen) + func = lambda tok: not isinstance(tok, sentinels) + stack = takewhile(func, self._stack) + text = [tok.text for tok in stack if isinstance(tok, tokens.Text)] + return "".join(text).rstrip().lower() + def _handle_tag_close_open(self): """Handle the ending of an open tag (``<foo>``).""" padding = self._actually_close_tag_opening() + if not self._get_tag_from_stack(): # Tags cannot be blank + self._fail_route() self._write(tokens.TagCloseOpen(padding=padding)) def _handle_tag_selfclose(self): """Handle the ending of an tag that closes itself (``<foo />``).""" padding = self._actually_close_tag_opening() + if not self._get_tag_from_stack(): # Tags cannot be blank + self._fail_route() self._write(tokens.TagCloseSelfclose(padding=padding)) self._head += 1 return self._pop() @@ -578,8 +579,10 @@ class Tokenizer(object): def _handle_tag_close_close(self): """Handle the ending of a closing tag (``</foo>``).""" closing = self._pop() - if self._get_tag_from_stack(closing) != self._get_tag_from_stack(): - # Closing and opening tags are not the same, so fail this route: + close_tag = self._get_tag_from_stack(closing) + open_tag = self._get_tag_from_stack() + if not close_tag or close_tag != open_tag: + # Closing and opening tags are empty or unequal, so fail this tag: self._fail_route() self._write_all(closing) self._write(tokens.TagCloseClose()) diff --git a/tests/tokenizer/tags.mwtest b/tests/tokenizer/tags.mwtest index a76d6b6..849a4fd 100644 --- a/tests/tokenizer/tags.mwtest +++ b/tests/tokenizer/tags.mwtest @@ -233,3 +233,24 @@ name: incomplete_open_text_wrong_close label: incomplete tags: an open tag, text, wrong close input: "junk <foo>bar</bar>" output: [Text(text="junk <foo>bar</bar>")] + +--- + +name: incomplete_no_tag_name_open +label: 
incomplete tags: no tag name within brackets; just an open +input: "junk <>" +output: [Text(text="junk <>")] + +--- + +name: incomplete_no_tag_name_selfclosing +label: incomplete tags: no tag name within brackets; self-closing +input: "junk < />" +output: [Text(text="junk < />")] + +--- + +name: incomplete_no_tag_name_open_close +label: incomplete tags: no tag name within brackets; open and close +input: "junk <></>" +output: [Text(text="junk <></>")]