From fb92012fcb351c49a39a8535f203921f597e92d8 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 6 Jul 2013 00:12:06 -0400 Subject: [PATCH] Support parser-blacklisted tags like <nowiki> --- mwparserfromhell/parser/tokenizer.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 47c04a9..45dfd01 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -459,7 +459,9 @@ class Tokenizer(object): elif this == ">" and can_exit: self._handle_tag_close_open(data, tokens.TagCloseOpen) self._context = contexts.TAG_BODY - return self._parse(push=False) + if is_parsable(self._stack[1].text): + return self._parse(push=False) + return self._handle_blacklisted_tag() elif this == "/" and next == ">" and can_exit: self._handle_tag_close_open(data, tokens.TagCloseSelfclose) return self._pop() @@ -559,6 +561,19 @@ class Tokenizer(object): else: self._emit_text(text) + def _handle_blacklisted_tag(self): + """Handle the body of an HTML tag that is parser-blacklisted.""" + while True: + this, next = self._read(), self._read(1) + self._head += 1 + if this is self.END: + self._fail_route() + elif this == "<" and next == "/": + self._handle_tag_open_close() + return self._parse(push=False) + else: + self._emit_text(this) + def _handle_tag_close_open(self, data, token): """Handle the closing of a open tag (``<foo>``).""" if data.context & (data.CX_ATTR_NAME | data.CX_ATTR_VALUE):