From 929fa25e1fcd89d7e9e1d456aac0404c284906c8 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 10 Jul 2013 01:05:08 -0400 Subject: [PATCH] Correctly tokenize single-only HTML tags, part two. --- mwparserfromhell/parser/tokenizer.py | 34 ++++++++++++++++++++++++++-------- tests/tokenizer/tags.mwtest | 6 +++--- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 3873f58..4192cc4 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -460,7 +460,7 @@ class Tokenizer(object): self._handle_tag_close_open(data, tokens.TagCloseOpen) self._context = contexts.TAG_BODY if is_single_only(self._stack[1].text): - return self._handle_single_only_tag() + return self._handle_single_only_tag_end() if is_parsable(self._stack[1].text): return self._parse(push=False) return self._handle_blacklisted_tag() @@ -598,12 +598,26 @@ class Tokenizer(object): self._emit(tokens.TagCloseClose()) return self._pop() - def _handle_single_only_tag(self): + def _handle_invalid_tag_start(self): + """Handle the (possible) start of an implicitly closing single tag.""" + reset = self._head + 1 + self._head += 2 + try: + if not is_single_only(self.tag_splitter.split(self._read())[0]): + raise BadRoute() + tag = self._really_parse_tag() + except BadRoute: + self._head = reset + self._emit_text("bar"), TemplateOpen(), Text(text="baz"), TemplateClo name: single_only_open_close label: a tag that can only be single; both an open and a close tag input: "foo
<br>bar{{baz}}</br>
" -output: [Text(text="foo"), TagOpenOpen(showtag=True), Text(text="br"), TagCloseSelfclose(padding="", implicit=True), Text(text="bar"), TemplateOpen(), Text(text="baz"), TemplateClose(), TagOpenOpen(showtag=True, invalid=True), Text(text="br"), TagCloseSelfclose(padding="")] +output: [Text(text="foo"), TagOpenOpen(showtag=True), Text(text="br"), TagCloseSelfclose(padding="", implicit=True), Text(text="bar"), TemplateOpen(), Text(text="baz"), TemplateClose(), TagOpenOpen(showtag=True, invalid=True), Text(text="br"), TagCloseSelfclose(padding="", implicit=True)] --- @@ -519,11 +519,11 @@ output: [Text(text="foo"), TagOpenOpen(showtag=True), Text(text="br"), TagCloseS name: single_only_close label: a tag that can only be single; just a close tag input: "foo
</br>bar{{baz}}" -output: [Text(text="foo"), TagOpenOpen(showtag=True, invalid=True), Text(text="br"), TagCloseSelfclose(padding=""), Text(text="bar"), TemplateOpen(), Text(text="baz"), TemplateClose()] +output: [Text(text="foo"), TagOpenOpen(showtag=True, invalid=True), Text(text="br"), TagCloseSelfclose(padding="", implicit=True), Text(text="bar"), TemplateOpen(), Text(text="baz"), TemplateClose()] --- name: single_only_double label: a tag that can only be single; a tag with backslashes at the beginning and end input: "foo
</br/>bar{{baz}}" -output: [Text(text="foo"), TagOpenOpen(showtag=True, invalid=True), Text(text="br"), TagCloseSelfclose(padding="", implicit=True), Text(text="bar"), TemplateOpen(), Text(text="baz"), TemplateClose()] +output: [Text(text="foo"), TagOpenOpen(showtag=True, invalid=True), Text(text="br"), TagCloseSelfclose(padding=""), Text(text="bar"), TemplateOpen(), Text(text="baz"), TemplateClose()]