From 77092e066ca6b39512a38b485ed0f047bfc6e32b Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Sat, 24 Aug 2013 16:31:22 -0400
Subject: [PATCH] Fix C tokenizer behavior re: some single_only tag edge cases.

---
 mwparserfromhell/parser/tokenizer.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c
index 1bc1f14..c9527ab 100644
--- a/mwparserfromhell/parser/tokenizer.c
+++ b/mwparserfromhell/parser/tokenizer.c
@@ -2016,7 +2016,7 @@ static int Tokenizer_handle_invalid_tag_start(Tokenizer* self)
         return -1;
     while (1) {
         this = Tokenizer_READ(self, pos);
-        if (is_marker(this)) {
+        if (Py_UNICODE_ISSPACE(this) || is_marker(this)) {
             name = Textbuffer_render(buf);
             if (!name) {
                 Textbuffer_dealloc(buf);
@@ -2031,16 +2031,15 @@ static int Tokenizer_handle_invalid_tag_start(Tokenizer* self)
         pos++;
     }
     Textbuffer_dealloc(buf);
-    if (!BAD_ROUTE) {
+    if (!BAD_ROUTE)
         tag = Tokenizer_really_parse_tag(self);
-        if (!tag)
-            return -1;
-    }
     if (BAD_ROUTE) {
         RESET_ROUTE();
         self->head = reset;
         return Tokenizer_emit_text(self, "