From 4c5cfe57b86e767cc0603ca6fccc2e9af27f0957 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Sun, 21 Jul 2013 10:37:23 -0400
Subject: [PATCH] Implement Tokenizer_handle_invalid_tag_start()

---
 mwparserfromhell/parser/tokenizer.c | 65 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 1 deletion(-)

diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c
index 062d05b..37c5639 100644
--- a/mwparserfromhell/parser/tokenizer.c
+++ b/mwparserfromhell/parser/tokenizer.c
@@ -1707,7 +1707,70 @@ Tokenizer_handle_tag_close_close(Tokenizer* self)
 static int
 Tokenizer_handle_invalid_tag_start(Tokenizer* self)
 {
-    return 0;
+    /* Handle "</name" where `name` may be a single-only tag written in
+       closing form. The text from two characters past the head up to the
+       next marker is taken as the name. If it passes IS_SINGLE_ONLY, the tag
+       is parsed and its first token is flagged invalid=True; otherwise the
+       route fails and "<" and "/" are emitted as text with the head rewound.
+       Returns 0 on success, -1 on error. */
+    Py_ssize_t reset = self->head + 1, pos = 0;
+    Textbuffer* buf;
+    PyObject *name, *tag = NULL;
+    Py_UNICODE this;
+    int is_marker, i;
+
+    self->head += 2;
+    buf = Textbuffer_new();
+    if (!buf)
+        return -1;
+    while (1) {
+        this = Tokenizer_READ(self, pos);
+        is_marker = 0;
+        for (i = 0; i < NUM_MARKERS; i++) {
+            if (*MARKERS[i] == this) {
+                is_marker = 1;
+                break;
+            }
+        }
+        if (is_marker) {
+            name = Textbuffer_render(buf);
+            if (!name) {
+                Textbuffer_dealloc(buf);
+                return -1;
+            }
+            if (!IS_SINGLE_ONLY(name))
+                FAIL_ROUTE();
+            Py_DECREF(name);
+            break;
+        }
+        // Accumulate the character so the rendered name is not always empty
+        if (Textbuffer_write(&buf, this)) {
+            Textbuffer_dealloc(buf);
+            return -1;
+        }
+        pos++;
+    }
+    Textbuffer_dealloc(buf);
+    if (!BAD_ROUTE)
+        tag = Tokenizer_really_parse_tag(self);
+    // Check the route before tag: a NULL tag with BAD_ROUTE set is a failed
+    // route to fall back from, not a hard error
+    if (BAD_ROUTE) {
+        RESET_ROUTE();
+        self->head = reset;
+        if (Tokenizer_emit_text(self, *"<") ||
+            Tokenizer_emit_text(self, *"/"))
+            return -1;
+        return 0;
+    }
+    if (!tag)
+        return -1;
+    // Set invalid=True flag of TagOpenOpen
+    if (PyObject_SetAttrString(PyList_GET_ITEM(tag, 0), "invalid", Py_True))
+        return -1;
+    // NOTE(review): whether Tokenizer_emit_all() takes over the reference to
+    // tag is not visible here; confirm and add a Py_DECREF if it does not
+    return Tokenizer_emit_all(self, tag);
 }
 
 /*