diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 907c55e..d302ea2 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -856,7 +856,6 @@ Tokenizer_parse_wikilink(Tokenizer* self) return -1; } } - else { PyObject *class, *token; PyObject *wikilink = Tokenizer_parse(self, LC_WIKILINK_TITLE); @@ -899,7 +898,6 @@ Tokenizer_parse_wikilink(Tokenizer* self) } Py_DECREF(token); } - return 0; } @@ -983,7 +981,29 @@ Tokenizer_really_parse_entity(Tokenizer* self) static int Tokenizer_parse_entity(Tokenizer* self) { + Py_ssize_t reset = self->head; + if (Tokenizer_push(self, 0)) + return -1; + if (setjmp(exception_env) == BAD_ROUTE) { + self->head = reset; + if (Tokenizer_write_text(self, Tokenizer_read(self, 0))) + return -1; + } + else { + if (Tokenizer_really_parse_entity(self)) + return -1; + + PyObject* tokenlist = Tokenizer_pop(self); + if (!tokenlist) return -1; + if (Tokenizer_write_all(self, tokenlist)) { + Py_DECREF(tokenlist); + return -1; + } + + Py_DECREF(tokenlist); + } + return 0; } /* @@ -992,7 +1012,62 @@ Tokenizer_parse_entity(Tokenizer* self) static int Tokenizer_parse_comment(Tokenizer* self) { + self->head += 4; + Py_ssize_t reset = self->head - 1; + if (setjmp(exception_env) == BAD_ROUTE) { + self->head = reset; + PyObject* text = PyUnicode_FromString("