From 150f3311290a8569eb960084e070eb23f6e70c3c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 25 Sep 2012 18:11:29 -0400 Subject: [PATCH] Implement Tokenizer_parse_entity(), Tokenizer_parse_comment(). --- mwparserfromhell/parser/tokenizer.c | 79 ++++++++++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 2 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 907c55e..d302ea2 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -856,7 +856,6 @@ Tokenizer_parse_wikilink(Tokenizer* self) return -1; } } - else { PyObject *class, *token; PyObject *wikilink = Tokenizer_parse(self, LC_WIKILINK_TITLE); @@ -899,7 +898,6 @@ Tokenizer_parse_wikilink(Tokenizer* self) } Py_DECREF(token); } - return 0; } @@ -983,7 +981,29 @@ Tokenizer_really_parse_entity(Tokenizer* self) static int Tokenizer_parse_entity(Tokenizer* self) { + Py_ssize_t reset = self->head; + if (Tokenizer_push(self, 0)) + return -1; + if (setjmp(exception_env) == BAD_ROUTE) { + self->head = reset; + if (Tokenizer_write_text(self, Tokenizer_read(self, 0))) + return -1; + } + else { + if (Tokenizer_really_parse_entity(self)) + return -1; + + PyObject* tokenlist = Tokenizer_pop(self); + if (!tokenlist) return -1; + if (Tokenizer_write_all(self, tokenlist)) { + Py_DECREF(tokenlist); + return -1; + } + + Py_DECREF(tokenlist); + } + return 0; } /* @@ -992,7 +1012,62 @@ Tokenizer_parse_entity(Tokenizer* self) static int Tokenizer_parse_comment(Tokenizer* self) { + self->head += 4; + Py_ssize_t reset = self->head - 1; + if (setjmp(exception_env) == BAD_ROUTE) { + self->head = reset; + PyObject* text = PyUnicode_FromString("