From c20d3f2a6af7a0ad866c0788b9b1b91badb05571 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Wed, 14 Aug 2013 02:01:13 -0400
Subject: [PATCH] handle_list_marker() and handle_list()

---
Review notes: Tokenizer_handle_list_marker() now checks the result of
PyDict_SetItemString(), `html` is const-qualified because it points at
string literals, and GET_HTML_TAG() parenthesises its argument. The
post-image blob ids on the "index" lines below predate these changes.

 mwparserfromhell/parser/tokenizer.c | 66 ++++++++++++++++++++++++++++++++++-------
 mwparserfromhell/parser/tokenizer.h |  2 +-
 2 files changed, 57 insertions(+), 11 deletions(-)

diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c
index 137c3a5..f5e1f27 100644
--- a/mwparserfromhell/parser/tokenizer.c
+++ b/mwparserfromhell/parser/tokenizer.c
@@ -2101,12 +2101,49 @@ static PyObject* Tokenizer_parse_style(Tokenizer* self)
 */
 static int Tokenizer_handle_list_marker(Tokenizer* self)
 {
-    // markup = self._read()
-    // if markup == ";":
-    //     self._context |= contexts.DL_TERM
-    // self._emit(tokens.TagOpenOpen(wiki_markup=markup))
-    // self._emit_text(get_html_tag(markup))
-    // self._emit(tokens.TagCloseSelfclose())
+    PyObject *markup = Tokenizer_read(self, 0), *kwargs, *token;
+    Py_UNICODE code = *PyUnicode_AS_UNICODE(markup);
+    const char *html;
+    int i = 0;
+
+    if (code == *";")
+        self->topstack->context |= LC_DLTERM;
+    kwargs = PyDict_New();
+    if (!kwargs)
+        return -1;
+    /* Bail out on a failed insert rather than constructing the token from
+       a kwargs dict that lacks wiki_markup. */
+    if (PyDict_SetItemString(kwargs, "wiki_markup", markup)) {
+        Py_DECREF(kwargs);
+        return -1;
+    }
+    token = PyObject_Call(TagOpenOpen, NOARGS, kwargs);
+    if (!token) {
+        Py_DECREF(kwargs);
+        return -1;
+    }
+    Py_DECREF(kwargs);
+    if (Tokenizer_emit(self, token)) {
+        Py_DECREF(token);
+        return -1;
+    }
+    Py_DECREF(token);
+    /* Emit the HTML tag name one character at a time. */
+    html = GET_HTML_TAG(code);
+    while (html[i]) {
+        if (Tokenizer_emit_text(self, html[i]))
+            return -1;
+        i++;
+    }
+    token = PyObject_CallObject(TagCloseSelfclose, NULL);
+    if (!token)
+        return -1;
+    if (Tokenizer_emit(self, token)) {
+        Py_DECREF(token);
+        return -1;
+    }
+    Py_DECREF(token);
+    return 0;
 }
 
 /*
@@ -2114,10 +2151,19 @@ static int Tokenizer_handle_list_marker(Tokenizer* self)
 */
 static int Tokenizer_handle_list(Tokenizer* self)
 {
-    // self._handle_list_marker()
-    // while self._read(1) in ("#", "*", ";", ":"):
-    //     self._head += 1
-    //     self._handle_list_marker()
+    Py_UNICODE marker = Tokenizer_READ(self, 1);
+
+    if (Tokenizer_handle_list_marker(self))
+        return -1;
+    /* Consume every consecutive list marker that follows the first one. */
+    while (marker == *"#" || marker == *"*" || marker == *";" ||
+           marker == *":") {
+        self->head++;
+        if (Tokenizer_handle_list_marker(self))
+            return -1;
+        marker = Tokenizer_READ(self, 1);
+    }
+    return 0;
 }
 
 /*
diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h
index 29e8fbe..4136285 100644
--- a/mwparserfromhell/parser/tokenizer.h
+++ b/mwparserfromhell/parser/tokenizer.h
@@ -220,7 +220,7 @@ typedef struct {
 
 /* Macros for accessing HTML tag definitions: */
 
-#define GET_HTML_TAG(markup) (call_tag_def_func("get_html_tag", markup))
+#define GET_HTML_TAG(markup) ((markup) == *":" ? "dd" : (markup) == *";" ? "dt" : "li")
 #define IS_PARSABLE(tag) (call_tag_def_func("is_parsable", tag))
 #define IS_SINGLE(tag) (call_tag_def_func("is_single", tag))
 #define IS_SINGLE_ONLY(tag) (call_tag_def_func("is_single_only", tag))