From df9f7388b65185210deb5a5402e84750f820f0d6 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 14 Aug 2013 15:00:45 -0400 Subject: [PATCH 01/12] emit_FAST(), emit_first_FAST(); update comment parsing --- mwparserfromhell/parser/tokenizer.c | 328 +++++++++++------------------------- mwparserfromhell/parser/tokenizer.h | 86 +++++----- 2 files changed, 140 insertions(+), 274 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index eff000a..a78c6d9 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -368,6 +368,40 @@ static int Tokenizer_emit_first(Tokenizer* self, PyObject* token) return 0; } +static int Tokenizer_emit_FAST(Tokenizer* self, PyObject* token) +{ + PyObject* instance; + + if (Tokenizer_push_textbuffer(self)) + return -1; + instance = PyObject_CallObject(token, NULL); + if (!instance) + return -1; + if (PyList_Append(self->topstack->stack, instance)) { + Py_DECREF(instance); + return -1; + } + Py_DECREF(instance); + return 0; +} + +static int Tokenizer_emit_first_FAST(Tokenizer* self, PyObject* token) +{ + PyObject* instance; + + if (Tokenizer_push_textbuffer(self)) + return -1; + instance = PyObject_CallObject(token, NULL); + if (!instance) + return -1; + if (PyList_Insert(self->topstack->stack, 0, instance)) { + Py_DECREF(instance); + return -1; + } + Py_DECREF(instance); + return 0; +} + /* Write a Unicode codepoint to the current textbuffer. */ @@ -503,7 +537,7 @@ static PyObject* Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) */ static int Tokenizer_parse_template(Tokenizer* self) { - PyObject *template, *token; + PyObject *template; Py_ssize_t reset = self->head; template = Tokenizer_parse(self, LC_TEMPLATE_NAME, 1); @@ -513,30 +547,17 @@ static int Tokenizer_parse_template(Tokenizer* self) } if (!template) return -1; - token = PyObject_CallObject(TemplateOpen, NULL); - if (!token) { + if (Tokenizer_emit_first_FAST(self, TemplateOpen)) { Py_DECREF(template); return -1; } - if (Tokenizer_emit_first(self, token)) { - Py_DECREF(token); - Py_DECREF(template); - return -1; - } - Py_DECREF(token); if (Tokenizer_emit_all(self, template)) { Py_DECREF(template); return -1; } Py_DECREF(template); - token = PyObject_CallObject(TemplateClose, NULL); - if (!token) - return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); + if (Tokenizer_emit_FAST(self, TemplateClose)) return -1; - } - Py_DECREF(token); return 0; } @@ -545,7 +566,7 @@ static int Tokenizer_parse_template(Tokenizer* self) */ static int Tokenizer_parse_argument(Tokenizer* self) { - PyObject *argument, *token; + PyObject *argument; Py_ssize_t reset = self->head; argument = Tokenizer_parse(self, LC_ARGUMENT_NAME, 1); @@ -555,30 +576,17 @@ static int Tokenizer_parse_argument(Tokenizer* self) } if (!argument) return -1; - token = PyObject_CallObject(ArgumentOpen, NULL); - if (!token) { - Py_DECREF(argument); - return -1; - } - if (Tokenizer_emit_first(self, token)) { - Py_DECREF(token); + if (Tokenizer_emit_first_FAST(self, ArgumentOpen)) { Py_DECREF(argument); return -1; } - Py_DECREF(token); if (Tokenizer_emit_all(self, argument)) { Py_DECREF(argument); return -1; } Py_DECREF(argument); - token = PyObject_CallObject(ArgumentClose, NULL); - if (!token) + if (Tokenizer_emit_FAST(self, ArgumentClose)) return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); - return -1; - } - Py_DECREF(token); return 0; } @@ -658,7 +666,7 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) */ static int Tokenizer_handle_template_param(Tokenizer* self) { - PyObject *stack, *token; + PyObject *stack; if (self->topstack->context & LC_TEMPLATE_NAME) self->topstack->context ^= LC_TEMPLATE_NAME; @@ -676,15 +684,8 @@ static int Tokenizer_handle_template_param(Tokenizer* self) } else self->topstack->context |= LC_TEMPLATE_PARAM_KEY; - - token = PyObject_CallObject(TemplateParamSeparator, NULL); - if (!token) - return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); + if (Tokenizer_emit_FAST(self, TemplateParamSeparator)) return -1; - } - Py_DECREF(token); if (Tokenizer_push(self, self->topstack->context)) return -1; return 0; @@ -695,7 +696,7 @@ static int Tokenizer_handle_template_param(Tokenizer* self) */ static int Tokenizer_handle_template_param_value(Tokenizer* self) { - PyObject *stack, *token; + PyObject *stack; stack = Tokenizer_pop_keeping_context(self); if (!stack) @@ -707,14 +708,8 @@ static int Tokenizer_handle_template_param_value(Tokenizer* self) Py_DECREF(stack); self->topstack->context ^= LC_TEMPLATE_PARAM_KEY; self->topstack->context |= LC_TEMPLATE_PARAM_VALUE; - token = PyObject_CallObject(TemplateParamEquals, NULL); - if (!token) + if (Tokenizer_emit_FAST(self, TemplateParamEquals)) return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); - return -1; - } - Py_DECREF(token); return 0; } @@ -745,17 +740,10 @@ static PyObject* Tokenizer_handle_template_end(Tokenizer* self) */ static int Tokenizer_handle_argument_separator(Tokenizer* self) { - PyObject* token; self->topstack->context ^= LC_ARGUMENT_NAME; self->topstack->context |= LC_ARGUMENT_DEFAULT; - token = PyObject_CallObject(ArgumentSeparator, NULL); - if (!token) - return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); + if (Tokenizer_emit_FAST(self, ArgumentSeparator)) return -1; - } - Py_DECREF(token); return 0; } @@ -765,6 +753,7 @@ static int Tokenizer_handle_argument_separator(Tokenizer* self) static PyObject* Tokenizer_handle_argument_end(Tokenizer* self) { PyObject* stack = Tokenizer_pop(self); + self->head += 2; return stack; } @@ -775,7 +764,7 @@ static PyObject* Tokenizer_handle_argument_end(Tokenizer* self) static int Tokenizer_parse_wikilink(Tokenizer* self) { Py_ssize_t reset; - PyObject *wikilink, *token; + PyObject *wikilink; self->head += 2; reset = self->head - 1; @@ -789,30 +778,17 @@ static int Tokenizer_parse_wikilink(Tokenizer* self) } if (!wikilink) return -1; - token = PyObject_CallObject(WikilinkOpen, NULL); - if (!token) { + if (Tokenizer_emit_FAST(self, WikilinkOpen)) { Py_DECREF(wikilink); return -1; } - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); - Py_DECREF(wikilink); - return -1; - } - Py_DECREF(token); if (Tokenizer_emit_all(self, wikilink)) { Py_DECREF(wikilink); return -1; } Py_DECREF(wikilink); - token = PyObject_CallObject(WikilinkClose, NULL); - if (!token) - return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); + if (Tokenizer_emit_FAST(self, WikilinkClose)) return -1; - } - Py_DECREF(token); if (self->topstack->context & LC_FAIL_NEXT) self->topstack->context ^= LC_FAIL_NEXT; return 0; @@ -823,17 +799,10 @@ static int Tokenizer_parse_wikilink(Tokenizer* self) */ static int Tokenizer_handle_wikilink_separator(Tokenizer* self) { - PyObject* token; self->topstack->context ^= LC_WIKILINK_TITLE; self->topstack->context |= LC_WIKILINK_TEXT; - token = PyObject_CallObject(WikilinkSeparator, NULL); - if (!token) - return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); + if (Tokenizer_emit_FAST(self, WikilinkSeparator)) return -1; - } - Py_DECREF(token); return 0; } @@ -921,14 +890,8 @@ static int Tokenizer_parse_heading(Tokenizer* self) } Py_DECREF(heading->title); free(heading); - token = PyObject_CallObject(HeadingEnd, NULL); - if (!token) + if (Tokenizer_emit_FAST(self, HeadingEnd)) return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); - return -1; - } - Py_DECREF(token); self->global ^= GL_HEADING; return 0; } @@ -1010,14 +973,8 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) return 0; \ } - token = PyObject_CallObject(HTMLEntityStart, NULL); - if (!token) + if (Tokenizer_emit_FAST(self, HTMLEntityStart)) return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); - return -1; - } - Py_DECREF(token); self->head++; this = Tokenizer_READ(self, 0); if (this == *"") { @@ -1026,14 +983,8 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) } if (this == *"#") { numeric = 1; - token = PyObject_CallObject(HTMLEntityNumeric, NULL); - if (!token) + if (Tokenizer_emit_FAST(self, HTMLEntityNumeric)) return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); - return -1; - } - Py_DECREF(token); self->head++; this = Tokenizer_READ(self, 0); if (this == *"") { @@ -1156,14 +1107,8 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) return -1; } Py_DECREF(token); - token = PyObject_CallObject(HTMLEntityEnd, NULL); - if (!token) - return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); + if (Tokenizer_emit_FAST(self, HTMLEntityEnd)) return -1; - } - Py_DECREF(token); return 0; } @@ -1203,45 +1148,39 @@ static int Tokenizer_parse_entity(Tokenizer* self) static int Tokenizer_parse_comment(Tokenizer* self) { Py_ssize_t reset = self->head + 3; - PyObject *token, *comment; + PyObject *comment; + Py_UNICODE this; self->head += 4; - comment = Tokenizer_parse(self, LC_COMMENT, 1); - if (BAD_ROUTE) { - RESET_ROUTE(); - self->head = reset; - if (Tokenizer_emit_text(self, "