diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 912cfd7..6600203 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -207,7 +207,7 @@ static void Tokenizer_dealloc(Tokenizer* self) free(this); this = next; } - self->ob_type->tp_free((PyObject*) self); + Py_TYPE(self)->tp_free((PyObject*) self); } static int Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds) @@ -347,24 +347,49 @@ static void* Tokenizer_fail_route(Tokenizer* self) /* Write a token to the end of the current token stack. */ -static int Tokenizer_emit(Tokenizer* self, PyObject* token) +static int Tokenizer_emit_token(Tokenizer* self, PyObject* token, int first) { + PyObject* instance; + if (Tokenizer_push_textbuffer(self)) return -1; - if (PyList_Append(self->topstack->stack, token)) + instance = PyObject_CallObject(token, NULL); + if (!instance) + return -1; + if (first ? PyList_Insert(self->topstack->stack, 0, instance) : + PyList_Append(self->topstack->stack, instance)) { + Py_DECREF(instance); return -1; + } + Py_DECREF(instance); return 0; } /* - Write a token to the beginning of the current token stack. + Write a token to the end of the current token stack. */ -static int Tokenizer_emit_first(Tokenizer* self, PyObject* token) +static int Tokenizer_emit_token_kwargs(Tokenizer* self, PyObject* token, + PyObject* kwargs, int first) { - if (Tokenizer_push_textbuffer(self)) + PyObject* instance; + + if (Tokenizer_push_textbuffer(self)) { + Py_DECREF(kwargs); + return -1; + } + instance = PyObject_Call(token, NOARGS, kwargs); + if (!instance) { + Py_DECREF(kwargs); return -1; - if (PyList_Insert(self->topstack->stack, 0, token)) + } + if (first ? PyList_Insert(self->topstack->stack, 0, instance): + PyList_Append(self->topstack->stack, instance)) { + Py_DECREF(instance); + Py_DECREF(kwargs); return -1; + } + Py_DECREF(instance); + Py_DECREF(kwargs); return 0; } @@ -503,7 +528,7 @@ static PyObject* Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) */ static int Tokenizer_parse_template(Tokenizer* self) { - PyObject *template, *token; + PyObject *template; Py_ssize_t reset = self->head; template = Tokenizer_parse(self, LC_TEMPLATE_NAME, 1); @@ -513,30 +538,17 @@ static int Tokenizer_parse_template(Tokenizer* self) } if (!template) return -1; - token = PyObject_CallObject(TemplateOpen, NULL); - if (!token) { - Py_DECREF(template); - return -1; - } - if (Tokenizer_emit_first(self, token)) { - Py_DECREF(token); + if (Tokenizer_emit_first(self, TemplateOpen)) { Py_DECREF(template); return -1; } - Py_DECREF(token); if (Tokenizer_emit_all(self, template)) { Py_DECREF(template); return -1; } Py_DECREF(template); - token = PyObject_CallObject(TemplateClose, NULL); - if (!token) + if (Tokenizer_emit(self, TemplateClose)) return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); - return -1; - } - Py_DECREF(token); return 0; } @@ -545,7 +557,7 @@ static int Tokenizer_parse_template(Tokenizer* self) */ static int Tokenizer_parse_argument(Tokenizer* self) { - PyObject *argument, *token; + PyObject *argument; Py_ssize_t reset = self->head; argument = Tokenizer_parse(self, LC_ARGUMENT_NAME, 1); @@ -555,30 +567,17 @@ static int Tokenizer_parse_argument(Tokenizer* self) } if (!argument) return -1; - token = PyObject_CallObject(ArgumentOpen, NULL); - if (!token) { + if (Tokenizer_emit_first(self, ArgumentOpen)) { Py_DECREF(argument); return -1; } - if (Tokenizer_emit_first(self, token)) { - Py_DECREF(token); - Py_DECREF(argument); - return -1; - } - Py_DECREF(token); if (Tokenizer_emit_all(self, argument)) { Py_DECREF(argument); return -1; } Py_DECREF(argument); - token = PyObject_CallObject(ArgumentClose, NULL); - if (!token) - return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); + if (Tokenizer_emit(self, ArgumentClose)) return -1; - } - Py_DECREF(token); return 0; } @@ -658,7 +657,7 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) */ static int Tokenizer_handle_template_param(Tokenizer* self) { - PyObject *stack, *token; + PyObject *stack; if (self->topstack->context & LC_TEMPLATE_NAME) self->topstack->context ^= LC_TEMPLATE_NAME; @@ -676,15 +675,8 @@ static int Tokenizer_handle_template_param(Tokenizer* self) } else self->topstack->context |= LC_TEMPLATE_PARAM_KEY; - - token = PyObject_CallObject(TemplateParamSeparator, NULL); - if (!token) - return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); + if (Tokenizer_emit(self, TemplateParamSeparator)) return -1; - } - Py_DECREF(token); if (Tokenizer_push(self, self->topstack->context)) return -1; return 0; @@ -695,7 +687,7 @@ static int Tokenizer_handle_template_param(Tokenizer* self) */ static int Tokenizer_handle_template_param_value(Tokenizer* self) { - PyObject *stack, *token; + PyObject *stack; stack = Tokenizer_pop_keeping_context(self); if (!stack) @@ -707,14 +699,8 @@ static int Tokenizer_handle_template_param_value(Tokenizer* self) Py_DECREF(stack); self->topstack->context ^= LC_TEMPLATE_PARAM_KEY; self->topstack->context |= LC_TEMPLATE_PARAM_VALUE; - token = PyObject_CallObject(TemplateParamEquals, NULL); - if (!token) - return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); + if (Tokenizer_emit(self, TemplateParamEquals)) return -1; - } - Py_DECREF(token); return 0; } @@ -745,17 +731,10 @@ static PyObject* Tokenizer_handle_template_end(Tokenizer* self) */ static int Tokenizer_handle_argument_separator(Tokenizer* self) { - PyObject* token; self->topstack->context ^= LC_ARGUMENT_NAME; self->topstack->context |= LC_ARGUMENT_DEFAULT; - token = PyObject_CallObject(ArgumentSeparator, NULL); - if (!token) + if (Tokenizer_emit(self, ArgumentSeparator)) return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); - return -1; - } - Py_DECREF(token); return 0; } @@ -765,6 +744,7 @@ static int Tokenizer_handle_argument_separator(Tokenizer* self) static PyObject* Tokenizer_handle_argument_end(Tokenizer* self) { PyObject* stack = Tokenizer_pop(self); + self->head += 2; return stack; } @@ -775,7 +755,7 @@ static PyObject* Tokenizer_handle_argument_end(Tokenizer* self) static int Tokenizer_parse_wikilink(Tokenizer* self) { Py_ssize_t reset; - PyObject *wikilink, *token; + PyObject *wikilink; self->head += 2; reset = self->head - 1; @@ -789,30 +769,17 @@ static int Tokenizer_parse_wikilink(Tokenizer* self) } if (!wikilink) return -1; - token = PyObject_CallObject(WikilinkOpen, NULL); - if (!token) { - Py_DECREF(wikilink); - return -1; - } - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); + if (Tokenizer_emit(self, WikilinkOpen)) { Py_DECREF(wikilink); return -1; } - Py_DECREF(token); if (Tokenizer_emit_all(self, wikilink)) { Py_DECREF(wikilink); return -1; } Py_DECREF(wikilink); - token = PyObject_CallObject(WikilinkClose, NULL); - if (!token) - return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); + if (Tokenizer_emit(self, WikilinkClose)) return -1; - } - Py_DECREF(token); if (self->topstack->context & LC_FAIL_NEXT) self->topstack->context ^= LC_FAIL_NEXT; return 0; @@ -823,17 +790,10 @@ static int Tokenizer_parse_wikilink(Tokenizer* self) */ static int Tokenizer_handle_wikilink_separator(Tokenizer* self) { - PyObject* token; self->topstack->context ^= LC_WIKILINK_TITLE; self->topstack->context |= LC_WIKILINK_TEXT; - token = PyObject_CallObject(WikilinkSeparator, NULL); - if (!token) + if (Tokenizer_emit(self, WikilinkSeparator)) return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); - return -1; - } - Py_DECREF(token); return 0; } @@ -855,7 +815,7 @@ static int Tokenizer_parse_heading(Tokenizer* self) Py_ssize_t reset = self->head; int best = 1, i, context, diff; HeadingData *heading; - PyObject *level, *kwargs, *token; + PyObject *level, *kwargs; self->global |= GL_HEADING; self->head += 1; @@ -875,7 +835,7 @@ static int Tokenizer_parse_heading(Tokenizer* self) self->global ^= GL_HEADING; return 0; } - level = PyInt_FromSsize_t(heading->level); + level = NEW_INT_FUNC(heading->level); if (!level) { Py_DECREF(heading->title); free(heading); @@ -890,20 +850,11 @@ static int Tokenizer_parse_heading(Tokenizer* self) } PyDict_SetItemString(kwargs, "level", level); Py_DECREF(level); - token = PyObject_Call(HeadingStart, NOARGS, kwargs); - Py_DECREF(kwargs); - if (!token) { - Py_DECREF(heading->title); - free(heading); - return -1; - } - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); + if (Tokenizer_emit_kwargs(self, HeadingStart, kwargs)) { Py_DECREF(heading->title); free(heading); return -1; } - Py_DECREF(token); if (heading->level < best) { diff = best - heading->level; for (i = 0; i < diff; i++) { @@ -921,14 +872,8 @@ static int Tokenizer_parse_heading(Tokenizer* self) } Py_DECREF(heading->title); free(heading); - token = PyObject_CallObject(HeadingEnd, NULL); - if (!token) + if (Tokenizer_emit(self, HeadingEnd)) return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); - return -1; - } - Py_DECREF(token); self->global ^= GL_HEADING; return 0; } @@ -999,7 +944,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self) */ static int Tokenizer_really_parse_entity(Tokenizer* self) { - PyObject *token, *kwargs, *textobj; + PyObject *kwargs, *textobj; Py_UNICODE this; int numeric, hexadecimal, i, j, zeroes, test; char *valid, *text, *buffer, *def; @@ -1010,14 +955,8 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) return 0; \ } - token = PyObject_CallObject(HTMLEntityStart, NULL); - if (!token) - return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); + if (Tokenizer_emit(self, HTMLEntityStart)) return -1; - } - Py_DECREF(token); self->head++; this = Tokenizer_READ(self, 0); if (this == *"") { @@ -1026,14 +965,8 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) } if (this == *"#") { numeric = 1; - token = PyObject_CallObject(HTMLEntityNumeric, NULL); - if (!token) - return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); + if (Tokenizer_emit(self, HTMLEntityNumeric)) return -1; - } - Py_DECREF(token); self->head++; this = Tokenizer_READ(self, 0); if (this == *"") { @@ -1046,15 +979,8 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) if (!kwargs) return -1; PyDict_SetItemString(kwargs, "char", Tokenizer_read(self, 0)); - token = PyObject_Call(HTMLEntityHex, NOARGS, kwargs); - Py_DECREF(kwargs); - if (!token) + if (Tokenizer_emit_kwargs(self, HTMLEntityHex, kwargs)) return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); - return -1; - } - Py_DECREF(token); self->head++; } else @@ -1087,7 +1013,7 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) self->head++; continue; } - if (i >= 8) + if (i >= MAX_ENTITY_SIZE) FAIL_ROUTE_AND_EXIT() for (j = 0; j < NUM_MARKERS; j++) { if (this == *MARKERS[j]) @@ -1147,23 +1073,10 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) } PyDict_SetItemString(kwargs, "text", textobj); Py_DECREF(textobj); - token = PyObject_Call(Text, NOARGS, kwargs); - Py_DECREF(kwargs); - if (!token) + if (Tokenizer_emit_kwargs(self, Text, kwargs)) return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); + if (Tokenizer_emit(self, HTMLEntityEnd)) return -1; - } - Py_DECREF(token); - token = PyObject_CallObject(HTMLEntityEnd, NULL); - if (!token) - return -1; - if (Tokenizer_emit(self, token)) { - Py_DECREF(token); - return -1; - } - Py_DECREF(token); return 0; } @@ -1203,45 +1116,39 @@ static int Tokenizer_parse_entity(Tokenizer* self) static int Tokenizer_parse_comment(Tokenizer* self) { Py_ssize_t reset = self->head + 3; - PyObject *token, *comment; + PyObject *comment; + Py_UNICODE this; self->head += 4; - comment = Tokenizer_parse(self, LC_COMMENT, 1); - if (BAD_ROUTE) { - RESET_ROUTE(); - self->head = reset; - if (Tokenizer_emit_text(self, "