diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index a3e2a95..40f91a1 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -106,9 +106,8 @@ Tokenizer_push(Tokenizer* self, int context) top->stack = PyList_New(0); top->context = context; top->textbuffer = Textbuffer_new(); - if (!top->textbuffer) { + if (!top->textbuffer) return -1; - } top->next = self->topstack; self->topstack = top; return 0; @@ -140,11 +139,11 @@ static int Tokenizer_push_textbuffer(Tokenizer* self) { struct Textbuffer* buffer = self->topstack->textbuffer; - if (buffer->size == 0 && !buffer->next) { + if (buffer->size == 0 && !buffer->next) return 0; - } PyObject* text = Textbuffer_render(buffer); - if (!text) return -1; + if (!text) + return -1; PyObject* kwargs = PyDict_New(); if (!kwargs) { @@ -156,20 +155,19 @@ Tokenizer_push_textbuffer(Tokenizer* self) PyObject* token = PyObject_Call(Text, NOARGS, kwargs); Py_DECREF(kwargs); - if (!token) return -1; + if (!token) + return -1; if (PyList_Append(self->topstack->stack, token)) { Py_DECREF(token); return -1; } - Py_DECREF(token); Textbuffer_dealloc(buffer); self->topstack->textbuffer = Textbuffer_new(); - if (!self->topstack->textbuffer) { + if (!self->topstack->textbuffer) return -1; - } return 0; } @@ -239,10 +237,8 @@ Tokenizer_write(Tokenizer* self, PyObject* token) { if (Tokenizer_push_textbuffer(self)) return -1; - if (PyList_Append(self->topstack->stack, token)) return -1; - return 0; } @@ -254,10 +250,8 @@ Tokenizer_write_first(Tokenizer* self, PyObject* token) { if (Tokenizer_push_textbuffer(self)) return -1; - if (PyList_Insert(self->topstack->stack, 0, token)) return -1; - return 0; } @@ -270,9 +264,8 @@ Tokenizer_write_text(Tokenizer* self, Py_UNICODE text) struct Textbuffer* buf = self->topstack->textbuffer; if (buf->size == TEXTBUFFER_BLOCKSIZE) { struct Textbuffer* new = Textbuffer_new(); - if (!new) { + if (!new) return -1; - } new->next = buf; self->topstack->textbuffer = new; buf = new; @@ -297,18 +290,20 @@ Tokenizer_write_all(Tokenizer* self, PyObject* tokenlist) case 1: { pushed = 1; struct Textbuffer* buffer = self->topstack->textbuffer; - if (buffer->size == 0 && !buffer->next) { + if (buffer->size == 0 && !buffer->next) break; - } PyObject* left = Textbuffer_render(buffer); - if (!left) return -1; + if (!left) + return -1; PyObject* right = PyObject_GetAttrString(token, "text"); - if (!right) return -1; + if (!right) + return -1; PyObject* text = PyUnicode_Concat(left, right); Py_DECREF(left); Py_DECREF(right); - if (!text) return -1; + if (!text) + return -1; if (PyObject_SetAttrString(token, "text", text)) { Py_DECREF(text); @@ -318,9 +313,8 @@ Tokenizer_write_all(Tokenizer* self, PyObject* tokenlist) Textbuffer_dealloc(buffer); self->topstack->textbuffer = Textbuffer_new(); - if (!self->topstack->textbuffer) { + if (!self->topstack->textbuffer) return -1; - } break; } case -1: @@ -334,10 +328,8 @@ Tokenizer_write_all(Tokenizer* self, PyObject* tokenlist) PyObject* stack = self->topstack->stack; Py_ssize_t size = PyList_GET_SIZE(stack); - if (PyList_SetSlice(stack, size, size, tokenlist)) return -1; - return 0; } @@ -351,7 +343,8 @@ Tokenizer_write_text_then_stack(Tokenizer* self, const char* text) PyObject* stack = Tokenizer_pop(self); int i = 0; while (1) { - if (!text[i]) break; + if (!text[i]) + break; if (Tokenizer_write_text(self, (Py_UNICODE) text[i])) { Py_XDECREF(stack); return -1; @@ -380,10 +373,8 @@ static PyObject* Tokenizer_read(Tokenizer* self, Py_ssize_t delta) { Py_ssize_t index = self->head + delta; - if (index >= self->length) return EMPTY; - return PyList_GET_ITEM(self->text, index); } @@ -395,7 +386,6 @@ Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) { if (delta > self->head) return EMPTY; - Py_ssize_t index = self->head - delta; return PyList_GET_ITEM(self->text, index); } @@ -457,21 +447,19 @@ Tokenizer_parse_template_or_argument(Tokenizer* self) Py_XDECREF(text); return 0; } - else { + else braces -= 2; - } } - else { + else braces -= 3; - } - if (braces) { + if (braces) self->head++; - } } PyObject* tokenlist = Tokenizer_pop(self); - if (!tokenlist) return -1; + if (!tokenlist) + return -1; if (Tokenizer_write_all(self, tokenlist)) { Py_DECREF(tokenlist); return -1; @@ -495,7 +483,8 @@ Tokenizer_parse_template(Tokenizer* self) self->head = reset; return 0; } - if (!template) return -1; + if (!template) + return -1; token = PyObject_CallObject(TemplateOpen, NULL); if (!token) { @@ -517,14 +506,14 @@ Tokenizer_parse_template(Tokenizer* self) Py_DECREF(template); token = PyObject_CallObject(TemplateClose, NULL); - if (!token) return -1; + if (!token) + return -1; if (Tokenizer_write(self, token)) { Py_DECREF(token); return -1; } Py_DECREF(token); - return 0; } @@ -542,7 +531,8 @@ Tokenizer_parse_argument(Tokenizer* self) self->head = reset; return 0; } - if (!argument) return -1; + if (!argument) + return -1; token = PyObject_CallObject(ArgumentOpen, NULL); if (!token) { @@ -564,14 +554,14 @@ Tokenizer_parse_argument(Tokenizer* self) Py_DECREF(argument); token = PyObject_CallObject(ArgumentClose, NULL); - if (!token) return -1; + if (!token) + return -1; if (Tokenizer_write(self, token)) { Py_DECREF(token); return -1; } Py_DECREF(token); - return 0; } @@ -581,28 +571,27 @@ Tokenizer_parse_argument(Tokenizer* self) static int Tokenizer_handle_template_param(Tokenizer* self) { - if (self->topstack->context & LC_TEMPLATE_NAME) { + if (self->topstack->context & LC_TEMPLATE_NAME) self->topstack->context ^= LC_TEMPLATE_NAME; - } - else if (self->topstack->context & LC_TEMPLATE_PARAM_VALUE) { + else if (self->topstack->context & LC_TEMPLATE_PARAM_VALUE) self->topstack->context ^= LC_TEMPLATE_PARAM_VALUE; - } if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) { PyObject* stack = Tokenizer_pop_keeping_context(self); - if (!stack) return -1; + if (!stack) + return -1; if (Tokenizer_write_all(self, stack)) { Py_DECREF(stack); return -1; } Py_DECREF(stack); } - else { + else self->topstack->context |= LC_TEMPLATE_PARAM_KEY; - } PyObject* token = PyObject_CallObject(TemplateParamSeparator, NULL); - if (!token) return -1; + if (!token) + return -1; if (Tokenizer_write(self, token)) { Py_DECREF(token); @@ -622,7 +611,8 @@ static int Tokenizer_handle_template_param_value(Tokenizer* self) { PyObject* stack = Tokenizer_pop_keeping_context(self); - if (!stack) return -1; + if (!stack) + return -1; if (Tokenizer_write_all(self, stack)) { Py_DECREF(stack); return -1; @@ -633,8 +623,8 @@ Tokenizer_handle_template_param_value(Tokenizer* self) self->topstack->context |= LC_TEMPLATE_PARAM_VALUE; PyObject* token = PyObject_CallObject(TemplateParamEquals, NULL); - if (!token) return -1; - + if (!token) + return -1; if (Tokenizer_write(self, token)) { Py_DECREF(token); return -1; @@ -652,14 +642,14 @@ Tokenizer_handle_template_end(Tokenizer* self) PyObject* stack; if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) { stack = Tokenizer_pop_keeping_context(self); - if (!stack) return NULL; + if (!stack) + return NULL; if (Tokenizer_write_all(self, stack)) { Py_DECREF(stack); return NULL; } Py_DECREF(stack); } - self->head++; stack = Tokenizer_pop(self); return stack; @@ -675,8 +665,8 @@ Tokenizer_handle_argument_separator(Tokenizer* self) self->topstack->context |= LC_ARGUMENT_DEFAULT; PyObject* token = PyObject_CallObject(ArgumentSeparator, NULL); - if (!token) return -1; - + if (!token) + return -1; if (Tokenizer_write(self, token)) { Py_DECREF(token); return -1; @@ -702,11 +692,12 @@ Tokenizer_handle_argument_end(Tokenizer* self) static int Tokenizer_parse_wikilink(Tokenizer* self) { - self->head += 2; - Py_ssize_t reset = self->head - 1; + Py_ssize_t reset; + PyObject *token, *wikilink; - PyObject *token; - PyObject *wikilink = Tokenizer_parse(self, LC_WIKILINK_TITLE); + self->head += 2; + reset = self->head - 1; + wikilink = Tokenizer_parse(self, LC_WIKILINK_TITLE); if (BAD_ROUTE) { RESET_ROUTE(); @@ -718,7 +709,8 @@ Tokenizer_parse_wikilink(Tokenizer* self) } return 0; } - if (!wikilink) return -1; + if (!wikilink) + return -1; token = PyObject_CallObject(WikilinkOpen, NULL); if (!token) { @@ -740,8 +732,8 @@ Tokenizer_parse_wikilink(Tokenizer* self) Py_DECREF(wikilink); token = PyObject_CallObject(WikilinkClose, NULL); - if (!token) return -1; - + if (!token) + return -1; if (Tokenizer_write(self, token)) { Py_DECREF(token); return -1; @@ -760,8 +752,8 @@ Tokenizer_handle_wikilink_separator(Tokenizer* self) self->topstack->context |= LC_WIKILINK_TEXT; PyObject* token = PyObject_CallObject(WikilinkSeparator, NULL); - if (!token) return -1; - + if (!token) + return -1; if (Tokenizer_write(self, token)) { Py_DECREF(token); return -1; @@ -866,14 +858,13 @@ Tokenizer_parse_heading(Tokenizer* self) free(heading); token = PyObject_CallObject(HeadingEnd, NULL); - if (!token) return -1; - + if (!token) + return -1; if (Tokenizer_write(self, token)) { Py_DECREF(token); return -1; } Py_DECREF(token); - self->global ^= GL_HEADING; return 0; } @@ -931,7 +922,8 @@ Tokenizer_handle_heading_end(Tokenizer* self) } PyObject* stack = Tokenizer_pop(self); - if (!stack) return NULL; + if (!stack) + return NULL; HeadingData* heading = malloc(sizeof(HeadingData)); if (!heading) { @@ -955,7 +947,8 @@ Tokenizer_really_parse_entity(Tokenizer* self) char *valid, *text, *def; token = PyObject_CallObject(HTMLEntityStart, NULL); - if (!token) return -1; + if (!token) + return -1; if (Tokenizer_write(self, token)) { Py_DECREF(token); return -1; @@ -972,7 +965,8 @@ Tokenizer_really_parse_entity(Tokenizer* self) if (this == *"#") { numeric = 1; token = PyObject_CallObject(HTMLEntityNumeric, NULL); - if (!token) return -1; + if (!token) + return -1; if (Tokenizer_write(self, token)) { Py_DECREF(token); return -1; @@ -988,11 +982,13 @@ Tokenizer_really_parse_entity(Tokenizer* self) if (this == *"x" || this == *"X") { hexadecimal = 1; kwargs = PyDict_New(); - if (!kwargs) return -1; + if (!kwargs) + return -1; PyDict_SetItemString(kwargs, "char", Tokenizer_read(self, 0)); PyObject* token = PyObject_Call(HTMLEntityHex, NOARGS, kwargs); Py_DECREF(kwargs); - if (!token) return -1; + if (!token) + return -1; if (Tokenizer_write(self, token)) { Py_DECREF(token); return -1; @@ -1000,13 +996,11 @@ Tokenizer_really_parse_entity(Tokenizer* self) Py_DECREF(token); self->head++; } - else { + else hexadecimal = 0; - } } - else { + else numeric = hexadecimal = 0; - } if (hexadecimal) valid = "0123456789abcdefABCDEF"; @@ -1091,7 +1085,8 @@ Tokenizer_really_parse_entity(Tokenizer* self) Py_DECREF(textobj); token = PyObject_Call(Text, NOARGS, kwargs); Py_DECREF(kwargs); - if (!token) return -1; + if (!token) + return -1; if (Tokenizer_write(self, token)) { Py_DECREF(token); return -1; @@ -1099,7 +1094,8 @@ Tokenizer_really_parse_entity(Tokenizer* self) Py_DECREF(token); token = PyObject_CallObject(HTMLEntityEnd, NULL); - if (!token) return -1; + if (!token) + return -1; if (Tokenizer_write(self, token)) { Py_DECREF(token); return -1; @@ -1117,9 +1113,8 @@ Tokenizer_parse_entity(Tokenizer* self) Py_ssize_t reset = self->head; if (Tokenizer_push(self, 0)) return -1; - if (Tokenizer_really_parse_entity(self)) - return -1; + return -1; if (BAD_ROUTE) { RESET_ROUTE(); @@ -1130,12 +1125,12 @@ Tokenizer_parse_entity(Tokenizer* self) } PyObject* tokenlist = Tokenizer_pop(self); - if (!tokenlist) return -1; + if (!tokenlist) + return -1; if (Tokenizer_write_all(self, tokenlist)) { Py_DECREF(tokenlist); return -1; } - Py_DECREF(tokenlist); return 0; } @@ -1158,7 +1153,8 @@ Tokenizer_parse_comment(Tokenizer* self) const char* text = "