From c1379d5f21f1f5bfd4bb7a179994225e487519ad Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Wed, 14 Aug 2013 02:33:15 -0400
Subject: [PATCH] Add an emit_string() as a shortcut; a bunch of minor cleanup.

---
 mwparserfromhell/parser/tokenizer.c | 143 +++++++++++++++---------------------
 1 file changed, 60 insertions(+), 83 deletions(-)

diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c
index 43df293..62e8599 100644
--- a/mwparserfromhell/parser/tokenizer.c
+++ b/mwparserfromhell/parser/tokenizer.c
@@ -29,6 +29,7 @@ SOFTWARE.
 static int heading_level_from_context(int n)
 {
     int level;
+
     n /= LC_HEADING_LEVEL_1;
     for (level = 1; n > 1; n >>= 1)
         level++;
@@ -72,6 +73,7 @@ static PyObject* strip_tag_name(PyObject* token)
 static Textbuffer* Textbuffer_new(void)
 {
     Textbuffer* buffer = malloc(sizeof(Textbuffer));
+
     if (!buffer) {
         PyErr_NoMemory();
         return NULL;
@@ -90,6 +92,7 @@ static Textbuffer* Textbuffer_new(void)
 static void Textbuffer_dealloc(Textbuffer* self)
 {
     Textbuffer* next;
+
     while (self) {
         free(self->data);
         next = self->next;
@@ -104,6 +107,7 @@ static void Textbuffer_dealloc(Textbuffer* self)
 static int Textbuffer_write(Textbuffer** this, Py_UNICODE text)
 {
     Textbuffer* self = *this;
+
     if (self->size == TEXTBUFFER_BLOCKSIZE) {
         Textbuffer* new = Textbuffer_new();
         if (!new)
@@ -123,6 +127,7 @@ static PyObject* Textbuffer_render(Textbuffer* self)
 {
     PyObject *result = PyUnicode_FromUnicode(self->data, self->size);
     PyObject *left, *concat;
+
     while (self->next) {
         self = self->next;
         left = PyUnicode_FromUnicode(self->data, self->size);
@@ -208,6 +213,7 @@ static void Tokenizer_dealloc(Tokenizer* self)
 static int Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds)
 {
     static char* kwlist[] = {NULL};
+
     if (!PyArg_ParseTupleAndKeywords(args, kwds, "", kwlist))
         return -1;
     self->text = Py_None;
@@ -223,6 +229,7 @@ static int Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds)
 static int Tokenizer_push(Tokenizer* self, int context)
 {
     Stack* top = malloc(sizeof(Stack));
+
     if (!top) {
         PyErr_NoMemory();
         return -1;
@@ -246,6 +253,7 @@ static int Tokenizer_push_textbuffer(Tokenizer* self)
 {
     PyObject *text, *kwargs, *token;
     Textbuffer* buffer = self->topstack->textbuffer;
+
     if (buffer->size == 0 && !buffer->next)
         return 0;
     text = Textbuffer_render(buffer);
@@ -280,6 +288,7 @@ static int Tokenizer_push_textbuffer(Tokenizer* self)
 static void Tokenizer_delete_top_of_stack(Tokenizer* self)
 {
     Stack* top = self->topstack;
+
     Py_DECREF(top->stack);
     Textbuffer_dealloc(top->textbuffer);
     self->topstack = top->next;
@@ -293,6 +302,7 @@ static void Tokenizer_delete_top_of_stack(Tokenizer* self)
 static PyObject* Tokenizer_pop(Tokenizer* self)
 {
     PyObject* stack;
+
     if (Tokenizer_push_textbuffer(self))
         return NULL;
     stack = self->topstack->stack;
@@ -309,6 +319,7 @@ static PyObject* Tokenizer_pop_keeping_context(Tokenizer* self)
 {
     PyObject* stack;
     int context;
+
     if (Tokenizer_push_textbuffer(self))
         return NULL;
     stack = self->topstack->stack;
@@ -327,6 +338,7 @@ static void* Tokenizer_fail_route(Tokenizer* self)
 {
     int context = self->topstack->context;
     PyObject* stack = Tokenizer_pop(self);
+
     Py_XDECREF(stack);
     FAIL_ROUTE(context);
     return NULL;
@@ -365,6 +377,21 @@ static int Tokenizer_emit_text(Tokenizer* self, Py_UNICODE text)
 }
 
 /*
+    Write a string of text to the current textbuffer.
+*/
+static int Tokenizer_emit_string(Tokenizer* self, const char* text)
+{
+    int i = 0;
+
+    while (text[i]) {
+        if (Tokenizer_emit_text(self, text[i]))
+            return -1;
+        i++;
+    }
+    return 0;
+}
+
+/*
     Write a series of tokens to the current stack at once.
 */
 static int Tokenizer_emit_all(Tokenizer* self, PyObject* tokenlist)
@@ -428,15 +455,10 @@ static int Tokenizer_emit_all(Tokenizer* self, PyObject* tokenlist)
 static int Tokenizer_emit_text_then_stack(Tokenizer* self, const char* text)
 {
     PyObject* stack = Tokenizer_pop(self);
-    int i = 0;
-    while (1) {
-        if (!text[i])
-            break;
-        if (Tokenizer_emit_text(self, (Py_UNICODE) text[i])) {
-            Py_XDECREF(stack);
-            return -1;
-        }
-        i++;
+
+    if (Tokenizer_emit_string(self, text)) {
+        Py_DECREF(stack);
+        return -1;
     }
     if (stack) {
         if (PyList_GET_SIZE(stack) > 0) {
@@ -457,6 +479,7 @@ static int Tokenizer_emit_text_then_stack(Tokenizer* self, const char* text)
 static PyObject* Tokenizer_read(Tokenizer* self, Py_ssize_t delta)
 {
     Py_ssize_t index = self->head + delta;
+
     if (index >= self->length)
         return EMPTY;
     return PyList_GET_ITEM(self->text, index);
@@ -468,6 +491,7 @@ static PyObject* Tokenizer_read(Tokenizer* self, Py_ssize_t delta)
 static PyObject* Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta)
 {
     Py_ssize_t index;
+
     if (delta > self->head)
         return EMPTY;
     index = self->head - delta;
@@ -752,7 +776,6 @@ static int Tokenizer_parse_wikilink(Tokenizer* self)
 {
     Py_ssize_t reset;
     PyObject *wikilink, *token;
-    int i;
 
     self->head += 2;
     reset = self->head - 1;
@@ -760,10 +783,8 @@ static int Tokenizer_parse_wikilink(Tokenizer* self)
     if (BAD_ROUTE) {
         RESET_ROUTE();
         self->head = reset;
-        for (i = 0; i < 2; i++) {
-            if (Tokenizer_emit_text(self, *"["))
-                return -1;
-        }
+        if (Tokenizer_emit_string(self, "[["))
+            return -1;
         return 0;
     }
     if (!wikilink)
@@ -1183,24 +1204,14 @@ static int Tokenizer_parse_comment(Tokenizer* self)
 {
     Py_ssize_t reset = self->head + 3;
     PyObject *token, *comment;
-    int i;
 
     self->head += 4;
     comment = Tokenizer_parse(self, LC_COMMENT, 1);
     if (BAD_ROUTE) {
-        const char* text = "
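Note for reviewers: below is a minimal standalone sketch of the pattern this
patch introduces, for poking at it outside the tokenizer. The emit_text() stub
is hypothetical (the real Tokenizer_emit_text() writes a Py_UNICODE into the
textbuffer and can fail on allocation); only the emit_string() loop mirrors
the patch.

    #include <stdio.h>

    /* Hypothetical stand-in for Tokenizer_emit_text(): emit one character,
       returning 0 on success or -1 on failure. */
    static int emit_text(char ch)
    {
        return (putchar(ch) == EOF) ? -1 : 0;
    }

    /* Mirror of the new Tokenizer_emit_string(): walk the C string and
       forward each character to the single-character emitter, propagating
       failure immediately. */
    static int emit_string(const char* text)
    {
        int i = 0;

        while (text[i]) {
            if (emit_text(text[i]))
                return -1;
            i++;
        }
        return 0;
    }

    int main(void)
    {
        /* Call sites like parse_wikilink's bad-route recovery collapse from
           a character loop into a single call: */
        if (emit_string("[["))
            return 1;
        return 0;
    }

One detail worth flagging in review: the old loop in
Tokenizer_emit_text_then_stack() used the NULL-safe Py_XDECREF(stack) on
failure, while the new error path calls Py_DECREF(stack); the two are only
equivalent if Tokenizer_pop() cannot return NULL on that path.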