diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c
index a02b053..b8d2ad1 100644
--- a/mwparserfromhell/parser/tokenizer.c
+++ b/mwparserfromhell/parser/tokenizer.c
@@ -140,6 +140,74 @@ Textbuffer_render(Textbuffer* self)
     return result;
 }
 
+/*
+    Allocate a TagData with context TAG_NAME, reset 0, and three fresh
+    padding buffers. Returns NULL if any allocation fails; partially built
+    state is released before returning.
+*/
+static TagData*
+TagData_new(void)
+{
+    TagData *self = malloc(sizeof(TagData));
+
+    #define ALLOC_BUFFER(name)     \
+        name = Textbuffer_new();   \
+        if (!name) {               \
+            TagData_dealloc(self); \
+            return NULL;           \
+        }
+
+    if (!self) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    /* malloc() leaves the struct uninitialized: NULL the buffers now so
+       the TagData_dealloc() call inside ALLOC_BUFFER only frees buffers
+       that were really created. */
+    self->pad_first = self->pad_before_eq = self->pad_after_eq = NULL;
+    self->context = TAG_NAME;
+    ALLOC_BUFFER(self->pad_first)
+    ALLOC_BUFFER(self->pad_before_eq)
+    ALLOC_BUFFER(self->pad_after_eq)
+    self->reset = 0;
+    return self;
+}
+
+/*
+    Free a TagData and any padding buffers it still holds.
+*/
+static void
+TagData_dealloc(TagData* self)
+{
+    #define DEALLOC_BUFFER(name)  \
+        if (name)                 \
+            Textbuffer_dealloc(name);
+
+    DEALLOC_BUFFER(self->pad_first);
+    DEALLOC_BUFFER(self->pad_before_eq);
+    DEALLOC_BUFFER(self->pad_after_eq);
+    free(self);
+}
+
+/*
+    Replace each padding buffer of a TagData with a new, empty one.
+    Returns 0 on success, or -1 as soon as a replacement cannot be allocated.
+*/
+static int
+TagData_reset_buffers(TagData* self)
+{
+    #define RESET_BUFFER(name)    \
+        Textbuffer_dealloc(name); \
+        name = Textbuffer_new();  \
+        if (!name)                \
+            return -1;
+
+    RESET_BUFFER(self->pad_first)
+    RESET_BUFFER(self->pad_before_eq)
+    RESET_BUFFER(self->pad_after_eq)
+    return 0;
+}
+
 static PyObject*
 Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds)
 {
@@ -1252,36 +1320,25 @@ Tokenizer_parse_tag(Tokenizer* self)
 static PyObject*
 Tokenizer_really_parse_tag(Tokenizer* self)
 {
-    TagOpenData *data = malloc(sizeof(TagOpenData));
+    TagData *data = TagData_new();
     PyObject *token, *text, *trash;
     Py_UNICODE this, next;
     int can_exit;
 
-    if (!data) {
-        PyErr_NoMemory();
+    if (!data)
         return NULL;
-    }
-    data->context = TAG_NAME;
-    data->pad_first = Textbuffer_new();
-    data->pad_before_eq = Textbuffer_new();
-    data->pad_after_eq = Textbuffer_new();
-    if (!data->pad_first || !data->pad_before_eq || !data->pad_after_eq) {
-        free(data);
-        return NULL;
-    }
-    data->reset = 0;
     if (Tokenizer_push(self, LC_TAG_OPEN)) {
-        free(data);
+        TagData_dealloc(data);
         return NULL;
     }
     token = PyObject_CallObject(TagOpenOpen, NULL);
     if (!token) {
-        free(data);
+        TagData_dealloc(data);
         return NULL;
     }
     if (Tokenizer_emit(self, token)) {
         Py_DECREF(token);
-        free(data);
+        TagData_dealloc(data);
         return NULL;
     }
     Py_DECREF(token);
@@ -1303,15 +1360,15 @@ Tokenizer_really_parse_tag(Tokenizer* self)
                 trash = Tokenizer_pop(self);
                 Py_XDECREF(trash);
             }
-            free(data);
+            TagData_dealloc(data);
             return Tokenizer_fail_route(self);
         }
         else if (this == *">" && can_exit) {
             if (Tokenizer_handle_tag_close_open(self, data, TagCloseOpen)) {
-                free(data);
+                TagData_dealloc(data);
                 return NULL;
             }
-            free(data);
+            TagData_dealloc(data);
             self->topstack->context = LC_TAG_BODY;
             token = PyList_GET_ITEM(self->topstack->stack, 1);
             text = PyObject_GetAttrString(token, "text");
@@ -1329,17 +1386,18 @@ Tokenizer_really_parse_tag(Tokenizer* self)
             return Tokenizer_handle_blacklisted_tag(self);
         }
         else if (this == *"/" && next == *">" && can_exit) {
-            if (Tokenizer_handle_tag_close_open(self, data, TagCloseSelfclose)) {
-                free(data);
+            if (Tokenizer_handle_tag_close_open(self, data,
+                                                TagCloseSelfclose)) {
+                TagData_dealloc(data);
                 return NULL;
             }
-            free(data);
+            TagData_dealloc(data);
             return Tokenizer_pop(self);
         }
         else {
             if (Tokenizer_handle_tag_data(self, data, this) || BAD_ROUTE) {
                 RESET_ROUTE();
-                free(data);
+                TagData_dealloc(data);
                 return NULL;
             }
         }
@@ -1351,7 +1409,7 @@ Tokenizer_really_parse_tag(Tokenizer* self)
     Write a pending tag attribute from data to the stack.
 */
 static int
-Tokenizer_push_tag_buffer(Tokenizer* self, TagOpenData* data)
+Tokenizer_push_tag_buffer(Tokenizer* self, TagData* data)
 {
     PyObject *token, *tokens, *kwargs, *pad_first, *pad_before_eq,
              *pad_after_eq;
@@ -1405,13 +1463,7 @@ Tokenizer_push_tag_buffer(Tokenizer* self, TagOpenData* data)
         return -1;
     }
     Py_DECREF(tokens);
-    Textbuffer_dealloc(data->pad_first);
-    Textbuffer_dealloc(data->pad_before_eq);
-    Textbuffer_dealloc(data->pad_after_eq);
-    data->pad_first = Textbuffer_new();
-    data->pad_before_eq = Textbuffer_new();
-    data->pad_after_eq = Textbuffer_new();
-    if (!data->pad_first || !data->pad_before_eq || !data->pad_after_eq)
+    if (TagData_reset_buffers(data))
         return -1;
     return 0;
 }
@@ -1420,7 +1472,7 @@ Tokenizer_push_tag_buffer(Tokenizer* self, TagOpenData* data)
     Handle all sorts of text data inside of an HTML open tag.
 */
 static int
-Tokenizer_handle_tag_data(Tokenizer* self, TagOpenData* data, Py_UNICODE chunk)
+Tokenizer_handle_tag_data(Tokenizer* self, TagData* data, Py_UNICODE chunk)
 {
     PyObject *trash, *token;
     int first_time, i, is_marker = 0, escaped;
@@ -1509,7 +1561,7 @@ Tokenizer_handle_tag_data(Tokenizer* self, TagOpenData* data, Py_UNICODE chunk)
     Handle whitespace inside of an HTML open tag.
 */
 static int
-Tokenizer_handle_tag_space(Tokenizer* self, TagOpenData* data, Py_UNICODE text)
+Tokenizer_handle_tag_space(Tokenizer* self, TagData* data, Py_UNICODE text)
 {
     int ctx = data->context;
     int end_of_value = (ctx & TAG_ATTR_VALUE &&
@@ -1592,10 +1644,9 @@ Tokenizer_handle_blacklisted_tag(Tokenizer* self)
     Handle the closing of a open tag ().
 */
 static int
-Tokenizer_handle_tag_close_open(Tokenizer* self, TagOpenData* data,
-                                PyObject* token)
+Tokenizer_handle_tag_close_open(Tokenizer* self, TagData* data, PyObject* cls)
 {
-    PyObject *padding, *kwargs, *tok;
+    PyObject *padding, *kwargs, *token;
 
     if (data->context & (TAG_ATTR_NAME | TAG_ATTR_VALUE)) {
         if (Tokenizer_push_tag_buffer(self, data))
@@ -1611,15 +1662,15 @@ Tokenizer_handle_tag_close_open(Tokenizer* self, TagOpenData* data,
     }
     PyDict_SetItemString(kwargs, "padding", padding);
     Py_DECREF(padding);
-    tok = PyObject_Call(token, NOARGS, kwargs);
+    token = PyObject_Call(cls, NOARGS, kwargs);
     Py_DECREF(kwargs);
-    if (!tok)
+    if (!token)
         return -1;
-    if (Tokenizer_emit(self, tok)) {
-        Py_DECREF(tok);
+    if (Tokenizer_emit(self, token)) {
+        Py_DECREF(token);
         return -1;
     }
-    Py_DECREF(tok);
+    Py_DECREF(token);
     self->head++;
     return 0;
 }
diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h
index 7440924..20934fa 100644
--- a/mwparserfromhell/parser/tokenizer.h
+++ b/mwparserfromhell/parser/tokenizer.h
@@ -181,7 +181,7 @@ typedef struct {
     struct Textbuffer* pad_before_eq;
     struct Textbuffer* pad_after_eq;
     Py_ssize_t reset;
-} TagOpenData;
+} TagData;
 
 typedef struct Textbuffer Textbuffer;
 typedef struct Stack Stack;
@@ -218,11 +218,15 @@ typedef struct {
 
 /* Function prototypes: */
 
-static struct Textbuffer* Textbuffer_new(void);
+static Textbuffer* Textbuffer_new(void);
 static void Textbuffer_dealloc(Textbuffer*);
 static int Textbuffer_write(Textbuffer**, Py_UNICODE);
 static PyObject* Textbuffer_render(Textbuffer*);
 
+static TagData* TagData_new(void);
+static void TagData_dealloc(TagData*);
+static int TagData_reset_buffers(TagData*);
+
 static PyObject* Tokenizer_new(PyTypeObject*, PyObject*, PyObject*);
 static void Tokenizer_dealloc(Tokenizer*);
 static int Tokenizer_init(Tokenizer*, PyObject*, PyObject*);
@@ -257,12 +261,12 @@ static int Tokenizer_parse_entity(Tokenizer*);
 static int Tokenizer_parse_comment(Tokenizer*);
 static int Tokenizer_parse_tag(Tokenizer*);
 static PyObject* Tokenizer_really_parse_tag(Tokenizer*);
-static int Tokenizer_push_tag_buffer(Tokenizer*, TagOpenData*);
-static int Tokenizer_handle_tag_data(Tokenizer*, TagOpenData*, Py_UNICODE);
-static int Tokenizer_handle_tag_space(Tokenizer*, TagOpenData*, Py_UNICODE);
+static int Tokenizer_push_tag_buffer(Tokenizer*, TagData*);
+static int Tokenizer_handle_tag_data(Tokenizer*, TagData*, Py_UNICODE);
+static int Tokenizer_handle_tag_space(Tokenizer*, TagData*, Py_UNICODE);
 static int Tokenizer_handle_tag_text(Tokenizer*, Py_UNICODE);
 static PyObject* Tokenizer_handle_blacklisted_tag(Tokenizer*);
-static int Tokenizer_handle_tag_close_open(Tokenizer*, TagOpenData*, PyObject*);
+static int Tokenizer_handle_tag_close_open(Tokenizer*, TagData*, PyObject*);
 static int Tokenizer_handle_tag_open_close(Tokenizer*);
 static PyObject* Tokenizer_handle_tag_close_close(Tokenizer*);
 static int Tokenizer_handle_invalid_tag_start(Tokenizer*);