Browse Source

Refactor TagData code into dedicated functions.

tags/v0.3
Ben Kurtovic 10 years ago
parent
commit
e3fc27c9e3
2 changed files with 86 additions and 47 deletions
  1. +76
    -41
      mwparserfromhell/parser/tokenizer.c
  2. +10
    -6
      mwparserfromhell/parser/tokenizer.h

+ 76
- 41
mwparserfromhell/parser/tokenizer.c View File

@@ -140,6 +140,58 @@ Textbuffer_render(Textbuffer* self)
return result;
}

static TagData*
TagData_new(void)
{
TagData *self = malloc(sizeof(TagData));

#define ALLOC_BUFFER(name) \
name = Textbuffer_new(); \
if (!name) { \
TagData_dealloc(self); \
return NULL; \
}

if (!self) {
PyErr_NoMemory();
return NULL;
}
self->context = TAG_NAME;
ALLOC_BUFFER(self->pad_first)
ALLOC_BUFFER(self->pad_before_eq)
ALLOC_BUFFER(self->pad_after_eq)
self->reset = 0;
return self;
}

static void
TagData_dealloc(TagData* self)
{
#define DEALLOC_BUFFER(name) \
if (name) \
Textbuffer_dealloc(name);

DEALLOC_BUFFER(self->pad_first);
DEALLOC_BUFFER(self->pad_before_eq);
DEALLOC_BUFFER(self->pad_after_eq);
free(self);
}

static int
TagData_reset_buffers(TagData* self)
{
#define RESET_BUFFER(name) \
Textbuffer_dealloc(name); \
name = Textbuffer_new(); \
if (!name) \
return -1;

RESET_BUFFER(self->pad_first)
RESET_BUFFER(self->pad_before_eq)
RESET_BUFFER(self->pad_after_eq)
return 0;
}

static PyObject*
Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds)
{
@@ -1252,36 +1304,25 @@ Tokenizer_parse_tag(Tokenizer* self)
static PyObject*
Tokenizer_really_parse_tag(Tokenizer* self)
{
TagOpenData *data = malloc(sizeof(TagOpenData));
TagData *data = TagData_new();
PyObject *token, *text, *trash;
Py_UNICODE this, next;
int can_exit;

if (!data) {
PyErr_NoMemory();
if (!data)
return NULL;
}
data->context = TAG_NAME;
data->pad_first = Textbuffer_new();
data->pad_before_eq = Textbuffer_new();
data->pad_after_eq = Textbuffer_new();
if (!data->pad_first || !data->pad_before_eq || !data->pad_after_eq) {
free(data);
return NULL;
}
data->reset = 0;
if (Tokenizer_push(self, LC_TAG_OPEN)) {
free(data);
TagData_dealloc(data);
return NULL;
}
token = PyObject_CallObject(TagOpenOpen, NULL);
if (!token) {
free(data);
TagData_dealloc(data);
return NULL;
}
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
free(data);
TagData_dealloc(data);
return NULL;
}
Py_DECREF(token);
@@ -1303,15 +1344,15 @@ Tokenizer_really_parse_tag(Tokenizer* self)
trash = Tokenizer_pop(self);
Py_XDECREF(trash);
}
free(data);
TagData_dealloc(data);
return Tokenizer_fail_route(self);
}
else if (this == *">" && can_exit) {
if (Tokenizer_handle_tag_close_open(self, data, TagCloseOpen)) {
free(data);
TagData_dealloc(data);
return NULL;
}
free(data);
TagData_dealloc(data);
self->topstack->context = LC_TAG_BODY;
token = PyList_GET_ITEM(self->topstack->stack, 1);
text = PyObject_GetAttrString(token, "text");
@@ -1329,17 +1370,18 @@ Tokenizer_really_parse_tag(Tokenizer* self)
return Tokenizer_handle_blacklisted_tag(self);
}
else if (this == *"/" && next == *">" && can_exit) {
if (Tokenizer_handle_tag_close_open(self, data, TagCloseSelfclose)) {
free(data);
if (Tokenizer_handle_tag_close_open(self, data,
TagCloseSelfclose)) {
TagData_dealloc(data);
return NULL;
}
free(data);
TagData_dealloc(data);
return Tokenizer_pop(self);
}
else {
if (Tokenizer_handle_tag_data(self, data, this) || BAD_ROUTE) {
RESET_ROUTE();
free(data);
TagData_dealloc(data);
return NULL;
}
}
@@ -1351,7 +1393,7 @@ Tokenizer_really_parse_tag(Tokenizer* self)
Write a pending tag attribute from data to the stack.
*/
static int
Tokenizer_push_tag_buffer(Tokenizer* self, TagOpenData* data)
Tokenizer_push_tag_buffer(Tokenizer* self, TagData* data)
{
PyObject *token, *tokens, *kwargs, *pad_first, *pad_before_eq,
*pad_after_eq;
@@ -1405,13 +1447,7 @@ Tokenizer_push_tag_buffer(Tokenizer* self, TagOpenData* data)
return -1;
}
Py_DECREF(tokens);
Textbuffer_dealloc(data->pad_first);
Textbuffer_dealloc(data->pad_before_eq);
Textbuffer_dealloc(data->pad_after_eq);
data->pad_first = Textbuffer_new();
data->pad_before_eq = Textbuffer_new();
data->pad_after_eq = Textbuffer_new();
if (!data->pad_first || !data->pad_before_eq || !data->pad_after_eq)
if (TagData_reset_buffers(data))
return -1;
return 0;
}
@@ -1420,7 +1456,7 @@ Tokenizer_push_tag_buffer(Tokenizer* self, TagOpenData* data)
Handle all sorts of text data inside of an HTML open tag.
*/
static int
Tokenizer_handle_tag_data(Tokenizer* self, TagOpenData* data, Py_UNICODE chunk)
Tokenizer_handle_tag_data(Tokenizer* self, TagData* data, Py_UNICODE chunk)
{
PyObject *trash, *token;
int first_time, i, is_marker = 0, escaped;
@@ -1509,7 +1545,7 @@ Tokenizer_handle_tag_data(Tokenizer* self, TagOpenData* data, Py_UNICODE chunk)
Handle whitespace inside of an HTML open tag.
*/
static int
Tokenizer_handle_tag_space(Tokenizer* self, TagOpenData* data, Py_UNICODE text)
Tokenizer_handle_tag_space(Tokenizer* self, TagData* data, Py_UNICODE text)
{
int ctx = data->context;
int end_of_value = (ctx & TAG_ATTR_VALUE &&
@@ -1592,10 +1628,9 @@ Tokenizer_handle_blacklisted_tag(Tokenizer* self)
Handle the closing of a open tag (<foo>).
*/
static int
Tokenizer_handle_tag_close_open(Tokenizer* self, TagOpenData* data,
PyObject* token)
Tokenizer_handle_tag_close_open(Tokenizer* self, TagData* data, PyObject* cls)
{
PyObject *padding, *kwargs, *tok;
PyObject *padding, *kwargs, *token;

if (data->context & (TAG_ATTR_NAME | TAG_ATTR_VALUE)) {
if (Tokenizer_push_tag_buffer(self, data))
@@ -1611,15 +1646,15 @@ Tokenizer_handle_tag_close_open(Tokenizer* self, TagOpenData* data,
}
PyDict_SetItemString(kwargs, "padding", padding);
Py_DECREF(padding);
tok = PyObject_Call(token, NOARGS, kwargs);
token = PyObject_Call(cls, NOARGS, kwargs);
Py_DECREF(kwargs);
if (!tok)
if (!token)
return -1;
if (Tokenizer_emit(self, tok)) {
Py_DECREF(tok);
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
return -1;
}
Py_DECREF(tok);
Py_DECREF(token);
self->head++;
return 0;
}


+ 10
- 6
mwparserfromhell/parser/tokenizer.h View File

@@ -181,7 +181,7 @@ typedef struct {
struct Textbuffer* pad_before_eq;
struct Textbuffer* pad_after_eq;
Py_ssize_t reset;
} TagOpenData;
} TagData;

typedef struct Textbuffer Textbuffer;
typedef struct Stack Stack;
@@ -218,11 +218,15 @@ typedef struct {

/* Function prototypes: */

static struct Textbuffer* Textbuffer_new(void);
static Textbuffer* Textbuffer_new(void);
static void Textbuffer_dealloc(Textbuffer*);
static int Textbuffer_write(Textbuffer**, Py_UNICODE);
static PyObject* Textbuffer_render(Textbuffer*);

static TagData* TagData_new(void);
static void TagData_dealloc(TagData*);
static int TagData_reset_buffers(TagData*);

static PyObject* Tokenizer_new(PyTypeObject*, PyObject*, PyObject*);
static void Tokenizer_dealloc(Tokenizer*);
static int Tokenizer_init(Tokenizer*, PyObject*, PyObject*);
@@ -257,12 +261,12 @@ static int Tokenizer_parse_entity(Tokenizer*);
static int Tokenizer_parse_comment(Tokenizer*);
static int Tokenizer_parse_tag(Tokenizer*);
static PyObject* Tokenizer_really_parse_tag(Tokenizer*);
static int Tokenizer_push_tag_buffer(Tokenizer*, TagOpenData*);
static int Tokenizer_handle_tag_data(Tokenizer*, TagOpenData*, Py_UNICODE);
static int Tokenizer_handle_tag_space(Tokenizer*, TagOpenData*, Py_UNICODE);
static int Tokenizer_push_tag_buffer(Tokenizer*, TagData*);
static int Tokenizer_handle_tag_data(Tokenizer*, TagData*, Py_UNICODE);
static int Tokenizer_handle_tag_space(Tokenizer*, TagData*, Py_UNICODE);
static int Tokenizer_handle_tag_text(Tokenizer*, Py_UNICODE);
static PyObject* Tokenizer_handle_blacklisted_tag(Tokenizer*);
static int Tokenizer_handle_tag_close_open(Tokenizer*, TagOpenData*, PyObject*);
static int Tokenizer_handle_tag_close_open(Tokenizer*, TagData*, PyObject*);
static int Tokenizer_handle_tag_open_close(Tokenizer*);
static PyObject* Tokenizer_handle_tag_close_close(Tokenizer*);
static int Tokenizer_handle_invalid_tag_start(Tokenizer*);


Loading…
Cancel
Save