Clean up whitespace / newlines.

tags/v0.2
Ben Kurtovic, 12 years ago
parent
commit
06f02b9753
1 changed file with 108 additions and 123 deletions
mwparserfromhell/parser/tokenizer.c (+108, -123)
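The commit applies two mechanical cleanups across the file: one-line guards such as `if (!token) return -1;` are split onto two lines, and braces around single-statement bodies are dropped. One hunk, in Tokenizer_parse_wikilink, additionally moves local declarations ahead of statements. A minimal sketch of the target style, using a hypothetical function rather than code from tokenizer.c:

static int
example_check(const char* ptr)
{
    /* Hypothetical example showing the post-cleanup conventions. */
    if (!ptr)       /* was: if (!ptr) return -1;     */
        return -1;
    if (!*ptr)      /* was: if (!*ptr) { return 1; } */
        return 1;
    return 0;
}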

@@ -106,9 +106,8 @@ Tokenizer_push(Tokenizer* self, int context)
     top->stack = PyList_New(0);
     top->context = context;
     top->textbuffer = Textbuffer_new();
-    if (!top->textbuffer) {
+    if (!top->textbuffer)
         return -1;
-    }
     top->next = self->topstack;
     self->topstack = top;
     return 0;
@@ -140,11 +139,11 @@ static int
 Tokenizer_push_textbuffer(Tokenizer* self)
 {
     struct Textbuffer* buffer = self->topstack->textbuffer;
-    if (buffer->size == 0 && !buffer->next) {
+    if (buffer->size == 0 && !buffer->next)
         return 0;
-    }
     PyObject* text = Textbuffer_render(buffer);
-    if (!text) return -1;
+    if (!text)
+        return -1;
 
     PyObject* kwargs = PyDict_New();
     if (!kwargs) {
@@ -156,20 +155,19 @@ Tokenizer_push_textbuffer(Tokenizer* self)
 
     PyObject* token = PyObject_Call(Text, NOARGS, kwargs);
     Py_DECREF(kwargs);
-    if (!token) return -1;
+    if (!token)
+        return -1;
 
     if (PyList_Append(self->topstack->stack, token)) {
         Py_DECREF(token);
         return -1;
     }
-
     Py_DECREF(token);
 
     Textbuffer_dealloc(buffer);
     self->topstack->textbuffer = Textbuffer_new();
-    if (!self->topstack->textbuffer) {
+    if (!self->topstack->textbuffer)
         return -1;
-    }
     return 0;
 }
 
@@ -239,10 +237,8 @@ Tokenizer_write(Tokenizer* self, PyObject* token)
 {
     if (Tokenizer_push_textbuffer(self))
         return -1;
-
     if (PyList_Append(self->topstack->stack, token))
         return -1;
-
     return 0;
 }
 
@@ -254,10 +250,8 @@ Tokenizer_write_first(Tokenizer* self, PyObject* token)
 {
     if (Tokenizer_push_textbuffer(self))
         return -1;
-
     if (PyList_Insert(self->topstack->stack, 0, token))
         return -1;
-
     return 0;
 }
 
@@ -270,9 +264,8 @@ Tokenizer_write_text(Tokenizer* self, Py_UNICODE text)
     struct Textbuffer* buf = self->topstack->textbuffer;
     if (buf->size == TEXTBUFFER_BLOCKSIZE) {
         struct Textbuffer* new = Textbuffer_new();
-        if (!new) {
+        if (!new)
             return -1;
-        }
         new->next = buf;
         self->topstack->textbuffer = new;
         buf = new;
@@ -297,18 +290,20 @@ Tokenizer_write_all(Tokenizer* self, PyObject* tokenlist)
         case 1: {
             pushed = 1;
             struct Textbuffer* buffer = self->topstack->textbuffer;
-            if (buffer->size == 0 && !buffer->next) {
+            if (buffer->size == 0 && !buffer->next)
                 break;
-            }
             PyObject* left = Textbuffer_render(buffer);
-            if (!left) return -1;
+            if (!left)
+                return -1;
             PyObject* right = PyObject_GetAttrString(token, "text");
-            if (!right) return -1;
+            if (!right)
+                return -1;
 
             PyObject* text = PyUnicode_Concat(left, right);
             Py_DECREF(left);
             Py_DECREF(right);
-            if (!text) return -1;
+            if (!text)
+                return -1;
 
             if (PyObject_SetAttrString(token, "text", text)) {
                 Py_DECREF(text);
@@ -318,9 +313,8 @@ Tokenizer_write_all(Tokenizer* self, PyObject* tokenlist)
 
             Textbuffer_dealloc(buffer);
             self->topstack->textbuffer = Textbuffer_new();
-            if (!self->topstack->textbuffer) {
+            if (!self->topstack->textbuffer)
                 return -1;
-            }
             break;
         }
         case -1:
@@ -334,10 +328,8 @@ Tokenizer_write_all(Tokenizer* self, PyObject* tokenlist)
 
     PyObject* stack = self->topstack->stack;
     Py_ssize_t size = PyList_GET_SIZE(stack);
-
     if (PyList_SetSlice(stack, size, size, tokenlist))
         return -1;
-
     return 0;
 }
 
@@ -351,7 +343,8 @@ Tokenizer_write_text_then_stack(Tokenizer* self, const char* text)
     PyObject* stack = Tokenizer_pop(self);
     int i = 0;
     while (1) {
-        if (!text[i]) break;
+        if (!text[i])
+            break;
         if (Tokenizer_write_text(self, (Py_UNICODE) text[i])) {
             Py_XDECREF(stack);
             return -1;
@@ -380,10 +373,8 @@ static PyObject*
 Tokenizer_read(Tokenizer* self, Py_ssize_t delta)
 {
     Py_ssize_t index = self->head + delta;
-
     if (index >= self->length)
         return EMPTY;
-
     return PyList_GET_ITEM(self->text, index);
 }
 
@@ -395,7 +386,6 @@ Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta)
 {
     if (delta > self->head)
         return EMPTY;
-
     Py_ssize_t index = self->head - delta;
     return PyList_GET_ITEM(self->text, index);
 }
@@ -457,21 +447,19 @@ Tokenizer_parse_template_or_argument(Tokenizer* self)
                 Py_XDECREF(text);
                 return 0;
             }
-            else {
+            else
                 braces -= 2;
-            }
         }
-        else {
+        else
             braces -= 3;
-        }
 
-        if (braces) {
+        if (braces)
             self->head++;
-        }
     }
 
     PyObject* tokenlist = Tokenizer_pop(self);
-    if (!tokenlist) return -1;
+    if (!tokenlist)
+        return -1;
     if (Tokenizer_write_all(self, tokenlist)) {
         Py_DECREF(tokenlist);
         return -1;
@@ -495,7 +483,8 @@ Tokenizer_parse_template(Tokenizer* self)
         self->head = reset;
         return 0;
     }
-    if (!template) return -1;
+    if (!template)
+        return -1;
 
     token = PyObject_CallObject(TemplateOpen, NULL);
     if (!token) {
@@ -517,14 +506,14 @@ Tokenizer_parse_template(Tokenizer* self)
     Py_DECREF(template);
 
     token = PyObject_CallObject(TemplateClose, NULL);
-    if (!token) return -1;
+    if (!token)
+        return -1;
 
     if (Tokenizer_write(self, token)) {
         Py_DECREF(token);
         return -1;
     }
     Py_DECREF(token);
-
     return 0;
 }
 
@@ -542,7 +531,8 @@ Tokenizer_parse_argument(Tokenizer* self)
         self->head = reset;
         return 0;
     }
-    if (!argument) return -1;
+    if (!argument)
+        return -1;
 
     token = PyObject_CallObject(ArgumentOpen, NULL);
     if (!token) {
@@ -564,14 +554,14 @@ Tokenizer_parse_argument(Tokenizer* self)
     Py_DECREF(argument);
 
     token = PyObject_CallObject(ArgumentClose, NULL);
-    if (!token) return -1;
+    if (!token)
+        return -1;
 
     if (Tokenizer_write(self, token)) {
         Py_DECREF(token);
         return -1;
     }
     Py_DECREF(token);
-
     return 0;
 }
 
@@ -581,28 +571,27 @@ Tokenizer_parse_argument(Tokenizer* self)
 static int
 Tokenizer_handle_template_param(Tokenizer* self)
 {
-    if (self->topstack->context & LC_TEMPLATE_NAME) {
+    if (self->topstack->context & LC_TEMPLATE_NAME)
         self->topstack->context ^= LC_TEMPLATE_NAME;
-    }
-    else if (self->topstack->context & LC_TEMPLATE_PARAM_VALUE) {
+    else if (self->topstack->context & LC_TEMPLATE_PARAM_VALUE)
         self->topstack->context ^= LC_TEMPLATE_PARAM_VALUE;
-    }
 
     if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) {
         PyObject* stack = Tokenizer_pop_keeping_context(self);
-        if (!stack) return -1;
+        if (!stack)
+            return -1;
         if (Tokenizer_write_all(self, stack)) {
             Py_DECREF(stack);
             return -1;
         }
         Py_DECREF(stack);
     }
-    else {
+    else
         self->topstack->context |= LC_TEMPLATE_PARAM_KEY;
-    }
 
     PyObject* token = PyObject_CallObject(TemplateParamSeparator, NULL);
-    if (!token) return -1;
+    if (!token)
+        return -1;
 
     if (Tokenizer_write(self, token)) {
         Py_DECREF(token);
@@ -622,7 +611,8 @@ static int
 Tokenizer_handle_template_param_value(Tokenizer* self)
 {
     PyObject* stack = Tokenizer_pop_keeping_context(self);
-    if (!stack) return -1;
+    if (!stack)
+        return -1;
     if (Tokenizer_write_all(self, stack)) {
         Py_DECREF(stack);
         return -1;
@@ -633,8 +623,8 @@ Tokenizer_handle_template_param_value(Tokenizer* self)
     self->topstack->context |= LC_TEMPLATE_PARAM_VALUE;
 
     PyObject* token = PyObject_CallObject(TemplateParamEquals, NULL);
-    if (!token) return -1;
+    if (!token)
+        return -1;
     if (Tokenizer_write(self, token)) {
         Py_DECREF(token);
         return -1;
@@ -652,14 +642,14 @@ Tokenizer_handle_template_end(Tokenizer* self)
     PyObject* stack;
     if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) {
         stack = Tokenizer_pop_keeping_context(self);
-        if (!stack) return NULL;
+        if (!stack)
+            return NULL;
         if (Tokenizer_write_all(self, stack)) {
             Py_DECREF(stack);
             return NULL;
         }
         Py_DECREF(stack);
     }
-
     self->head++;
     stack = Tokenizer_pop(self);
     return stack;
@@ -675,8 +665,8 @@ Tokenizer_handle_argument_separator(Tokenizer* self)
     self->topstack->context |= LC_ARGUMENT_DEFAULT;
 
     PyObject* token = PyObject_CallObject(ArgumentSeparator, NULL);
-    if (!token) return -1;
+    if (!token)
+        return -1;
     if (Tokenizer_write(self, token)) {
         Py_DECREF(token);
         return -1;
@@ -702,11 +692,12 @@ Tokenizer_handle_argument_end(Tokenizer* self)
 static int
 Tokenizer_parse_wikilink(Tokenizer* self)
 {
-    self->head += 2;
-    Py_ssize_t reset = self->head - 1;
+    Py_ssize_t reset;
+    PyObject *token, *wikilink;
 
-    PyObject *token;
-    PyObject *wikilink = Tokenizer_parse(self, LC_WIKILINK_TITLE);
+    self->head += 2;
+    reset = self->head - 1;
+    wikilink = Tokenizer_parse(self, LC_WIKILINK_TITLE);
 
     if (BAD_ROUTE) {
         RESET_ROUTE();
@@ -718,7 +709,8 @@ Tokenizer_parse_wikilink(Tokenizer* self)
         }
         return 0;
     }
-    if (!wikilink) return -1;
+    if (!wikilink)
+        return -1;
 
     token = PyObject_CallObject(WikilinkOpen, NULL);
     if (!token) {
@@ -740,8 +732,8 @@ Tokenizer_parse_wikilink(Tokenizer* self)
     Py_DECREF(wikilink);
 
     token = PyObject_CallObject(WikilinkClose, NULL);
-    if (!token) return -1;
+    if (!token)
+        return -1;
     if (Tokenizer_write(self, token)) {
         Py_DECREF(token);
         return -1;
@@ -760,8 +752,8 @@ Tokenizer_handle_wikilink_separator(Tokenizer* self)
     self->topstack->context |= LC_WIKILINK_TEXT;
 
     PyObject* token = PyObject_CallObject(WikilinkSeparator, NULL);
-    if (!token) return -1;
+    if (!token)
+        return -1;
     if (Tokenizer_write(self, token)) {
         Py_DECREF(token);
         return -1;
@@ -866,14 +858,13 @@ Tokenizer_parse_heading(Tokenizer* self)
     free(heading);
 
     token = PyObject_CallObject(HeadingEnd, NULL);
-    if (!token) return -1;
+    if (!token)
+        return -1;
     if (Tokenizer_write(self, token)) {
         Py_DECREF(token);
         return -1;
     }
     Py_DECREF(token);
-
     self->global ^= GL_HEADING;
     return 0;
 }
@@ -931,7 +922,8 @@ Tokenizer_handle_heading_end(Tokenizer* self)
     }
 
     PyObject* stack = Tokenizer_pop(self);
-    if (!stack) return NULL;
+    if (!stack)
+        return NULL;
 
     HeadingData* heading = malloc(sizeof(HeadingData));
     if (!heading) {
@@ -955,7 +947,8 @@ Tokenizer_really_parse_entity(Tokenizer* self)
     char *valid, *text, *def;
 
     token = PyObject_CallObject(HTMLEntityStart, NULL);
-    if (!token) return -1;
+    if (!token)
+        return -1;
     if (Tokenizer_write(self, token)) {
         Py_DECREF(token);
         return -1;
@@ -972,7 +965,8 @@ Tokenizer_really_parse_entity(Tokenizer* self)
     if (this == *"#") {
         numeric = 1;
         token = PyObject_CallObject(HTMLEntityNumeric, NULL);
-        if (!token) return -1;
+        if (!token)
+            return -1;
         if (Tokenizer_write(self, token)) {
             Py_DECREF(token);
             return -1;
@@ -988,11 +982,13 @@ Tokenizer_really_parse_entity(Tokenizer* self)
         if (this == *"x" || this == *"X") {
             hexadecimal = 1;
             kwargs = PyDict_New();
-            if (!kwargs) return -1;
+            if (!kwargs)
+                return -1;
             PyDict_SetItemString(kwargs, "char", Tokenizer_read(self, 0));
             PyObject* token = PyObject_Call(HTMLEntityHex, NOARGS, kwargs);
             Py_DECREF(kwargs);
-            if (!token) return -1;
+            if (!token)
+                return -1;
             if (Tokenizer_write(self, token)) {
                 Py_DECREF(token);
                 return -1;
@@ -1000,13 +996,11 @@ Tokenizer_really_parse_entity(Tokenizer* self)
             Py_DECREF(token);
             self->head++;
         }
-        else {
+        else
             hexadecimal = 0;
-        }
     }
-    else {
+    else
         numeric = hexadecimal = 0;
-    }
 
     if (hexadecimal)
         valid = "0123456789abcdefABCDEF";
@@ -1091,7 +1085,8 @@ Tokenizer_really_parse_entity(Tokenizer* self)
     Py_DECREF(textobj);
     token = PyObject_Call(Text, NOARGS, kwargs);
     Py_DECREF(kwargs);
-    if (!token) return -1;
+    if (!token)
+        return -1;
     if (Tokenizer_write(self, token)) {
         Py_DECREF(token);
         return -1;
@@ -1099,7 +1094,8 @@ Tokenizer_really_parse_entity(Tokenizer* self)
     Py_DECREF(token);
 
     token = PyObject_CallObject(HTMLEntityEnd, NULL);
-    if (!token) return -1;
+    if (!token)
+        return -1;
     if (Tokenizer_write(self, token)) {
         Py_DECREF(token);
         return -1;
@@ -1117,9 +1113,8 @@ Tokenizer_parse_entity(Tokenizer* self)
     Py_ssize_t reset = self->head;
     if (Tokenizer_push(self, 0))
         return -1;
-
     if (Tokenizer_really_parse_entity(self))
         return -1;
 
     if (BAD_ROUTE) {
         RESET_ROUTE();
@@ -1130,12 +1125,12 @@ Tokenizer_parse_entity(Tokenizer* self)
     }
 
     PyObject* tokenlist = Tokenizer_pop(self);
-    if (!tokenlist) return -1;
+    if (!tokenlist)
+        return -1;
     if (Tokenizer_write_all(self, tokenlist)) {
         Py_DECREF(tokenlist);
         return -1;
     }
-
     Py_DECREF(tokenlist);
     return 0;
 }
@@ -1158,7 +1153,8 @@ Tokenizer_parse_comment(Tokenizer* self)
         const char* text = "<!--";
         int i = 0;
         while (1) {
-            if (!text[i]) return 0;
+            if (!text[i])
+                return 0;
             if (Tokenizer_write_text(self, (Py_UNICODE) text[i])) {
                 Py_XDECREF(text);
                 return -1;
@@ -1167,7 +1163,8 @@ Tokenizer_parse_comment(Tokenizer* self)
         }
         return 0;
     }
-    if (!comment) return -1;
+    if (!comment)
+        return -1;
 
     token = PyObject_CallObject(CommentStart, NULL);
     if (!token) {
@@ -1181,7 +1178,6 @@ Tokenizer_parse_comment(Tokenizer* self)
         return -1;
     }
     Py_DECREF(token);
-
     if (Tokenizer_write_all(self, comment)) {
         Py_DECREF(comment);
         return -1;
@@ -1189,8 +1185,8 @@ Tokenizer_parse_comment(Tokenizer* self)
     Py_DECREF(comment);
 
     token = PyObject_CallObject(CommentEnd, NULL);
-    if (!token) return -1;
+    if (!token)
+        return -1;
     if (Tokenizer_write(self, token)) {
         Py_DECREF(token);
         return -1;
@@ -1232,12 +1228,10 @@ Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data)
             }
             self->topstack->context ^= LC_FAIL_ON_RBRACE;
         }
-        else if (data == *"{") {
+        else if (data == *"{")
            self->topstack->context |= LC_FAIL_ON_LBRACE;
-        }
-        else if (data == *"}") {
+        else if (data == *"}")
            self->topstack->context |= LC_FAIL_ON_RBRACE;
-        }
     }
 
     if (context & LC_HAS_TEXT) {
@@ -1248,14 +1242,12 @@ Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data)
             }
         }
         else {
-            if (data == *"\n") {
+            if (data == *"\n")
                 self->topstack->context |= LC_FAIL_ON_TEXT;
-            }
         }
     }
-    else if (!Py_UNICODE_ISSPACE(data)) {
+    else if (!Py_UNICODE_ISSPACE(data))
         self->topstack->context |= LC_HAS_TEXT;
-    }
 }
 
 /*
@@ -1301,9 +1293,8 @@ Tokenizer_parse(Tokenizer* self, int context)
             PyObject* trash = Tokenizer_pop(self);
             Py_XDECREF(trash);
         }
-        if (this_context & fail_contexts) {
+        if (this_context & fail_contexts)
             return Tokenizer_fail_route(self);
-        }
         return Tokenizer_pop(self);
     }
 
@@ -1311,9 +1302,8 @@ Tokenizer_parse(Tokenizer* self, int context)
 
     if (this_context & LC_COMMENT) {
         if (this == next && next == *"-") {
-            if (Tokenizer_READ(self, 2) == *">") {
+            if (Tokenizer_READ(self, 2) == *">")
                 return Tokenizer_pop(self);
-            }
         }
         Tokenizer_write_text(self, this);
     }
@@ -1331,9 +1321,8 @@ Tokenizer_parse(Tokenizer* self, int context)
         if (Tokenizer_handle_template_param_value(self))
             return NULL;
     }
-    else if (this == next && next == *"}" && this_context & LC_TEMPLATE) {
+    else if (this == next && next == *"}" && this_context & LC_TEMPLATE)
         return Tokenizer_handle_template_end(self);
-    }
     else if (this == *"|" && this_context & LC_ARGUMENT_NAME) {
         if (Tokenizer_handle_argument_separator(self))
             return NULL;
@@ -1359,25 +1348,21 @@ Tokenizer_parse(Tokenizer* self, int context)
         if (Tokenizer_handle_wikilink_separator(self))
             return NULL;
     }
-        else if (this == next && next == *"]" && this_context & LC_WIKILINK) {
+        else if (this == next && next == *"]" && this_context & LC_WIKILINK)
             return Tokenizer_handle_wikilink_end(self);
-        }
         else if (this == *"=" && !(self->global & GL_HEADING)) {
             last = *PyUnicode_AS_UNICODE(Tokenizer_read_backwards(self, 1));
             if (last == *"\n" || last == *"") {
                 if (Tokenizer_parse_heading(self))
                     return NULL;
             }
-            else {
+            else
                 Tokenizer_write_text(self, this);
-            }
         }
-        else if (this == *"=" && this_context & LC_HEADING) {
+        else if (this == *"=" && this_context & LC_HEADING)
            return (PyObject*) Tokenizer_handle_heading_end(self);
-        }
-        else if (this == *"\n" && this_context & LC_HEADING) {
+        else if (this == *"\n" && this_context & LC_HEADING)
            return Tokenizer_fail_route(self);
-        }
         else if (this == *"&") {
             if (Tokenizer_parse_entity(self))
                 return NULL;
@@ -1388,14 +1373,11 @@ Tokenizer_parse(Tokenizer* self, int context)
                 if (Tokenizer_parse_comment(self))
                     return NULL;
             }
-            else {
+            else
                 Tokenizer_write_text(self, this);
-            }
         }
-        else {
+        else
             Tokenizer_write_text(self, this);
-        }
-
         self->head++;
     }
 }
@@ -1414,9 +1396,8 @@ Tokenizer_tokenize(Tokenizer* self, PyObject* args)
     const char* encoded;
     Py_ssize_t size;
 
-    if (!PyArg_ParseTuple(args, "s#", &encoded, &size)) {
+    if (!PyArg_ParseTuple(args, "s#", &encoded, &size))
         return NULL;
-    }
 
     PyObject* temp;
     temp = PyUnicode_FromStringAndSize(encoded, size);
@@ -1434,7 +1415,6 @@ Tokenizer_tokenize(Tokenizer* self, PyObject* args)
     }
 
     self->length = PyList_GET_SIZE(self->text);
-
     return Tokenizer_parse(self, 0);
 }
 
@@ -1453,16 +1433,19 @@ init_tokenizer(void)
     PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType);
 
     PyObject* htmlentitydefs = PyImport_ImportModule("htmlentitydefs");
-    if (!htmlentitydefs) return;
+    if (!htmlentitydefs)
+        return;
 
     PyObject* defmap = PyObject_GetAttrString(htmlentitydefs, "entitydefs");
-    if (!defmap) return;
+    if (!defmap)
+        return;
     Py_DECREF(htmlentitydefs);
 
     unsigned numdefs = (unsigned) PyDict_Size(defmap);
     entitydefs = calloc(numdefs + 1, sizeof(char*));
     PyObject* deflist = PyDict_Keys(defmap);
-    if (!deflist) return;
+    if (!deflist)
+        return;
     Py_DECREF(defmap);
 
     unsigned i;
@@ -1478,7 +1461,8 @@ init_tokenizer(void)
     PyObject* globals = PyEval_GetGlobals();
     PyObject* locals = PyEval_GetLocals();
     PyObject* fromlist = PyList_New(1);
-    if (!fromlist) return;
+    if (!fromlist)
+        return;
     PyObject* submodname = PyBytes_FromString("tokens");
     if (!submodname) {
         Py_DECREF(fromlist);
@@ -1488,7 +1472,8 @@ init_tokenizer(void)
 
     PyObject* tokmodule = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0);
     Py_DECREF(fromlist);
-    if (!tokmodule) return;
+    if (!tokmodule)
+        return;
 
     tokens = PyObject_GetAttrString(tokmodule, "tokens");
     Py_DECREF(tokmodule);
 
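Many of the hunks above reshape the same token-emission idiom. Written out as a standalone helper in the post-cleanup style, the pattern looks roughly like this (emit_token is a hypothetical factoring for illustration; tokenizer.c repeats the pattern inline):

static int
emit_token(Tokenizer* self, PyObject* token_class)
{
    /* Instantiate the token type (TemplateClose, WikilinkOpen, ...);
       PyObject_CallObject returns a new reference, or NULL on failure. */
    PyObject* token = PyObject_CallObject(token_class, NULL);
    if (!token)
        return -1;
    /* Tokenizer_write appends the token to the current stack; per the diff,
       it does not steal the reference, so it is released on both paths. */
    if (Tokenizer_write(self, token)) {
        Py_DECREF(token);
        return -1;
    }
    Py_DECREF(token);
    return 0;
}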