Browse Source

emit_FAST(), emit_first_FAST(); update comment parsing

tags/v0.3
Ben Kurtovic 11 years ago
parent
commit
df9f7388b6
2 changed files with 140 additions and 274 deletions
  1. +98
    -230
      mwparserfromhell/parser/tokenizer.c
  2. +42
    -44
      mwparserfromhell/parser/tokenizer.h

+ 98
- 230
mwparserfromhell/parser/tokenizer.c View File

@@ -368,6 +368,40 @@ static int Tokenizer_emit_first(Tokenizer* self, PyObject* token)
return 0; return 0;
} }


/*
    Flush the pending textbuffer, then instantiate a zero-argument token of
    the given class and append it to the end of the current token stack.

    Returns 0 on success; returns -1 with a Python exception set if the
    textbuffer flush, the token instantiation, or the list append fails.
*/
static int Tokenizer_emit_FAST(Tokenizer* self, PyObject* token)
{
    PyObject* obj;
    int status;

    if (Tokenizer_push_textbuffer(self))
        return -1;
    obj = PyObject_CallObject(token, NULL);
    if (!obj)
        return -1;
    /* PyList_Append takes its own reference, so ours is released
       unconditionally, on both the success and failure paths. */
    status = PyList_Append(self->topstack->stack, obj);
    Py_DECREF(obj);
    return status ? -1 : 0;
}

/*
    Flush the pending textbuffer, then instantiate a zero-argument token of
    the given class and insert it at the front (index 0) of the current
    token stack.

    Returns 0 on success; returns -1 with a Python exception set if the
    textbuffer flush, the token instantiation, or the list insert fails.
*/
static int Tokenizer_emit_first_FAST(Tokenizer* self, PyObject* token)
{
    PyObject* obj;
    int status;

    if (Tokenizer_push_textbuffer(self))
        return -1;
    obj = PyObject_CallObject(token, NULL);
    if (!obj)
        return -1;
    /* PyList_Insert takes its own reference, so ours is released
       unconditionally, on both the success and failure paths. */
    status = PyList_Insert(self->topstack->stack, 0, obj);
    Py_DECREF(obj);
    return status ? -1 : 0;
}

/* /*
Write a Unicode codepoint to the current textbuffer. Write a Unicode codepoint to the current textbuffer.
*/ */
@@ -503,7 +537,7 @@ static PyObject* Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta)
*/ */
static int Tokenizer_parse_template(Tokenizer* self) static int Tokenizer_parse_template(Tokenizer* self)
{ {
PyObject *template, *token;
PyObject *template;
Py_ssize_t reset = self->head; Py_ssize_t reset = self->head;


template = Tokenizer_parse(self, LC_TEMPLATE_NAME, 1); template = Tokenizer_parse(self, LC_TEMPLATE_NAME, 1);
@@ -513,30 +547,17 @@ static int Tokenizer_parse_template(Tokenizer* self)
} }
if (!template) if (!template)
return -1; return -1;
token = PyObject_CallObject(TemplateOpen, NULL);
if (!token) {
if (Tokenizer_emit_first_FAST(self, TemplateOpen)) {
Py_DECREF(template); Py_DECREF(template);
return -1; return -1;
} }
if (Tokenizer_emit_first(self, token)) {
Py_DECREF(token);
Py_DECREF(template);
return -1;
}
Py_DECREF(token);
if (Tokenizer_emit_all(self, template)) { if (Tokenizer_emit_all(self, template)) {
Py_DECREF(template); Py_DECREF(template);
return -1; return -1;
} }
Py_DECREF(template); Py_DECREF(template);
token = PyObject_CallObject(TemplateClose, NULL);
if (!token)
return -1;
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
if (Tokenizer_emit_FAST(self, TemplateClose))
return -1; return -1;
}
Py_DECREF(token);
return 0; return 0;
} }


@@ -545,7 +566,7 @@ static int Tokenizer_parse_template(Tokenizer* self)
*/ */
static int Tokenizer_parse_argument(Tokenizer* self) static int Tokenizer_parse_argument(Tokenizer* self)
{ {
PyObject *argument, *token;
PyObject *argument;
Py_ssize_t reset = self->head; Py_ssize_t reset = self->head;


argument = Tokenizer_parse(self, LC_ARGUMENT_NAME, 1); argument = Tokenizer_parse(self, LC_ARGUMENT_NAME, 1);
@@ -555,30 +576,17 @@ static int Tokenizer_parse_argument(Tokenizer* self)
} }
if (!argument) if (!argument)
return -1; return -1;
token = PyObject_CallObject(ArgumentOpen, NULL);
if (!token) {
Py_DECREF(argument);
return -1;
}
if (Tokenizer_emit_first(self, token)) {
Py_DECREF(token);
if (Tokenizer_emit_first_FAST(self, ArgumentOpen)) {
Py_DECREF(argument); Py_DECREF(argument);
return -1; return -1;
} }
Py_DECREF(token);
if (Tokenizer_emit_all(self, argument)) { if (Tokenizer_emit_all(self, argument)) {
Py_DECREF(argument); Py_DECREF(argument);
return -1; return -1;
} }
Py_DECREF(argument); Py_DECREF(argument);
token = PyObject_CallObject(ArgumentClose, NULL);
if (!token)
if (Tokenizer_emit_FAST(self, ArgumentClose))
return -1; return -1;
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
return -1;
}
Py_DECREF(token);
return 0; return 0;
} }


@@ -658,7 +666,7 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self)
*/ */
static int Tokenizer_handle_template_param(Tokenizer* self) static int Tokenizer_handle_template_param(Tokenizer* self)
{ {
PyObject *stack, *token;
PyObject *stack;


if (self->topstack->context & LC_TEMPLATE_NAME) if (self->topstack->context & LC_TEMPLATE_NAME)
self->topstack->context ^= LC_TEMPLATE_NAME; self->topstack->context ^= LC_TEMPLATE_NAME;
@@ -676,15 +684,8 @@ static int Tokenizer_handle_template_param(Tokenizer* self)
} }
else else
self->topstack->context |= LC_TEMPLATE_PARAM_KEY; self->topstack->context |= LC_TEMPLATE_PARAM_KEY;

token = PyObject_CallObject(TemplateParamSeparator, NULL);
if (!token)
return -1;
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
if (Tokenizer_emit_FAST(self, TemplateParamSeparator))
return -1; return -1;
}
Py_DECREF(token);
if (Tokenizer_push(self, self->topstack->context)) if (Tokenizer_push(self, self->topstack->context))
return -1; return -1;
return 0; return 0;
@@ -695,7 +696,7 @@ static int Tokenizer_handle_template_param(Tokenizer* self)
*/ */
static int Tokenizer_handle_template_param_value(Tokenizer* self) static int Tokenizer_handle_template_param_value(Tokenizer* self)
{ {
PyObject *stack, *token;
PyObject *stack;


stack = Tokenizer_pop_keeping_context(self); stack = Tokenizer_pop_keeping_context(self);
if (!stack) if (!stack)
@@ -707,14 +708,8 @@ static int Tokenizer_handle_template_param_value(Tokenizer* self)
Py_DECREF(stack); Py_DECREF(stack);
self->topstack->context ^= LC_TEMPLATE_PARAM_KEY; self->topstack->context ^= LC_TEMPLATE_PARAM_KEY;
self->topstack->context |= LC_TEMPLATE_PARAM_VALUE; self->topstack->context |= LC_TEMPLATE_PARAM_VALUE;
token = PyObject_CallObject(TemplateParamEquals, NULL);
if (!token)
if (Tokenizer_emit_FAST(self, TemplateParamEquals))
return -1; return -1;
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
return -1;
}
Py_DECREF(token);
return 0; return 0;
} }


@@ -745,17 +740,10 @@ static PyObject* Tokenizer_handle_template_end(Tokenizer* self)
*/ */
static int Tokenizer_handle_argument_separator(Tokenizer* self) static int Tokenizer_handle_argument_separator(Tokenizer* self)
{ {
PyObject* token;
self->topstack->context ^= LC_ARGUMENT_NAME; self->topstack->context ^= LC_ARGUMENT_NAME;
self->topstack->context |= LC_ARGUMENT_DEFAULT; self->topstack->context |= LC_ARGUMENT_DEFAULT;
token = PyObject_CallObject(ArgumentSeparator, NULL);
if (!token)
return -1;
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
if (Tokenizer_emit_FAST(self, ArgumentSeparator))
return -1; return -1;
}
Py_DECREF(token);
return 0; return 0;
} }


@@ -765,6 +753,7 @@ static int Tokenizer_handle_argument_separator(Tokenizer* self)
static PyObject* Tokenizer_handle_argument_end(Tokenizer* self) static PyObject* Tokenizer_handle_argument_end(Tokenizer* self)
{ {
PyObject* stack = Tokenizer_pop(self); PyObject* stack = Tokenizer_pop(self);

self->head += 2; self->head += 2;
return stack; return stack;
} }
@@ -775,7 +764,7 @@ static PyObject* Tokenizer_handle_argument_end(Tokenizer* self)
static int Tokenizer_parse_wikilink(Tokenizer* self) static int Tokenizer_parse_wikilink(Tokenizer* self)
{ {
Py_ssize_t reset; Py_ssize_t reset;
PyObject *wikilink, *token;
PyObject *wikilink;


self->head += 2; self->head += 2;
reset = self->head - 1; reset = self->head - 1;
@@ -789,30 +778,17 @@ static int Tokenizer_parse_wikilink(Tokenizer* self)
} }
if (!wikilink) if (!wikilink)
return -1; return -1;
token = PyObject_CallObject(WikilinkOpen, NULL);
if (!token) {
if (Tokenizer_emit_FAST(self, WikilinkOpen)) {
Py_DECREF(wikilink); Py_DECREF(wikilink);
return -1; return -1;
} }
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
Py_DECREF(wikilink);
return -1;
}
Py_DECREF(token);
if (Tokenizer_emit_all(self, wikilink)) { if (Tokenizer_emit_all(self, wikilink)) {
Py_DECREF(wikilink); Py_DECREF(wikilink);
return -1; return -1;
} }
Py_DECREF(wikilink); Py_DECREF(wikilink);
token = PyObject_CallObject(WikilinkClose, NULL);
if (!token)
return -1;
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
if (Tokenizer_emit_FAST(self, WikilinkClose))
return -1; return -1;
}
Py_DECREF(token);
if (self->topstack->context & LC_FAIL_NEXT) if (self->topstack->context & LC_FAIL_NEXT)
self->topstack->context ^= LC_FAIL_NEXT; self->topstack->context ^= LC_FAIL_NEXT;
return 0; return 0;
@@ -823,17 +799,10 @@ static int Tokenizer_parse_wikilink(Tokenizer* self)
*/ */
static int Tokenizer_handle_wikilink_separator(Tokenizer* self) static int Tokenizer_handle_wikilink_separator(Tokenizer* self)
{ {
PyObject* token;
self->topstack->context ^= LC_WIKILINK_TITLE; self->topstack->context ^= LC_WIKILINK_TITLE;
self->topstack->context |= LC_WIKILINK_TEXT; self->topstack->context |= LC_WIKILINK_TEXT;
token = PyObject_CallObject(WikilinkSeparator, NULL);
if (!token)
return -1;
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
if (Tokenizer_emit_FAST(self, WikilinkSeparator))
return -1; return -1;
}
Py_DECREF(token);
return 0; return 0;
} }


@@ -921,14 +890,8 @@ static int Tokenizer_parse_heading(Tokenizer* self)
} }
Py_DECREF(heading->title); Py_DECREF(heading->title);
free(heading); free(heading);
token = PyObject_CallObject(HeadingEnd, NULL);
if (!token)
if (Tokenizer_emit_FAST(self, HeadingEnd))
return -1; return -1;
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
return -1;
}
Py_DECREF(token);
self->global ^= GL_HEADING; self->global ^= GL_HEADING;
return 0; return 0;
} }
@@ -1010,14 +973,8 @@ static int Tokenizer_really_parse_entity(Tokenizer* self)
return 0; \ return 0; \
} }


token = PyObject_CallObject(HTMLEntityStart, NULL);
if (!token)
if (Tokenizer_emit_FAST(self, HTMLEntityStart))
return -1; return -1;
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
return -1;
}
Py_DECREF(token);
self->head++; self->head++;
this = Tokenizer_READ(self, 0); this = Tokenizer_READ(self, 0);
if (this == *"") { if (this == *"") {
@@ -1026,14 +983,8 @@ static int Tokenizer_really_parse_entity(Tokenizer* self)
} }
if (this == *"#") { if (this == *"#") {
numeric = 1; numeric = 1;
token = PyObject_CallObject(HTMLEntityNumeric, NULL);
if (!token)
if (Tokenizer_emit_FAST(self, HTMLEntityNumeric))
return -1; return -1;
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
return -1;
}
Py_DECREF(token);
self->head++; self->head++;
this = Tokenizer_READ(self, 0); this = Tokenizer_READ(self, 0);
if (this == *"") { if (this == *"") {
@@ -1156,14 +1107,8 @@ static int Tokenizer_really_parse_entity(Tokenizer* self)
return -1; return -1;
} }
Py_DECREF(token); Py_DECREF(token);
token = PyObject_CallObject(HTMLEntityEnd, NULL);
if (!token)
return -1;
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
if (Tokenizer_emit_FAST(self, HTMLEntityEnd))
return -1; return -1;
}
Py_DECREF(token);
return 0; return 0;
} }


@@ -1203,45 +1148,39 @@ static int Tokenizer_parse_entity(Tokenizer* self)
static int Tokenizer_parse_comment(Tokenizer* self) static int Tokenizer_parse_comment(Tokenizer* self)
{ {
Py_ssize_t reset = self->head + 3; Py_ssize_t reset = self->head + 3;
PyObject *token, *comment;
PyObject *comment;
Py_UNICODE this;


self->head += 4; self->head += 4;
comment = Tokenizer_parse(self, LC_COMMENT, 1);
if (BAD_ROUTE) {
RESET_ROUTE();
self->head = reset;
if (Tokenizer_emit_text(self, "<!--"))
return -1;
return 0;
}
if (!comment)
return -1;
token = PyObject_CallObject(CommentStart, NULL);
if (!token) {
Py_DECREF(comment);
return -1;
}
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
Py_DECREF(comment);
return -1;
}
Py_DECREF(token);
if (Tokenizer_emit_all(self, comment)) {
Py_DECREF(comment);
return -1;
}
Py_DECREF(comment);
token = PyObject_CallObject(CommentEnd, NULL);
if (!token)
return -1;
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
if (Tokenizer_push(self, 0))
return -1; return -1;
while (1) {
this = Tokenizer_READ(self, 0);
if (this == *"") {
comment = Tokenizer_pop(self);
Py_XDECREF(comment);
self->head = reset;
return Tokenizer_emit_text(self, "<!--");
}
if (this == *"-" && Tokenizer_READ(self, 1) == this &&
Tokenizer_READ(self, 2) == *">") {
if (Tokenizer_emit_first_FAST(self, CommentStart))
return -1;
if (Tokenizer_emit_FAST(self, CommentEnd))
return -1;
comment = Tokenizer_pop(self);
if (!comment)
return -1;
if (Tokenizer_emit_all(self, comment))
return -1;
Py_DECREF(comment);
self->head += 2;
return 0;
}
if (Tokenizer_emit_char(self, this))
return -1;
self->head++;
} }
Py_DECREF(token);
self->head += 2;
return 0;
} }


/* /*
@@ -1253,14 +1192,8 @@ static int Tokenizer_push_tag_buffer(Tokenizer* self, TagData* data)
*pad_after_eq; *pad_after_eq;


if (data->context & TAG_QUOTED) { if (data->context & TAG_QUOTED) {
token = PyObject_CallObject(TagAttrQuote, NULL);
if (!token)
if (Tokenizer_emit_first_FAST(self, TagAttrQuote))
return -1; return -1;
if (Tokenizer_emit_first(self, token)) {
Py_DECREF(token);
return -1;
}
Py_DECREF(token);
tokens = Tokenizer_pop(self); tokens = Tokenizer_pop(self);
if (!tokens) if (!tokens)
return -1; return -1;
@@ -1370,7 +1303,7 @@ static int Tokenizer_handle_tag_text(Tokenizer* self, Py_UNICODE text)
static int static int
Tokenizer_handle_tag_data(Tokenizer* self, TagData* data, Py_UNICODE chunk) Tokenizer_handle_tag_data(Tokenizer* self, TagData* data, Py_UNICODE chunk)
{ {
PyObject *trash, *token;
PyObject *trash;
int first_time, i, is_marker = 0, escaped; int first_time, i, is_marker = 0, escaped;


if (data->context & TAG_NAME) { if (data->context & TAG_NAME) {
@@ -1414,14 +1347,8 @@ Tokenizer_handle_tag_data(Tokenizer* self, TagData* data, Py_UNICODE chunk)
else if (data->context & TAG_ATTR_NAME) { else if (data->context & TAG_ATTR_NAME) {
if (chunk == *"=") { if (chunk == *"=") {
data->context = TAG_ATTR_VALUE | TAG_NOTE_QUOTE; data->context = TAG_ATTR_VALUE | TAG_NOTE_QUOTE;
token = PyObject_CallObject(TagAttrEquals, NULL);
if (!token)
if (Tokenizer_emit_FAST(self, TagAttrEquals))
return -1; return -1;
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
return -1;
}
Py_DECREF(token);
return 0; return 0;
} }
if (data->context & TAG_NOTE_EQUALS) { if (data->context & TAG_NOTE_EQUALS) {
@@ -1495,16 +1422,8 @@ Tokenizer_handle_tag_close_open(Tokenizer* self, TagData* data, PyObject* cls)
*/ */
static int Tokenizer_handle_tag_open_close(Tokenizer* self) static int Tokenizer_handle_tag_open_close(Tokenizer* self)
{ {
PyObject* token;

token = PyObject_CallObject(TagOpenClose, NULL);
if (!token)
if (Tokenizer_emit_FAST(self, TagOpenClose))
return -1; return -1;
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
return -1;
}
Py_DECREF(token);
if (Tokenizer_push(self, LC_TAG_CLOSE)) if (Tokenizer_push(self, LC_TAG_CLOSE))
return -1; return -1;
self->head++; self->head++;
@@ -1516,7 +1435,7 @@ static int Tokenizer_handle_tag_open_close(Tokenizer* self)
*/ */
static PyObject* Tokenizer_handle_tag_close_close(Tokenizer* self) static PyObject* Tokenizer_handle_tag_close_close(Tokenizer* self)
{ {
PyObject *closing, *first, *so, *sc, *token;
PyObject *closing, *first, *so, *sc;
int valid = 1; int valid = 1;


closing = Tokenizer_pop(self); closing = Tokenizer_pop(self);
@@ -1557,14 +1476,8 @@ static PyObject* Tokenizer_handle_tag_close_close(Tokenizer* self)
return NULL; return NULL;
} }
Py_DECREF(closing); Py_DECREF(closing);
token = PyObject_CallObject(TagCloseClose, NULL);
if (!token)
return NULL;
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
if (Tokenizer_emit_FAST(self, TagCloseClose))
return NULL; return NULL;
}
Py_DECREF(token);
return Tokenizer_pop(self); return Tokenizer_pop(self);
} }


@@ -1684,17 +1597,10 @@ static PyObject* Tokenizer_really_parse_tag(Tokenizer* self)
TagData_dealloc(data); TagData_dealloc(data);
return NULL; return NULL;
} }
token = PyObject_CallObject(TagOpenOpen, NULL);
if (!token) {
if (Tokenizer_emit_FAST(self, TagOpenOpen)) {
TagData_dealloc(data); TagData_dealloc(data);
return NULL; return NULL;
} }
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
TagData_dealloc(data);
return NULL;
}
Py_DECREF(token);
while (1) { while (1) {
this = Tokenizer_READ(self, 0); this = Tokenizer_READ(self, 0);
next = Tokenizer_READ(self, 1); next = Tokenizer_READ(self, 1);
@@ -1864,35 +1770,17 @@ static int Tokenizer_emit_style_tag(Tokenizer* self, const char* tag,
Py_DECREF(token); Py_DECREF(token);
if (Tokenizer_emit_text(self, tag)) if (Tokenizer_emit_text(self, tag))
return -1; return -1;
token = PyObject_CallObject(TagCloseOpen, NULL);
if (!token)
return -1;
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
if (Tokenizer_emit_FAST(self, TagCloseOpen))
return -1; return -1;
}
Py_DECREF(token);
if (Tokenizer_emit_all(self, body)) if (Tokenizer_emit_all(self, body))
return -1; return -1;
token = PyObject_CallObject(TagOpenClose, NULL);
if (!token)
return -1;
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
Py_DECREF(body);
if (Tokenizer_emit_FAST(self, TagOpenClose))
return -1; return -1;
}
Py_DECREF(token);
if (Tokenizer_emit_text(self, tag)) if (Tokenizer_emit_text(self, tag))
return -1; return -1;
token = PyObject_CallObject(TagCloseClose, NULL);
if (!token)
return -1;
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
if (Tokenizer_emit_FAST(self, TagCloseClose))
return -1; return -1;
}
Py_DECREF(token);
Py_DECREF(body);
return 0; return 0;
} }


@@ -2108,14 +1996,8 @@ static int Tokenizer_handle_list_marker(Tokenizer* self)
Py_DECREF(token); Py_DECREF(token);
if (Tokenizer_emit_text(self, GET_HTML_TAG(code))) if (Tokenizer_emit_text(self, GET_HTML_TAG(code)))
return -1; return -1;
token = PyObject_CallObject(TagCloseSelfclose, NULL);
if (!token)
return -1;
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
if (Tokenizer_emit_FAST(self, TagCloseSelfclose))
return -1; return -1;
}
Py_DECREF(token);
return 0; return 0;
} }


@@ -2181,14 +2063,8 @@ static int Tokenizer_handle_hr(Tokenizer* self)
Py_DECREF(token); Py_DECREF(token);
if (Tokenizer_emit_text(self, "hr")) if (Tokenizer_emit_text(self, "hr"))
return -1; return -1;
token = PyObject_CallObject(TagCloseSelfclose, NULL);
if (!token)
return -1;
if (Tokenizer_emit(self, token)) {
Py_DECREF(token);
if (Tokenizer_emit_FAST(self, TagCloseSelfclose))
return -1; return -1;
}
Py_DECREF(token);
return 0; return 0;
} }


@@ -2209,7 +2085,7 @@ static int Tokenizer_handle_dl_term(Tokenizer* self)
static PyObject* Tokenizer_handle_end(Tokenizer* self, int context) static PyObject* Tokenizer_handle_end(Tokenizer* self, int context)
{ {
static int fail_contexts = (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | static int fail_contexts = (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK |
LC_HEADING | LC_COMMENT | LC_TAG | LC_STYLE);
LC_HEADING | LC_TAG | LC_STYLE);
static int double_fail = (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE); static int double_fail = (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE);
PyObject *token, *text, *trash; PyObject *token, *text, *trash;
int single; int single;
@@ -2359,15 +2235,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
return Tokenizer_handle_end(self, this_context); return Tokenizer_handle_end(self, this_context);
next = Tokenizer_READ(self, 1); next = Tokenizer_READ(self, 1);
last = Tokenizer_READ_BACKWARDS(self, 1); last = Tokenizer_READ_BACKWARDS(self, 1);
if (this_context & LC_COMMENT) {
if (this == next && next == *"-") {
if (Tokenizer_READ(self, 2) == *">")
return Tokenizer_pop(self);
}
if (Tokenizer_emit_char(self, this))
return NULL;
}
else if (this == next && next == *"{") {
if (this == next && next == *"{") {
if (Tokenizer_CAN_RECURSE(self)) { if (Tokenizer_CAN_RECURSE(self)) {
if (Tokenizer_parse_template_or_argument(self)) if (Tokenizer_parse_template_or_argument(self))
return NULL; return NULL;


+ 42
- 44
mwparserfromhell/parser/tokenizer.h View File

@@ -103,50 +103,48 @@ static PyObject* TagCloseClose;


/* Local contexts: */ /* Local contexts: */


#define LC_TEMPLATE 0x00000007
#define LC_TEMPLATE_NAME 0x00000001
#define LC_TEMPLATE_PARAM_KEY 0x00000002
#define LC_TEMPLATE_PARAM_VALUE 0x00000004

#define LC_ARGUMENT 0x00000018
#define LC_ARGUMENT_NAME 0x00000008
#define LC_ARGUMENT_DEFAULT 0x00000010

#define LC_WIKILINK 0x00000060
#define LC_WIKILINK_TITLE 0x00000020
#define LC_WIKILINK_TEXT 0x00000040

#define LC_HEADING 0x00001F80
#define LC_HEADING_LEVEL_1 0x00000080
#define LC_HEADING_LEVEL_2 0x00000100
#define LC_HEADING_LEVEL_3 0x00000200
#define LC_HEADING_LEVEL_4 0x00000400
#define LC_HEADING_LEVEL_5 0x00000800
#define LC_HEADING_LEVEL_6 0x00001000

#define LC_COMMENT 0x00002000

#define LC_TAG 0x0003C000
#define LC_TAG_OPEN 0x00004000
#define LC_TAG_ATTR 0x00008000
#define LC_TAG_BODY 0x00010000
#define LC_TAG_CLOSE 0x00020000

#define LC_STYLE 0x003C0000
#define LC_STYLE_ITALICS 0x00040000
#define LC_STYLE_BOLD 0x00080000
#define LC_STYLE_PASS_AGAIN 0x00100000
#define LC_STYLE_SECOND_PASS 0x00200000

#define LC_DLTERM 0x00400000

#define LC_SAFETY_CHECK 0x1F800000
#define LC_HAS_TEXT 0x00800000
#define LC_FAIL_ON_TEXT 0x01000000
#define LC_FAIL_NEXT 0x02000000
#define LC_FAIL_ON_LBRACE 0x04000000
#define LC_FAIL_ON_RBRACE 0x08000000
#define LC_FAIL_ON_EQUALS 0x10000000
#define LC_TEMPLATE 0x0000007
#define LC_TEMPLATE_NAME 0x0000001
#define LC_TEMPLATE_PARAM_KEY 0x0000002
#define LC_TEMPLATE_PARAM_VALUE 0x0000004

#define LC_ARGUMENT 0x0000018
#define LC_ARGUMENT_NAME 0x0000008
#define LC_ARGUMENT_DEFAULT 0x0000010

#define LC_WIKILINK 0x0000060
#define LC_WIKILINK_TITLE 0x0000020
#define LC_WIKILINK_TEXT 0x0000040

#define LC_HEADING 0x0001F80
#define LC_HEADING_LEVEL_1 0x0000080
#define LC_HEADING_LEVEL_2 0x0000100
#define LC_HEADING_LEVEL_3 0x0000200
#define LC_HEADING_LEVEL_4 0x0000400
#define LC_HEADING_LEVEL_5 0x0000800
#define LC_HEADING_LEVEL_6 0x0001000

#define LC_TAG 0x001E000
#define LC_TAG_OPEN 0x0002000
#define LC_TAG_ATTR 0x0004000
#define LC_TAG_BODY 0x0008000
#define LC_TAG_CLOSE 0x0010000

#define LC_STYLE 0x01E0000
#define LC_STYLE_ITALICS 0x0020000
#define LC_STYLE_BOLD 0x0040000
#define LC_STYLE_PASS_AGAIN 0x0080000
#define LC_STYLE_SECOND_PASS 0x0100000

#define LC_DLTERM 0x0200000

#define LC_SAFETY_CHECK 0xFC00000
#define LC_HAS_TEXT 0x0400000
#define LC_FAIL_ON_TEXT 0x0800000
#define LC_FAIL_NEXT 0x1000000
#define LC_FAIL_ON_LBRACE 0x2000000
#define LC_FAIL_ON_RBRACE 0x4000000
#define LC_FAIL_ON_EQUALS 0x8000000


/* Global contexts: */ /* Global contexts: */




Loading…
Cancel
Save