
To clarify usage, emit_text() -> emit_char() and emit_string() -> emit_text()
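In short, this commit renames the two emit helpers so each name matches what it writes. The signatures below are taken from the diff that follows: the single-codepoint writer becomes emit_char(), and the C-string writer takes over the emit_text() name.

    /* Write a single Unicode codepoint to the current textbuffer
       (previously named Tokenizer_emit_text). */
    static int Tokenizer_emit_char(Tokenizer* self, Py_UNICODE code);

    /* Write a NUL-terminated string of text to the current textbuffer,
       one codepoint at a time (previously named Tokenizer_emit_string). */
    static int Tokenizer_emit_text(Tokenizer* self, const char* text);

Both return 0 on success and -1 on failure, as the call sites in the diff below check.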

tags/v0.3
Ben Kurtovic, 11 years ago
commit 36180a9e47
1 changed file with 48 additions and 48 deletions:
  1. mwparserfromhell/parser/tokenizer.c (+48, -48)

mwparserfromhell/parser/tokenizer.c

@@ -102,9 +102,9 @@ static void Textbuffer_dealloc(Textbuffer* self)
}

/*
-Write text to the given textbuffer.
+Write a Unicode codepoint to the given textbuffer.
*/
-static int Textbuffer_write(Textbuffer** this, Py_UNICODE text)
+static int Textbuffer_write(Textbuffer** this, Py_UNICODE code)
{
Textbuffer* self = *this;

@@ -115,7 +115,7 @@ static int Textbuffer_write(Textbuffer** this, Py_UNICODE text)
new->next = self;
*this = self = new;
}
-self->data[self->size] = text;
+self->data[self->size] = code;
self->size++;
return 0;
}
@@ -369,22 +369,22 @@ static int Tokenizer_emit_first(Tokenizer* self, PyObject* token)
}

/*
-Write text to the current textbuffer.
+Write a Unicode codepoint to the current textbuffer.
*/
-static int Tokenizer_emit_text(Tokenizer* self, Py_UNICODE text)
+static int Tokenizer_emit_char(Tokenizer* self, Py_UNICODE code)
{
-return Textbuffer_write(&(self->topstack->textbuffer), text);
+return Textbuffer_write(&(self->topstack->textbuffer), code);
}

/*
Write a string of text to the current textbuffer.
*/
-static int Tokenizer_emit_string(Tokenizer* self, const char* text)
+static int Tokenizer_emit_text(Tokenizer* self, const char* text)
{
int i = 0;

while (text[i]) {
-if (Tokenizer_emit_text(self, text[i]))
+if (Tokenizer_emit_char(self, text[i]))
return -1;
i++;
}
@@ -456,7 +456,7 @@ static int Tokenizer_emit_text_then_stack(Tokenizer* self, const char* text)
{
PyObject* stack = Tokenizer_pop(self);

-if (Tokenizer_emit_string(self, text)) {
+if (Tokenizer_emit_text(self, text)) {
Py_DECREF(stack);
return -1;
}
@@ -783,7 +783,7 @@ static int Tokenizer_parse_wikilink(Tokenizer* self)
if (BAD_ROUTE) {
RESET_ROUTE();
self->head = reset;
if (Tokenizer_emit_string(self, "[["))
if (Tokenizer_emit_text(self, "[["))
return -1;
return 0;
}
@@ -869,7 +869,7 @@ static int Tokenizer_parse_heading(Tokenizer* self)
RESET_ROUTE();
self->head = reset + best - 1;
for (i = 0; i < best; i++) {
if (Tokenizer_emit_text(self, *"="))
if (Tokenizer_emit_char(self, *"="))
return -1;
}
self->global ^= GL_HEADING;
@@ -907,7 +907,7 @@ static int Tokenizer_parse_heading(Tokenizer* self)
if (heading->level < best) {
diff = best - heading->level;
for (i = 0; i < diff; i++) {
if (Tokenizer_emit_text(self, *"=")) {
if (Tokenizer_emit_char(self, *"=")) {
Py_DECREF(heading->title);
free(heading);
return -1;
@@ -958,7 +958,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self)
if (level < best) {
diff = best - level;
for (i = 0; i < diff; i++) {
if (Tokenizer_emit_text(self, *"="))
if (Tokenizer_emit_char(self, *"="))
return NULL;
}
}
@@ -966,7 +966,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self)
}
else {
for (i = 0; i < best; i++) {
if (Tokenizer_emit_text(self, *"=")) {
if (Tokenizer_emit_char(self, *"=")) {
Py_DECREF(after->title);
free(after);
return NULL;
@@ -1182,7 +1182,7 @@ static int Tokenizer_parse_entity(Tokenizer* self)
if (BAD_ROUTE) {
RESET_ROUTE();
self->head = reset;
if (Tokenizer_emit_text(self, *"&"))
if (Tokenizer_emit_char(self, *"&"))
return -1;
return 0;
}
@@ -1210,7 +1210,7 @@ static int Tokenizer_parse_comment(Tokenizer* self)
if (BAD_ROUTE) {
RESET_ROUTE();
self->head = reset;
if (Tokenizer_emit_string(self, "<!--"))
if (Tokenizer_emit_text(self, "<!--"))
return -1;
return 0;
}
@@ -1329,7 +1329,7 @@ Tokenizer_handle_tag_space(Tokenizer* self, TagData* data, Py_UNICODE text)
return -1;
}
if (ctx & TAG_QUOTED && !(ctx & TAG_NOTE_SPACE)) {
-if (Tokenizer_emit_text(self, text))
+if (Tokenizer_emit_char(self, text))
return -1;
}
else if (data->context & TAG_ATTR_READY)
@@ -1354,14 +1354,14 @@ static int Tokenizer_handle_tag_text(Tokenizer* self, Py_UNICODE text)
}
}
if (!is_marker || !Tokenizer_CAN_RECURSE(self))
-return Tokenizer_emit_text(self, text);
+return Tokenizer_emit_char(self, text);
else if (text == next && next == *"{")
return Tokenizer_parse_template_or_argument(self);
else if (text == next && next == *"[")
return Tokenizer_parse_wikilink(self);
else if (text == *"<")
return Tokenizer_parse_tag(self);
-return Tokenizer_emit_text(self, text);
+return Tokenizer_emit_char(self, text);
}

/*
@@ -1586,7 +1586,7 @@ static PyObject* Tokenizer_handle_blacklisted_tag(Tokenizer* self)
return NULL;
return Tokenizer_parse(self, 0, 0);
}
-if (Tokenizer_emit_text(self, this))
+if (Tokenizer_emit_char(self, this))
return NULL;
}
}
@@ -1802,7 +1802,7 @@ static int Tokenizer_handle_invalid_tag_start(Tokenizer* self)
if (BAD_ROUTE) {
RESET_ROUTE();
self->head = reset;
return Tokenizer_emit_string(self, "</");
return Tokenizer_emit_text(self, "</");
}
// Set invalid=True flag of TagOpenOpen
if (PyObject_SetAttrString(PyList_GET_ITEM(tag, 0), "invalid", Py_True))
@@ -1823,7 +1823,7 @@ static int Tokenizer_parse_tag(Tokenizer* self)
if (BAD_ROUTE) {
RESET_ROUTE();
self->head = reset;
return Tokenizer_emit_text(self, *"<");
return Tokenizer_emit_char(self, *"<");
}
if (!tag) {
return -1;
@@ -1862,7 +1862,7 @@ static int Tokenizer_emit_style_tag(Tokenizer* self, const char* tag,
return -1;
}
Py_DECREF(token);
-if (Tokenizer_emit_string(self, tag))
+if (Tokenizer_emit_text(self, tag))
return -1;
token = PyObject_CallObject(TagCloseOpen, NULL);
if (!token)
@@ -1882,7 +1882,7 @@ static int Tokenizer_emit_style_tag(Tokenizer* self, const char* tag,
return -1;
}
Py_DECREF(token);
-if (Tokenizer_emit_string(self, tag))
+if (Tokenizer_emit_text(self, tag))
return -1;
token = PyObject_CallObject(TagCloseClose, NULL);
if (!token)
@@ -1914,7 +1914,7 @@ static int Tokenizer_parse_italics(Tokenizer* self)
stack = Tokenizer_parse(self, context, 1);
}
else
return Tokenizer_emit_string(self, "''");
return Tokenizer_emit_text(self, "''");
}
if (!stack)
return -1;
@@ -1934,12 +1934,12 @@ static int Tokenizer_parse_bold(Tokenizer* self)
RESET_ROUTE();
self->head = reset;
if (self->topstack->context & LC_STYLE_SECOND_PASS)
return Tokenizer_emit_text(self, *"'") ? -1 : 1;
return Tokenizer_emit_char(self, *"'") ? -1 : 1;
if (self->topstack->context & LC_STYLE_ITALICS) {
self->topstack->context |= LC_STYLE_PASS_AGAIN;
return Tokenizer_emit_string(self, "'''");
return Tokenizer_emit_text(self, "'''");
}
if (Tokenizer_emit_text(self, *"'"))
if (Tokenizer_emit_char(self, *"'"))
return -1;
return Tokenizer_parse_italics(self);
}
@@ -1964,7 +1964,7 @@ static int Tokenizer_parse_italics_and_bold(Tokenizer* self)
if (BAD_ROUTE) {
RESET_ROUTE();
self->head = reset;
return Tokenizer_emit_string(self, "'''''");
return Tokenizer_emit_text(self, "'''''");
}
if (!stack)
return -1;
@@ -1973,7 +1973,7 @@ static int Tokenizer_parse_italics_and_bold(Tokenizer* self)
if (BAD_ROUTE) {
RESET_ROUTE();
self->head = reset;
if (Tokenizer_emit_string(self, "'''"))
if (Tokenizer_emit_text(self, "'''"))
return -1;
return Tokenizer_emit_style_tag(self, "i", "''", stack);
}
@@ -1998,7 +1998,7 @@ static int Tokenizer_parse_italics_and_bold(Tokenizer* self)
if (BAD_ROUTE) {
RESET_ROUTE();
self->head = reset;
if (Tokenizer_emit_string(self, "''"))
if (Tokenizer_emit_text(self, "''"))
return -1;
return Tokenizer_emit_style_tag(self, "b", "'''", stack);
}
@@ -2031,13 +2031,13 @@ static PyObject* Tokenizer_parse_style(Tokenizer* self)
}
if (ticks > 5) {
for (i = 0; i < ticks - 5; i++) {
if (Tokenizer_emit_text(self, *"'"))
if (Tokenizer_emit_char(self, *"'"))
return NULL;
}
ticks = 5;
}
else if (ticks == 4) {
if (Tokenizer_emit_text(self, *"'"))
if (Tokenizer_emit_char(self, *"'"))
return NULL;
ticks = 3;
}
@@ -2050,14 +2050,14 @@ static PyObject* Tokenizer_parse_style(Tokenizer* self)
if (!Tokenizer_CAN_RECURSE(self)) {
if (ticks == 3) {
if (context & LC_STYLE_SECOND_PASS) {
if (Tokenizer_emit_text(self, *"'"))
if (Tokenizer_emit_char(self, *"'"))
return NULL;
return Tokenizer_pop(self);
}
self->topstack->context |= LC_STYLE_PASS_AGAIN;
}
for (i = 0; i < ticks; i++) {
if (Tokenizer_emit_text(self, *"'"))
if (Tokenizer_emit_char(self, *"'"))
return NULL;
}
}
@@ -2106,7 +2106,7 @@ static int Tokenizer_handle_list_marker(Tokenizer* self)
return -1;
}
Py_DECREF(token);
-if (Tokenizer_emit_string(self, GET_HTML_TAG(code)))
+if (Tokenizer_emit_text(self, GET_HTML_TAG(code)))
return -1;
token = PyObject_CallObject(TagCloseSelfclose, NULL);
if (!token)
@@ -2179,7 +2179,7 @@ static int Tokenizer_handle_hr(Tokenizer* self)
return -1;
}
Py_DECREF(token);
if (Tokenizer_emit_string(self, "hr"))
if (Tokenizer_emit_text(self, "hr"))
return -1;
token = PyObject_CallObject(TagCloseSelfclose, NULL);
if (!token)
@@ -2200,7 +2200,7 @@ static int Tokenizer_handle_dl_term(Tokenizer* self)
self->topstack->context ^= LC_DLTERM;
if (Tokenizer_READ(self, 0) == *":")
return Tokenizer_handle_list_marker(self);
return Tokenizer_emit_text(self, *"\n");
return Tokenizer_emit_char(self, *"\n");
}

/*
@@ -2350,7 +2350,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
}
}
if (!is_marker) {
-if (Tokenizer_emit_text(self, this))
+if (Tokenizer_emit_char(self, this))
return NULL;
self->head++;
continue;
@@ -2364,7 +2364,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
if (Tokenizer_READ(self, 2) == *">")
return Tokenizer_pop(self);
}
-if (Tokenizer_emit_text(self, this))
+if (Tokenizer_emit_char(self, this))
return NULL;
}
else if (this == next && next == *"{") {
@@ -2372,7 +2372,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
if (Tokenizer_parse_template_or_argument(self))
return NULL;
}
-else if (Tokenizer_emit_text(self, this))
+else if (Tokenizer_emit_char(self, this))
return NULL;
}
else if (this == *"|" && this_context & LC_TEMPLATE) {
@@ -2393,7 +2393,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
if (Tokenizer_READ(self, 2) == *"}") {
return Tokenizer_handle_argument_end(self);
}
-if (Tokenizer_emit_text(self, this))
+if (Tokenizer_emit_char(self, this))
return NULL;
}
else if (this == next && next == *"[") {
@@ -2402,7 +2402,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
if (Tokenizer_parse_wikilink(self))
return NULL;
}
-else if (Tokenizer_emit_text(self, this))
+else if (Tokenizer_emit_char(self, this))
return NULL;
}
else if (this == *"|" && this_context & LC_WIKILINK_TITLE) {
@@ -2416,7 +2416,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
if (Tokenizer_parse_heading(self))
return NULL;
}
-else if (Tokenizer_emit_text(self, this))
+else if (Tokenizer_emit_char(self, this))
return NULL;
}
else if (this == *"=" && this_context & LC_HEADING)
@@ -2433,7 +2433,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
if (Tokenizer_parse_comment(self))
return NULL;
}
-else if (Tokenizer_emit_text(self, this))
+else if (Tokenizer_emit_char(self, this))
return NULL;
}
else if (this == *"<" && next == *"/" &&
@@ -2453,7 +2453,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
if (Tokenizer_parse_tag(self))
return NULL;
}
-else if (Tokenizer_emit_text(self, this))
+else if (Tokenizer_emit_char(self, this))
return NULL;
}
else if (this == *">" && this_context & LC_TAG_CLOSE)
@@ -2474,14 +2474,14 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
if (Tokenizer_handle_hr(self))
return NULL;
}
-else if (Tokenizer_emit_text(self, this))
+else if (Tokenizer_emit_char(self, this))
return NULL;
}
else if ((this == *"\n" || this == *":") && this_context & LC_DLTERM) {
if (Tokenizer_handle_dl_term(self))
return NULL;
}
-else if (Tokenizer_emit_text(self, this))
+else if (Tokenizer_emit_char(self, this))
return NULL;
self->head++;
}

