diff --git a/docs/api/mwparserfromhell.nodes.rst b/docs/api/mwparserfromhell.nodes.rst
index d1016f9..a093c17 100644
--- a/docs/api/mwparserfromhell.nodes.rst
+++ b/docs/api/mwparserfromhell.nodes.rst
@@ -46,6 +46,7 @@ nodes Package
 
 .. automodule:: mwparserfromhell.nodes.tag
     :members:
+    :undoc-members:
     :show-inheritance:
 
 :mod:`template` Module
diff --git a/docs/api/mwparserfromhell.rst b/docs/api/mwparserfromhell.rst
index 3ca09c9..b682139 100644
--- a/docs/api/mwparserfromhell.rst
+++ b/docs/api/mwparserfromhell.rst
@@ -30,6 +30,12 @@ mwparserfromhell Package
     :members:
     :undoc-members:
 
+:mod:`tag_defs` Module
+----------------------
+
+.. automodule:: mwparserfromhell.tag_defs
+    :members:
+
 :mod:`utils` Module
 -------------------
 
diff --git a/mwparserfromhell/nodes/extras/attribute.py b/mwparserfromhell/nodes/extras/attribute.py
index ebb65ab..5888dba 100644
--- a/mwparserfromhell/nodes/extras/attribute.py
+++ b/mwparserfromhell/nodes/extras/attribute.py
@@ -36,18 +36,23 @@ class Attribute(StringMixIn):
     whose value is ``"foo"``.
     """
 
-    def __init__(self, name, value=None, quoted=True):
+    def __init__(self, name, value=None, quoted=True, pad_first="",
+                 pad_before_eq="", pad_after_eq=""):
         super(Attribute, self).__init__()
         self._name = name
         self._value = value
         self._quoted = quoted
+        self._pad_first = pad_first
+        self._pad_before_eq = pad_before_eq
+        self._pad_after_eq = pad_after_eq
 
     def __unicode__(self):
+        base = self.pad_first + str(self.name) + self.pad_before_eq
         if self.value:
             if self.quoted:
-                return str(self.name) + '="' + str(self.value) + '"'
-            return str(self.name) + "=" + str(self.value)
-        return str(self.name)
+                return base + '="' + self.pad_after_eq + str(self.value) + '"'
+            return base + "=" + self.pad_after_eq + str(self.value)
+        return base
 
     @property
     def name(self):
@@ -64,14 +69,41 @@ class Attribute(StringMixIn):
         """Whether the attribute's value is quoted with double quotes."""
         return self._quoted
 
+    @property
+    def pad_first(self):
+        """Spacing to insert right before the attribute."""
+        return self._pad_first
+
+    @property
+    def pad_before_eq(self):
+        """Spacing to insert right before the equal sign."""
+        return self._pad_before_eq
+
+    @property
+    def pad_after_eq(self):
+        """Spacing to insert right after the equal sign."""
+        return self._pad_after_eq
+
     @name.setter
-    def name(self, newval):
-        self._name = parse_anything(newval)
+    def name(self, value):
+        self._name = parse_anything(value)
 
     @value.setter
     def value(self, newval):
         self._value = parse_anything(newval)
 
     @quoted.setter
-    def quoted(self, newval):
-        self._quoted = bool(newval)
+    def quoted(self, value):
+        self._quoted = bool(value)
+
+    @pad_first.setter
+    def pad_first(self, value):
+        self._pad_first = str(value)
+
+    @pad_before_eq.setter
+    def pad_before_eq(self, value):
+        self._pad_before_eq = str(value)
+
+    @pad_after_eq.setter
+    def pad_after_eq(self, value):
+        self._pad_after_eq = str(value)
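Note: a quick check of how the new padding fields compose during rendering -- a
minimal sketch assuming direct construction with hypothetical values (in practice
the tokenizer supplies them):

    >>> from mwparserfromhell.nodes.extras.attribute import Attribute
    >>> attr = Attribute("name", "foo", quoted=True, pad_first=" ",
    ...                  pad_before_eq="", pad_after_eq="")
    >>> str(attr)
    ' name="foo"'

Because each attribute now carries its own leading whitespace in pad_first, the
Tag node below can join attributes with "".join(...) rather than " ".join(...).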
diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py
index eaf2b6e..dc78b34 100644
--- a/mwparserfromhell/nodes/tag.py
+++ b/mwparserfromhell/nodes/tag.py
@@ -24,6 +24,7 @@ from __future__ import unicode_literals
 
 from . import Node, Text
 from ..compat import str
+from ..tag_defs import get_wikicode, is_visible
 from ..utils import parse_anything
 
 __all__ = ["Tag"]
@@ -31,79 +32,39 @@
 class Tag(Node):
     """Represents an HTML-style tag in wikicode, like ``<ref>``."""
 
-    TAG_UNKNOWN = 0
-
-    # Basic HTML:
-    TAG_ITALIC = 1
-    TAG_BOLD = 2
-    TAG_UNDERLINE = 3
-    TAG_STRIKETHROUGH = 4
-    TAG_UNORDERED_LIST = 5
-    TAG_ORDERED_LIST = 6
-    TAG_DEF_TERM = 7
-    TAG_DEF_ITEM = 8
-    TAG_BLOCKQUOTE = 9
-    TAG_RULE = 10
-    TAG_BREAK = 11
-    TAG_ABBR = 12
-    TAG_PRE = 13
-    TAG_MONOSPACE = 14
-    TAG_CODE = 15
-    TAG_SPAN = 16
-    TAG_DIV = 17
-    TAG_FONT = 18
-    TAG_SMALL = 19
-    TAG_BIG = 20
-    TAG_CENTER = 21
-
-    # MediaWiki parser hooks:
-    TAG_REF = 101
-    TAG_GALLERY = 102
-    TAG_MATH = 103
-    TAG_NOWIKI = 104
-    TAG_NOINCLUDE = 105
-    TAG_INCLUDEONLY = 106
-    TAG_ONLYINCLUDE = 107
-
-    # Additional parser hooks:
-    TAG_SYNTAXHIGHLIGHT = 201
-    TAG_POEM = 202
-
-    # Lists of tags:
-    TAGS_INVISIBLE = set((TAG_REF, TAG_GALLERY, TAG_MATH, TAG_NOINCLUDE))
-    TAGS_VISIBLE = set(range(300)) - TAGS_INVISIBLE
-
-    def __init__(self, type_, tag, contents=None, attrs=None, showtag=True,
-                 self_closing=False, open_padding=0, close_padding=0):
+    def __init__(self, tag, contents=None, attrs=None, showtag=True,
+                 self_closing=False, invalid=False, implicit=False, padding="",
+                 closing_tag=None):
         super(Tag, self).__init__()
-        self._type = type_
         self._tag = tag
         self._contents = contents
-        if attrs:
-            self._attrs = attrs
-        else:
-            self._attrs = []
+        self._attrs = attrs if attrs else []
         self._showtag = showtag
         self._self_closing = self_closing
-        self._open_padding = open_padding
-        self._close_padding = close_padding
+        self._invalid = invalid
+        self._implicit = implicit
+        self._padding = padding
+        if closing_tag:
+            self._closing_tag = closing_tag
+        else:
+            self._closing_tag = tag
 
     def __unicode__(self):
         if not self.showtag:
-            open_, close = self._translate()
+            open_, close = get_wikicode[self.tag]
             if self.self_closing:
                 return open_
             else:
                 return open_ + str(self.contents) + close
 
-        result = "<" + str(self.tag)
-        if self.attrs:
-            result += " " + " ".join([str(attr) for attr in self.attrs])
+        result = ("</" if self.invalid else "<") + str(self.tag)
+        if self.attributes:
+            result += "".join([str(attr) for attr in self.attributes])
         if self.self_closing:
-            result += " " * self.open_padding + "/>"
+            result += self.padding + (">" if self.implicit else "/>")
         else:
-            result += " " * self.open_padding + ">" + str(self.contents)
-            result += "</" + str(self.tag) + " " * self.close_padding + ">"
+            result += self.padding + ">" + str(self.contents)
+            result += "</" + str(self.closing_tag) + ">"
         return result
@@ -111,66 +72,43 @@ class Tag(Node):
     def __iternodes__(self, getter):
         yield None, self
         if self.showtag:
             for child in getter(self.tag):
                 yield self.tag, child
-            for attr in self.attrs:
+            for attr in self.attributes:
                 for child in getter(attr.name):
                     yield attr.name, child
                 if attr.value:
                     for child in getter(attr.value):
                         yield attr.value, child
-        for child in getter(self.contents):
-            yield self.contents, child
+        if self.contents:
+            for child in getter(self.contents):
+                yield self.contents, child
+        if not self.self_closing and self.closing_tag:
+            for child in getter(self.closing_tag):
+                yield self.closing_tag, child
 
     def __strip__(self, normalize, collapse):
-        if self.type in self.TAGS_VISIBLE:
+        if is_visible(self.tag):
             return self.contents.strip_code(normalize, collapse)
         return None
 
     def __showtree__(self, write, get, mark):
-        tagnodes = self.tag.nodes
-        if (not self.attrs and len(tagnodes) == 1 and
-                isinstance(tagnodes[0], Text)):
-            write("<" + str(tagnodes[0]) + ">")
+        write("</" if self.invalid else "<")
+        get(self.tag)
+        for attr in self.attributes:
+            get(attr.name)
+            if not attr.value:
+                continue
+            write("    = ")
+            mark()
+            get(attr.value)
+        if self.self_closing:
+            write(">" if self.implicit else "/>")
         else:
-            write("<")
-            get(self.tag)
-            for attr in self.attrs:
-                get(attr.name)
-                if not attr.value:
-                    continue
-                write("    = ")
-                mark()
-                get(attr.value)
             write(">")
-        get(self.contents)
-        if len(tagnodes) == 1 and isinstance(tagnodes[0], Text):
-            write("</" + str(tagnodes[0]) + ">")
-        else:
+            get(self.contents)
             write("</")
-            get(self.tag)
+            get(self.closing_tag)
             write(">")
 
-    def _translate(self):
-        """If the HTML-style tag has a wikicode representation, return that.
-
-        For example, ``<b>Foo</b>`` can be represented as ``'''Foo'''``. This
-        returns a tuple of the character starting the sequence and the
-        character ending it.
-        """
-        translations = {
-            self.TAG_ITALIC: ("''", "''"),
-            self.TAG_BOLD: ("'''", "'''"),
-            self.TAG_UNORDERED_LIST: ("*", ""),
-            self.TAG_ORDERED_LIST: ("#", ""),
-            self.TAG_DEF_TERM: (";", ""),
-            self.TAG_DEF_ITEM: (":", ""),
-            self.TAG_RULE: ("----", ""),
-        }
-        return translations[self.type]
-
-    @property
-    def type(self):
-        """The tag type."""
-        return self._type
-
     @property
     def tag(self):
         """The tag itself, as a :py:class:`~.Wikicode` object."""
@@ -182,7 +120,7 @@ class Tag(Node):
         return self._contents
 
     @property
-    def attrs(self):
+    def attributes(self):
         """The list of attributes affecting the tag.
 
         Each attribute is an instance of :py:class:`~.Attribute`.
@@ -196,29 +134,47 @@ class Tag(Node):
 
     @property
     def self_closing(self):
-        """Whether the tag is self-closing with no content."""
+        """Whether the tag is self-closing with no content (like ``<br/>``)."""
        return self._self_closing
 
     @property
-    def open_padding(self):
-        """How much spacing to insert before the first closing >."""
-        return self._open_padding
+    def invalid(self):
+        """Whether the tag starts with a forward slash after the opening bracket.
+
+        This makes the tag look like a lone close tag. It is technically
+        invalid and is only parsable Wikicode when the tag itself is
+        single-only, like ``<br>`` and ``<img>``. See
+        :py:func:`tag_defs.is_single_only`.
+        """
+        return self._invalid
 
     @property
-    def close_padding(self):
-        """How much spacing to insert before the last closing >."""
-        return self._close_padding
+    def implicit(self):
+        """Whether the tag is implicitly self-closing, with no ending slash.
 
-    @type.setter
-    def type(self, value):
-        value = int(value)
-        if value not in self.TAGS_INVISIBLE | self.TAGS_VISIBLE:
-            raise ValueError(value)
-        self._type = value
+        This is only possible for specific "single" tags like ``<br>`` and
+        ``<li>``. See :py:func:`tag_defs.is_single`. This field only has an
+        effect if :py:attr:`self_closing` is also ``True``.
+        """
+        return self._implicit
+
+    @property
+    def padding(self):
+        """Spacing to insert before the first closing ``>``."""
+        return self._padding
+
+    @property
+    def closing_tag(self):
+        """The closing tag, as a :py:class:`~.Wikicode` object.
+
+        This will usually equal :py:attr:`tag`, unless there is additional
+        spacing, comments, or the like.
+        """
+        return self._closing_tag
 
     @tag.setter
     def tag(self, value):
-        self._tag = parse_anything(value)
+        self._tag = self._closing_tag = parse_anything(value)
 
     @contents.setter
     def contents(self, value):
@@ -232,10 +188,18 @@ class Tag(Node):
     def self_closing(self, value):
         self._self_closing = bool(value)
 
-    @open_padding.setter
-    def open_padding(self, value):
-        self._open_padding = int(value)
+    @invalid.setter
+    def invalid(self, value):
+        self._invalid = bool(value)
+
+    @implicit.setter
+    def implicit(self, value):
+        self._implicit = bool(value)
+
+    @padding.setter
+    def padding(self, value):
+        self._padding = str(value)
 
-    @close_padding.setter
-    def close_padding(self, value):
-        self._close_padding = int(value)
+    @closing_tag.setter
+    def closing_tag(self, value):
+        self._closing_tag = parse_anything(value)
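Note: a rough illustration of the reworked Tag API, assuming the tag tokenizer is
enabled on this branch (exact parser output may differ):

    >>> import mwparserfromhell
    >>> code = mwparserfromhell.parse('<span  style="color: red;">text</span>')
    >>> tag = code.filter_tags()[0]
    >>> tag.attributes[0].pad_first
    '  '
    >>> (tag.self_closing, tag.implicit, tag.invalid)
    (False, False, False)
    >>> str(tag)
    '<span  style="color: red;">text</span>'

Keeping the padding fields and a separate closing_tag around is what lets str(tag)
round-trip the original markup byte-for-byte.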
diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py
index e89fb33..9366742 100644
--- a/mwparserfromhell/parser/builder.py
+++ b/mwparserfromhell/parser/builder.py
@@ -170,7 +170,7 @@ class Builder(object):
                 self._write(self._handle_token(token))
 
     def _handle_comment(self):
-        """Handle a case where a hidden comment is at the head of the tokens."""
+        """Handle a case where an HTML comment is at the head of the tokens."""
         self._push()
         while self._tokens:
             token = self._tokens.pop()
@@ -180,7 +180,7 @@ class Builder(object):
             else:
                 self._write(self._handle_token(token))
 
-    def _handle_attribute(self):
+    def _handle_attribute(self, start):
         """Handle a case where a tag attribute is at the head of the tokens."""
         name, quoted = None, False
         self._push()
@@ -191,37 +191,47 @@ class Builder(object):
                 self._push()
             elif isinstance(token, tokens.TagAttrQuote):
                 quoted = True
-            elif isinstance(token, (tokens.TagAttrStart,
-                                    tokens.TagCloseOpen)):
+            elif isinstance(token, (tokens.TagAttrStart, tokens.TagCloseOpen,
+                                    tokens.TagCloseSelfclose)):
                 self._tokens.append(token)
-                if name is not None:
-                    return Attribute(name, self._pop(), quoted)
-                return Attribute(self._pop(), quoted=quoted)
+                if name:
+                    value = self._pop()
+                else:
+                    name, value = self._pop(), None
+                return Attribute(name, value, quoted, start.pad_first,
+                                 start.pad_before_eq, start.pad_after_eq)
             else:
                 self._write(self._handle_token(token))
 
     def _handle_tag(self, token):
         """Handle a case where a tag is at the head of the tokens."""
-        type_, showtag = token.type, token.showtag
-        attrs = []
+        close_tokens = (tokens.TagCloseSelfclose, tokens.TagCloseClose)
+        implicit, attrs, contents, closing_tag = False, [], None, None
+        showtag = token.get("showtag", True)
+        invalid = token.get("invalid", False)
         self._push()
         while self._tokens:
             token = self._tokens.pop()
             if isinstance(token, tokens.TagAttrStart):
-                attrs.append(self._handle_attribute())
+                attrs.append(self._handle_attribute(token))
             elif isinstance(token, tokens.TagCloseOpen):
-                open_pad = token.padding
+                padding = token.padding
                 tag = self._pop()
                 self._push()
-            elif isinstance(token, tokens.TagCloseSelfclose):
-                tag = self._pop()
-                return Tag(type_, tag, attrs=attrs, showtag=showtag,
-                           self_closing=True, open_padding=token.padding)
             elif isinstance(token, tokens.TagOpenClose):
                 contents = self._pop()
-            elif isinstance(token, tokens.TagCloseClose):
-                return Tag(type_, tag, contents, attrs, showtag, False,
-                           open_pad, token.padding)
+                self._push()
+            elif isinstance(token, close_tokens):
+                if isinstance(token, tokens.TagCloseSelfclose):
+                    tag = self._pop()
+                    self_closing = True
+                    padding = token.padding
+                    implicit = token.get("implicit", False)
+                else:
+                    self_closing = False
+                    closing_tag = self._pop()
+                return Tag(tag, contents, attrs, showtag, self_closing,
+                           invalid, implicit, padding, closing_tag)
             else:
                 self._write(self._handle_token(token))
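Note: for context, the token stream _handle_tag() consumes for a self-closing tag
such as <ref name="foo"/> looks roughly like this (a hypothetical sketch; the token
classes are from mwparserfromhell.parser.tokens, and the padding values are
illustrative):

    TagOpenOpen(showtag=True, invalid=False)
    Text(text="ref")
    TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq="")
    Text(text="name")
    TagAttrEquals()
    TagAttrQuote()
    Text(text="foo")
    TagCloseSelfclose(padding="", implicit=False)

_handle_attribute() pushes the close token back before returning, so the loop in
_handle_tag() still sees it and can finish building the Tag.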
diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py
index 896d137..211136c 100644
--- a/mwparserfromhell/parser/contexts.py
+++ b/mwparserfromhell/parser/contexts.py
@@ -62,6 +62,13 @@ Local (stack-specific) contexts:
 
 * :py:const:`COMMENT`
 
+* :py:const:`TAG`
+
+    * :py:const:`TAG_OPEN`
+    * :py:const:`TAG_ATTR`
+    * :py:const:`TAG_BODY`
+    * :py:const:`TAG_CLOSE`
+
 * :py:const:`SAFETY_CHECK`
 
     * :py:const:`HAS_TEXT`
@@ -78,37 +85,45 @@ Global contexts:
 
 # Local contexts:
 
-TEMPLATE =             0b00000000000000000111
-TEMPLATE_NAME =        0b00000000000000000001
-TEMPLATE_PARAM_KEY =   0b00000000000000000010
-TEMPLATE_PARAM_VALUE = 0b00000000000000000100
-
-ARGUMENT =         0b00000000000000011000
-ARGUMENT_NAME =    0b00000000000000001000
-ARGUMENT_DEFAULT = 0b00000000000000010000
-
-WIKILINK =       0b00000000000001100000
-WIKILINK_TITLE = 0b00000000000000100000
-WIKILINK_TEXT =  0b00000000000001000000
-
-HEADING =         0b00000001111110000000
-HEADING_LEVEL_1 = 0b00000000000010000000
-HEADING_LEVEL_2 = 0b00000000000100000000
-HEADING_LEVEL_3 = 0b00000000001000000000
-HEADING_LEVEL_4 = 0b00000000010000000000
-HEADING_LEVEL_5 = 0b00000000100000000000
-HEADING_LEVEL_6 = 0b00000001000000000000
-
-COMMENT = 0b00000010000000000000
-
-SAFETY_CHECK =   0b11111100000000000000
-HAS_TEXT =       0b00000100000000000000
-FAIL_ON_TEXT =   0b00001000000000000000
-FAIL_NEXT =      0b00010000000000000000
-FAIL_ON_LBRACE = 0b00100000000000000000
-FAIL_ON_RBRACE = 0b01000000000000000000
-FAIL_ON_EQUALS = 0b10000000000000000000
+TEMPLATE_NAME =        1 << 0
+TEMPLATE_PARAM_KEY =   1 << 1
+TEMPLATE_PARAM_VALUE = 1 << 2
+TEMPLATE = TEMPLATE_NAME + TEMPLATE_PARAM_KEY + TEMPLATE_PARAM_VALUE
+
+ARGUMENT_NAME =    1 << 3
+ARGUMENT_DEFAULT = 1 << 4
+ARGUMENT = ARGUMENT_NAME + ARGUMENT_DEFAULT
+
+WIKILINK_TITLE = 1 << 5
+WIKILINK_TEXT =  1 << 6
+WIKILINK = WIKILINK_TITLE + WIKILINK_TEXT
+
+HEADING_LEVEL_1 = 1 << 7
+HEADING_LEVEL_2 = 1 << 8
+HEADING_LEVEL_3 = 1 << 9
+HEADING_LEVEL_4 = 1 << 10
+HEADING_LEVEL_5 = 1 << 11
+HEADING_LEVEL_6 = 1 << 12
+HEADING = (HEADING_LEVEL_1 + HEADING_LEVEL_2 + HEADING_LEVEL_3 +
+           HEADING_LEVEL_4 + HEADING_LEVEL_5 + HEADING_LEVEL_6)
+
+COMMENT = 1 << 13
+
+TAG_OPEN =  1 << 14
+TAG_ATTR =  1 << 15
+TAG_BODY =  1 << 16
+TAG_CLOSE = 1 << 17
+TAG = TAG_OPEN + TAG_ATTR + TAG_BODY + TAG_CLOSE
+
+HAS_TEXT =       1 << 18
+FAIL_ON_TEXT =   1 << 19
+FAIL_NEXT =      1 << 20
+FAIL_ON_LBRACE = 1 << 21
+FAIL_ON_RBRACE = 1 << 22
+FAIL_ON_EQUALS = 1 << 23
+SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE +
+                FAIL_ON_RBRACE + FAIL_ON_EQUALS)
 
 # Global contexts:
 
-GL_HEADING = 0b1
+GL_HEADING = 1 << 0
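Note: the switch to 1 << n is behavior-preserving -- the old binary literals encode
exactly the same bits -- and it makes the aggregate constants self-documenting.
Masking works as before; a minimal sketch:

    >>> from mwparserfromhell.parser import contexts
    >>> ctx = contexts.TAG_OPEN | contexts.HAS_TEXT
    >>> bool(ctx & contexts.TAG), bool(ctx & contexts.TEMPLATE)
    (True, False)

Combining the aggregates with + is safe only because every constant is a distinct
bit; | would be the more defensive operator if that invariant ever changed.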
diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c
index 86f2884..bae5ec2 100644
--- a/mwparserfromhell/parser/tokenizer.c
+++ b/mwparserfromhell/parser/tokenizer.c
@@ -35,17 +35,43 @@ static int heading_level_from_context(int n)
     return level;
 }
 
-static PyObject*
-Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds)
+/*
+    Call the given function in tag_defs, using 'tag' as a parameter, and return
+    its output as a bool.
+*/
+static int call_tag_def_func(const char* funcname, PyObject* tag)
 {
-    Tokenizer* self = (Tokenizer*) type->tp_alloc(type, 0);
-    return (PyObject*) self;
+    PyObject* func = PyObject_GetAttrString(tag_defs, funcname);
+    PyObject* result = PyObject_CallFunctionObjArgs(func, tag, NULL);
+    int ans = (result == Py_True) ? 1 : 0;
+
+    Py_DECREF(func);
+    Py_DECREF(result);
+    return ans;
+}
+
+/*
+    Sanitize the name of a tag so it can be compared with others for equality.
+*/
+static PyObject* strip_tag_name(PyObject* token)
+{
+    PyObject *text, *rstripped, *lowered;
+
+    text = PyObject_GetAttrString(token, "text");
+    if (!text)
+        return NULL;
+    rstripped = PyObject_CallMethod(text, "rstrip", NULL);
+    Py_DECREF(text);
+    if (!rstripped)
+        return NULL;
+    lowered = PyObject_CallMethod(rstripped, "lower", NULL);
+    Py_DECREF(rstripped);
+    return lowered;
 }
 
-static struct Textbuffer*
-Textbuffer_new(void)
+static Textbuffer* Textbuffer_new(void)
 {
-    struct Textbuffer* buffer = malloc(sizeof(struct Textbuffer));
+    Textbuffer* buffer = malloc(sizeof(Textbuffer));
     if (!buffer) {
         PyErr_NoMemory();
         return NULL;
     }
@@ -61,36 +87,125 @@ static Textbuffer* Textbuffer_new(void)
     return buffer;
 }
 
-static void
-Tokenizer_dealloc(Tokenizer* self)
+static void Textbuffer_dealloc(Textbuffer* self)
 {
-    struct Stack *this = self->topstack, *next;
-    Py_XDECREF(self->text);
+    Textbuffer* next;
+    while (self) {
+        free(self->data);
+        next = self->next;
+        free(self);
+        self = next;
+    }
+}
 
-    while (this) {
-        Py_DECREF(this->stack);
-        Textbuffer_dealloc(this->textbuffer);
-        next = this->next;
-        free(this);
-        this = next;
+/*
+    Write text to the given textbuffer.
+*/
+static int Textbuffer_write(Textbuffer** this, Py_UNICODE text)
+{
+    Textbuffer* self = *this;
+    if (self->size == TEXTBUFFER_BLOCKSIZE) {
+        Textbuffer* new = Textbuffer_new();
+        if (!new)
+            return -1;
+        new->next = self;
+        *this = self = new;
     }
-    self->ob_type->tp_free((PyObject*) self);
+    self->data[self->size] = text;
+    self->size++;
+    return 0;
+}
+
+/*
+    Return the contents of the textbuffer as a Python Unicode object.
+*/
+static PyObject* Textbuffer_render(Textbuffer* self)
+{
+    PyObject *result = PyUnicode_FromUnicode(self->data, self->size);
+    PyObject *left, *concat;
+    while (self->next) {
+        self = self->next;
+        left = PyUnicode_FromUnicode(self->data, self->size);
+        concat = PyUnicode_Concat(left, result);
+        Py_DECREF(left);
+        Py_DECREF(result);
+        result = concat;
+    }
+    return result;
+}
+
+static TagData* TagData_new(void)
+{
+    TagData *self = malloc(sizeof(TagData));
+
+    #define ALLOC_BUFFER(name)     \
+        name = Textbuffer_new();   \
+        if (!name) {               \
+            TagData_dealloc(self); \
+            return NULL;           \
+        }
+
+    if (!self) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    self->context = TAG_NAME;
+    ALLOC_BUFFER(self->pad_first)
+    ALLOC_BUFFER(self->pad_before_eq)
+    ALLOC_BUFFER(self->pad_after_eq)
+    self->reset = 0;
+    return self;
+}
+
+static void TagData_dealloc(TagData* self)
+{
+    #define DEALLOC_BUFFER(name) \
+        if (name)                \
+            Textbuffer_dealloc(name);
+
+    DEALLOC_BUFFER(self->pad_first);
+    DEALLOC_BUFFER(self->pad_before_eq);
+    DEALLOC_BUFFER(self->pad_after_eq);
+    free(self);
+}
+
+static int TagData_reset_buffers(TagData* self)
+{
+    #define RESET_BUFFER(name)    \
+        Textbuffer_dealloc(name); \
+        name = Textbuffer_new();  \
+        if (!name)                \
+            return -1;
+
+    RESET_BUFFER(self->pad_first)
+    RESET_BUFFER(self->pad_before_eq)
+    RESET_BUFFER(self->pad_after_eq)
+    return 0;
+}
+
+static PyObject*
+Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds)
+{
+    Tokenizer* self = (Tokenizer*) type->tp_alloc(type, 0);
+    return (PyObject*) self;
 }
 
-static void
-Textbuffer_dealloc(struct Textbuffer* this)
+static void Tokenizer_dealloc(Tokenizer* self)
 {
-    struct Textbuffer* next;
+    Stack *this = self->topstack, *next;
+    Py_XDECREF(self->text);
+
     while (this) {
-        free(this->data);
+        Py_DECREF(this->stack);
+        Textbuffer_dealloc(this->textbuffer);
         next = this->next;
         free(this);
         this = next;
     }
+    self->ob_type->tp_free((PyObject*) self);
 }
 
-static int
-Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds)
+static int Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds)
 {
     static char* kwlist[] = {NULL};
     if (!PyArg_ParseTupleAndKeywords(args, kwds, "", kwlist))
@@ -98,19 +213,16 @@ Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds)
         return -1;
     self->text = Py_None;
     Py_INCREF(Py_None);
     self->topstack = NULL;
-    self->head = 0;
-    self->length = 0;
-    self->global = 0;
+    self->head = self->length = self->global = self->depth = self->cycles = 0;
     return 0;
 }
 
 /*
     Add a new token stack, context, and textbuffer to the list.
 */
-static int
-Tokenizer_push(Tokenizer* self, int context)
+static int Tokenizer_push(Tokenizer* self, int context)
 {
-    struct Stack* top = malloc(sizeof(struct Stack));
+    Stack* top = malloc(sizeof(Stack));
     if (!top) {
         PyErr_NoMemory();
         return -1;
     }
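Note: Textbuffer chains its blocks newest-first, which is why Textbuffer_render()
above concatenates each older block onto the front of the result. A usage sketch in
C (illustrative only; it mirrors what Tokenizer_emit_text() does with the buffer):

    Textbuffer* buf = Textbuffer_new();
    if (!buf)
        return -1;                             /* in a real caller */
    Textbuffer_write(&buf, (Py_UNICODE) 'h');  /* may push a new head block */
    Textbuffer_write(&buf, (Py_UNICODE) 'i');
    PyObject* text = Textbuffer_render(buf);   /* new reference to u"hi" */
    Textbuffer_dealloc(buf);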
@@ -128,32 +240,12 @@ Tokenizer_push(Tokenizer* self, int context)
 }
 
 /*
-    Return the contents of the textbuffer as a Python Unicode object.
-*/
-static PyObject*
-Textbuffer_render(struct Textbuffer* self)
-{
-    PyObject *result = PyUnicode_FromUnicode(self->data, self->size);
-    PyObject *left, *concat;
-    while (self->next) {
-        self = self->next;
-        left = PyUnicode_FromUnicode(self->data, self->size);
-        concat = PyUnicode_Concat(left, result);
-        Py_DECREF(left);
-        Py_DECREF(result);
-        result = concat;
-    }
-    return result;
-}
-
-/*
     Push the textbuffer onto the stack as a Text node and clear it.
 */
-static int
-Tokenizer_push_textbuffer(Tokenizer* self)
+static int Tokenizer_push_textbuffer(Tokenizer* self)
 {
     PyObject *text, *kwargs, *token;
-    struct Textbuffer* buffer = self->topstack->textbuffer;
+    Textbuffer* buffer = self->topstack->textbuffer;
     if (buffer->size == 0 && !buffer->next)
         return 0;
     text = Textbuffer_render(buffer);
@@ -185,10 +277,9 @@ Tokenizer_push_textbuffer(Tokenizer* self)
 /*
     Pop and deallocate the top token stack/context/textbuffer.
 */
-static void
-Tokenizer_delete_top_of_stack(Tokenizer* self)
+static void Tokenizer_delete_top_of_stack(Tokenizer* self)
 {
-    struct Stack* top = self->topstack;
+    Stack* top = self->topstack;
     Py_DECREF(top->stack);
     Textbuffer_dealloc(top->textbuffer);
     self->topstack = top->next;
@@ -199,8 +290,7 @@ Tokenizer_delete_top_of_stack(Tokenizer* self)
 /*
     Pop the current stack/context/textbuffer, returing the stack.
 */
-static PyObject*
-Tokenizer_pop(Tokenizer* self)
+static PyObject* Tokenizer_pop(Tokenizer* self)
 {
     PyObject* stack;
     if (Tokenizer_push_textbuffer(self))
@@ -215,8 +305,7 @@ Tokenizer_pop(Tokenizer* self)
     Pop the current stack/context/textbuffer, returing the stack. We will also
     replace the underlying stack's context with the current stack's.
 */
-static PyObject*
-Tokenizer_pop_keeping_context(Tokenizer* self)
+static PyObject* Tokenizer_pop_keeping_context(Tokenizer* self)
 {
     PyObject* stack;
     int context;
@@ -234,8 +323,7 @@ Tokenizer_pop_keeping_context(Tokenizer* self)
     Fail the current tokenization route. Discards the current
     stack/context/textbuffer and raises a BadRoute exception.
 */
-static void*
-Tokenizer_fail_route(Tokenizer* self)
+static void* Tokenizer_fail_route(Tokenizer* self)
 {
     PyObject* stack = Tokenizer_pop(self);
     Py_XDECREF(stack);
@@ -246,8 +334,7 @@ Tokenizer_fail_route(Tokenizer* self)
 /*
     Write a token to the end of the current token stack.
 */
-static int
-Tokenizer_write(Tokenizer* self, PyObject* token)
+static int Tokenizer_emit(Tokenizer* self, PyObject* token)
 {
     if (Tokenizer_push_textbuffer(self))
         return -1;
@@ -259,8 +346,7 @@ Tokenizer_emit(Tokenizer* self, PyObject* token)
 /*
     Write a token to the beginning of the current token stack.
 */
-static int
-Tokenizer_write_first(Tokenizer* self, PyObject* token)
+static int Tokenizer_emit_first(Tokenizer* self, PyObject* token)
 {
     if (Tokenizer_push_textbuffer(self))
         return -1;
@@ -272,32 +358,19 @@ Tokenizer_emit_first(Tokenizer* self, PyObject* token)
 /*
     Write text to the current textbuffer.
 */
-static int
-Tokenizer_write_text(Tokenizer* self, Py_UNICODE text)
+static int Tokenizer_emit_text(Tokenizer* self, Py_UNICODE text)
 {
-    struct Textbuffer* buf = self->topstack->textbuffer;
-    if (buf->size == TEXTBUFFER_BLOCKSIZE) {
-        struct Textbuffer* new = Textbuffer_new();
-        if (!new)
-            return -1;
-        new->next = buf;
-        self->topstack->textbuffer = new;
-        buf = new;
-    }
-    buf->data[buf->size] = text;
-    buf->size++;
-    return 0;
+    return Textbuffer_write(&(self->topstack->textbuffer), text);
 }
 
 /*
     Write a series of tokens to the current stack at once.
 */
-static int
-Tokenizer_write_all(Tokenizer* self, PyObject* tokenlist)
+static int Tokenizer_emit_all(Tokenizer* self, PyObject* tokenlist)
 {
     int pushed = 0;
     PyObject *stack, *token, *left, *right, *text;
-    struct Textbuffer* buffer;
+    Textbuffer* buffer;
     Py_ssize_t size;
 
     if (PyList_GET_SIZE(tokenlist) > 0) {
@@ -351,15 +424,14 @@ Tokenizer_emit_all(Tokenizer* self, PyObject* tokenlist)
     Pop the current stack, write text, and then write the stack. 'text' is a
     NULL-terminated array of chars.
 */
-static int
-Tokenizer_write_text_then_stack(Tokenizer* self, const char* text)
+static int Tokenizer_emit_text_then_stack(Tokenizer* self, const char* text)
 {
     PyObject* stack = Tokenizer_pop(self);
     int i = 0;
     while (1) {
         if (!text[i])
             break;
-        if (Tokenizer_write_text(self, (Py_UNICODE) text[i])) {
+        if (Tokenizer_emit_text(self, (Py_UNICODE) text[i])) {
             Py_XDECREF(stack);
             return -1;
         }
@@ -367,7 +439,7 @@ Tokenizer_write_text_then_stack(Tokenizer* self, const char* text)
     }
     if (stack) {
         if (PyList_GET_SIZE(stack) > 0) {
-            if (Tokenizer_write_all(self, stack)) {
+            if (Tokenizer_emit_all(self, stack)) {
                 Py_DECREF(stack);
                 return -1;
             }
@@ -381,8 +453,7 @@ Tokenizer_emit_text_then_stack(Tokenizer* self, const char* text)
 /*
     Read the value at a relative point in the wikicode, forwards.
 */
-static PyObject*
-Tokenizer_read(Tokenizer* self, Py_ssize_t delta)
+static PyObject* Tokenizer_read(Tokenizer* self, Py_ssize_t delta)
 {
     Py_ssize_t index = self->head + delta;
     if (index >= self->length)
@@ -393,8 +464,7 @@ Tokenizer_read(Tokenizer* self, Py_ssize_t delta)
 /*
     Read the value at a relative point in the wikicode, backwards.
 */
-static PyObject*
-Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta)
+static PyObject* Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta)
 {
     Py_ssize_t index;
     if (delta > self->head)
@@ -404,86 +474,14 @@ Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta)
 }
 
 /*
-    Parse a template or argument at the head of the wikicode string.
-*/
-static int
-Tokenizer_parse_template_or_argument(Tokenizer* self)
-{
-    unsigned int braces = 2, i;
-    PyObject *tokenlist;
-
-    self->head += 2;
-    while (Tokenizer_READ(self, 0) == *"{" && braces < MAX_BRACES) {
-        self->head++;
-        braces++;
-    }
-    if (Tokenizer_push(self, 0))
-        return -1;
-    while (braces) {
-        if (braces == 1) {
-            if (Tokenizer_write_text_then_stack(self, "{"))
-                return -1;
-            return 0;
-        }
-        if (braces == 2) {
-            if (Tokenizer_parse_template(self))
-                return -1;
-
-            if (BAD_ROUTE) {
-                RESET_ROUTE();
-                if (Tokenizer_write_text_then_stack(self, "{{"))
-                    return -1;
-                return 0;
-            }
-            break;
-        }
-        if (Tokenizer_parse_argument(self))
-            return -1;
-        if (BAD_ROUTE) {
-            RESET_ROUTE();
-            if (Tokenizer_parse_template(self))
-                return -1;
-            if (BAD_ROUTE) {
-                char text[MAX_BRACES + 1];
-                RESET_ROUTE();
-                for (i = 0; i < braces; i++) text[i] = *"{";
-                text[braces] = *"";
-                if (Tokenizer_write_text_then_stack(self, text)) {
-                    Py_XDECREF(text);
-                    return -1;
-                }
-                Py_XDECREF(text);
-                return 0;
-            }
-            else
-                braces -= 2;
-        }
-        else
-            braces -= 3;
-        if (braces)
-            self->head++;
-    }
-    tokenlist = Tokenizer_pop(self);
-    if (!tokenlist)
-        return -1;
-    if (Tokenizer_write_all(self, tokenlist)) {
-        Py_DECREF(tokenlist);
-        return -1;
-    }
-    Py_DECREF(tokenlist);
-    return 0;
-}
-
-/*
     Parse a template at the head of the wikicode string.
 */
-static int
-Tokenizer_parse_template(Tokenizer* self)
+static int Tokenizer_parse_template(Tokenizer* self)
 {
     PyObject *template, *token;
     Py_ssize_t reset = self->head;
 
-    template = Tokenizer_parse(self, LC_TEMPLATE_NAME);
+    template = Tokenizer_parse(self, LC_TEMPLATE_NAME, 1);
     if (BAD_ROUTE) {
         self->head = reset;
         return 0;
@@ -495,13 +493,13 @@ Tokenizer_parse_template(Tokenizer* self)
         Py_DECREF(template);
         return -1;
     }
-    if (Tokenizer_write_first(self, token)) {
+    if (Tokenizer_emit_first(self, token)) {
         Py_DECREF(token);
         Py_DECREF(template);
         return -1;
     }
     Py_DECREF(token);
-    if (Tokenizer_write_all(self, template)) {
+    if (Tokenizer_emit_all(self, template)) {
         Py_DECREF(template);
         return -1;
     }
@@ -509,7 +507,7 @@ Tokenizer_parse_template(Tokenizer* self)
     token = PyObject_CallObject(TemplateClose, NULL);
     if (!token)
         return -1;
-    if (Tokenizer_write(self, token)) {
+    if (Tokenizer_emit(self, token)) {
         Py_DECREF(token);
         return -1;
     }
@@ -520,13 +518,12 @@ Tokenizer_parse_template(Tokenizer* self)
 /*
     Parse an argument at the head of the wikicode string.
 */
-static int
-Tokenizer_parse_argument(Tokenizer* self)
+static int Tokenizer_parse_argument(Tokenizer* self)
 {
     PyObject *argument, *token;
     Py_ssize_t reset = self->head;
 
-    argument = Tokenizer_parse(self, LC_ARGUMENT_NAME);
+    argument = Tokenizer_parse(self, LC_ARGUMENT_NAME, 1);
     if (BAD_ROUTE) {
         self->head = reset;
         return 0;
@@ -538,13 +535,13 @@ Tokenizer_parse_argument(Tokenizer* self)
         Py_DECREF(argument);
         return -1;
     }
-    if (Tokenizer_write_first(self, token)) {
+    if (Tokenizer_emit_first(self, token)) {
         Py_DECREF(token);
         Py_DECREF(argument);
         return -1;
     }
     Py_DECREF(token);
-    if (Tokenizer_write_all(self, argument)) {
+    if (Tokenizer_emit_all(self, argument)) {
         Py_DECREF(argument);
         return -1;
     }
@@ -552,7 +549,7 @@ Tokenizer_parse_argument(Tokenizer* self)
     token = PyObject_CallObject(ArgumentClose, NULL);
     if (!token)
         return -1;
-    if (Tokenizer_write(self, token)) {
+    if (Tokenizer_emit(self, token)) {
         Py_DECREF(token);
         return -1;
     }
@@ -561,10 +558,80 @@ Tokenizer_parse_argument(Tokenizer* self)
 }
 
 /*
+    Parse a template or argument at the head of the wikicode string.
+*/
+static int Tokenizer_parse_template_or_argument(Tokenizer* self)
+{
+    unsigned int braces = 2, i;
+    PyObject *tokenlist;
+
+    self->head += 2;
+    while (Tokenizer_READ(self, 0) == *"{" && braces < MAX_BRACES) {
+        self->head++;
+        braces++;
+    }
+    if (Tokenizer_push(self, 0))
+        return -1;
+    while (braces) {
+        if (braces == 1) {
+            if (Tokenizer_emit_text_then_stack(self, "{"))
+                return -1;
+            return 0;
+        }
+        if (braces == 2) {
+            if (Tokenizer_parse_template(self))
+                return -1;
+            if (BAD_ROUTE) {
+                RESET_ROUTE();
+                if (Tokenizer_emit_text_then_stack(self, "{{"))
+                    return -1;
+                return 0;
+            }
+            break;
+        }
+        if (Tokenizer_parse_argument(self))
+            return -1;
+        if (BAD_ROUTE) {
+            RESET_ROUTE();
+            if (Tokenizer_parse_template(self))
+                return -1;
+            if (BAD_ROUTE) {
+                char text[MAX_BRACES + 1];
+                RESET_ROUTE();
+                for (i = 0; i < braces; i++) text[i] = *"{";
+                text[braces] = *"";
+                if (Tokenizer_emit_text_then_stack(self, text)) {
+                    Py_XDECREF(text);
+                    return -1;
+                }
+                Py_XDECREF(text);
+                return 0;
+            }
+            else
+                braces -= 2;
+        }
+        else
+            braces -= 3;
+        if (braces)
+            self->head++;
+    }
+    tokenlist = Tokenizer_pop(self);
+    if (!tokenlist)
+        return -1;
+    if (Tokenizer_emit_all(self, tokenlist)) {
+        Py_DECREF(tokenlist);
+        return -1;
+    }
+    Py_DECREF(tokenlist);
+    if (self->topstack->context & LC_FAIL_NEXT)
+        self->topstack->context ^= LC_FAIL_NEXT;
+    return 0;
+}
+
+/*
     Handle a template parameter at the head of the string.
 */
-static int
-Tokenizer_handle_template_param(Tokenizer* self)
+static int Tokenizer_handle_template_param(Tokenizer* self)
 {
     PyObject *stack, *token;
 
@@ -576,7 +643,7 @@ Tokenizer_handle_template_param(Tokenizer* self)
         stack = Tokenizer_pop_keeping_context(self);
         if (!stack)
             return -1;
-        if (Tokenizer_write_all(self, stack)) {
+        if (Tokenizer_emit_all(self, stack)) {
            Py_DECREF(stack);
            return -1;
        }
@@ -588,7 +655,7 @@ Tokenizer_handle_template_param(Tokenizer* self)
     token = PyObject_CallObject(TemplateParamSeparator, NULL);
     if (!token)
         return -1;
-    if (Tokenizer_write(self, token)) {
+    if (Tokenizer_emit(self, token)) {
         Py_DECREF(token);
         return -1;
     }
@@ -601,15 +668,14 @@ Tokenizer_handle_template_param(Tokenizer* self)
 /*
     Handle a template parameter's value at the head of the string.
 */
-static int
-Tokenizer_handle_template_param_value(Tokenizer* self)
+static int Tokenizer_handle_template_param_value(Tokenizer* self)
 {
     PyObject *stack, *token;
 
     stack = Tokenizer_pop_keeping_context(self);
     if (!stack)
         return -1;
-    if (Tokenizer_write_all(self, stack)) {
+    if (Tokenizer_emit_all(self, stack)) {
         Py_DECREF(stack);
         return -1;
     }
@@ -619,7 +685,7 @@ Tokenizer_handle_template_param_value(Tokenizer* self)
     token = PyObject_CallObject(TemplateParamEquals, NULL);
     if (!token)
         return -1;
-    if (Tokenizer_write(self, token)) {
+    if (Tokenizer_emit(self, token)) {
         Py_DECREF(token);
         return -1;
     }
@@ -630,8 +696,7 @@ Tokenizer_handle_template_param_value(Tokenizer* self)
 /*
     Handle the end of a template at the head of the string.
 */
-static PyObject*
-Tokenizer_handle_template_end(Tokenizer* self)
+static PyObject* Tokenizer_handle_template_end(Tokenizer* self)
 {
     PyObject* stack;
 
@@ -639,7 +704,7 @@ Tokenizer_handle_template_end(Tokenizer* self)
     stack = Tokenizer_pop_keeping_context(self);
     if (!stack)
         return NULL;
-    if (Tokenizer_write_all(self, stack)) {
+    if (Tokenizer_emit_all(self, stack)) {
         Py_DECREF(stack);
         return NULL;
     }
@@ -653,8 +718,7 @@ Tokenizer_handle_template_end(Tokenizer* self)
 /*
     Handle the separator between an argument's name and default.
 */
-static int
-Tokenizer_handle_argument_separator(Tokenizer* self)
+static int Tokenizer_handle_argument_separator(Tokenizer* self)
 {
     PyObject* token;
     self->topstack->context ^= LC_ARGUMENT_NAME;
@@ -662,7 +726,7 @@ Tokenizer_handle_argument_separator(Tokenizer* self)
     token = PyObject_CallObject(ArgumentSeparator, NULL);
     if (!token)
         return -1;
-    if (Tokenizer_write(self, token)) {
+    if (Tokenizer_emit(self, token)) {
         Py_DECREF(token);
         return -1;
     }
@@ -673,8 +737,7 @@ Tokenizer_handle_argument_separator(Tokenizer* self)
 /*
     Handle the end of an argument at the head of the string.
 */
-static PyObject*
-Tokenizer_handle_argument_end(Tokenizer* self)
+static PyObject* Tokenizer_handle_argument_end(Tokenizer* self)
 {
     PyObject* stack = Tokenizer_pop(self);
     self->head += 2;
@@ -684,8 +747,7 @@ Tokenizer_handle_argument_end(Tokenizer* self)
 /*
     Parse an internal wikilink at the head of the wikicode string.
 */
-static int
-Tokenizer_parse_wikilink(Tokenizer* self)
+static int Tokenizer_parse_wikilink(Tokenizer* self)
 {
     Py_ssize_t reset;
     PyObject *wikilink, *token;
@@ -693,12 +755,12 @@ Tokenizer_parse_wikilink(Tokenizer* self)
 
     self->head += 2;
     reset = self->head - 1;
-    wikilink = Tokenizer_parse(self, LC_WIKILINK_TITLE);
+    wikilink = Tokenizer_parse(self, LC_WIKILINK_TITLE, 1);
     if (BAD_ROUTE) {
         RESET_ROUTE();
         self->head = reset;
         for (i = 0; i < 2; i++) {
-            if (Tokenizer_write_text(self, *"["))
+            if (Tokenizer_emit_text(self, *"["))
                 return -1;
         }
         return 0;
@@ -710,13 +772,13 @@ Tokenizer_parse_wikilink(Tokenizer* self)
         Py_DECREF(wikilink);
         return -1;
     }
-    if (Tokenizer_write(self, token)) {
+    if (Tokenizer_emit(self, token)) {
         Py_DECREF(token);
         Py_DECREF(wikilink);
         return -1;
     }
     Py_DECREF(token);
-    if (Tokenizer_write_all(self, wikilink)) {
+    if (Tokenizer_emit_all(self, wikilink)) {
         Py_DECREF(wikilink);
         return -1;
     }
@@ -724,19 +786,20 @@ Tokenizer_parse_wikilink(Tokenizer* self)
     token = PyObject_CallObject(WikilinkClose, NULL);
     if (!token)
         return -1;
-    if (Tokenizer_write(self, token)) {
+    if (Tokenizer_emit(self, token)) {
         Py_DECREF(token);
         return -1;
     }
     Py_DECREF(token);
+    if (self->topstack->context & LC_FAIL_NEXT)
+        self->topstack->context ^= LC_FAIL_NEXT;
     return 0;
 }
 
 /*
     Handle the separator between a wikilink's title and its text.
 */
-static int
-Tokenizer_handle_wikilink_separator(Tokenizer* self)
+static int Tokenizer_handle_wikilink_separator(Tokenizer* self)
 {
     PyObject* token;
     self->topstack->context ^= LC_WIKILINK_TITLE;
@@ -744,7 +807,7 @@ Tokenizer_handle_wikilink_separator(Tokenizer* self)
     token = PyObject_CallObject(WikilinkSeparator, NULL);
     if (!token)
         return -1;
-    if (Tokenizer_write(self, token)) {
+    if (Tokenizer_emit(self, token)) {
         Py_DECREF(token);
         return -1;
     }
@@ -755,8 +818,7 @@ Tokenizer_handle_wikilink_separator(Tokenizer* self)
 /*
     Handle the end of a wikilink at the head of the string.
 */
-static PyObject*
-Tokenizer_handle_wikilink_end(Tokenizer* self)
+static PyObject* Tokenizer_handle_wikilink_end(Tokenizer* self)
 {
     PyObject* stack = Tokenizer_pop(self);
     self->head += 1;
@@ -766,8 +828,7 @@ Tokenizer_handle_wikilink_end(Tokenizer* self)
 /*
     Parse a section heading at the head of the wikicode string.
 */
-static int
-Tokenizer_parse_heading(Tokenizer* self)
+static int Tokenizer_parse_heading(Tokenizer* self)
 {
     Py_ssize_t reset = self->head;
     int best = 1, i, context, diff;
@@ -781,18 +842,17 @@ Tokenizer_parse_heading(Tokenizer* self)
         self->head++;
     }
     context = LC_HEADING_LEVEL_1 << (best > 5 ? 5 : best - 1);
-    heading = (HeadingData*) Tokenizer_parse(self, context);
+    heading = (HeadingData*) Tokenizer_parse(self, context, 1);
     if (BAD_ROUTE) {
         RESET_ROUTE();
         self->head = reset + best - 1;
         for (i = 0; i < best; i++) {
-            if (Tokenizer_write_text(self, *"="))
+            if (Tokenizer_emit_text(self, *"="))
                 return -1;
         }
         self->global ^= GL_HEADING;
         return 0;
     }
-
     level = PyInt_FromSsize_t(heading->level);
     if (!level) {
         Py_DECREF(heading->title);
@@ -815,7 +875,7 @@ Tokenizer_parse_heading(Tokenizer* self)
         free(heading);
         return -1;
     }
-    if (Tokenizer_write(self, token)) {
+    if (Tokenizer_emit(self, token)) {
         Py_DECREF(token);
         Py_DECREF(heading->title);
         free(heading);
@@ -825,14 +885,14 @@ Tokenizer_parse_heading(Tokenizer* self)
     if (heading->level < best) {
         diff = best - heading->level;
         for (i = 0; i < diff; i++) {
-            if (Tokenizer_write_text(self, *"=")) {
+            if (Tokenizer_emit_text(self, *"=")) {
                 Py_DECREF(heading->title);
                 free(heading);
                 return -1;
            }
        }
    }
-    if (Tokenizer_write_all(self, heading->title)) {
+    if (Tokenizer_emit_all(self, heading->title)) {
         Py_DECREF(heading->title);
         free(heading);
         return -1;
@@ -842,7 +902,7 @@ Tokenizer_parse_heading(Tokenizer* self)
     token = PyObject_CallObject(HeadingEnd, NULL);
     if (!token)
         return -1;
-    if (Tokenizer_write(self, token)) {
+    if (Tokenizer_emit(self, token)) {
         Py_DECREF(token);
         return -1;
     }
@@ -854,8 +914,7 @@ Tokenizer_parse_heading(Tokenizer* self)
 /*
     Handle the end of a section heading at the head of the string.
 */
-static HeadingData*
-Tokenizer_handle_heading_end(Tokenizer* self)
+static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self)
 {
     Py_ssize_t reset = self->head, best;
     int i, current, level, diff;
@@ -871,13 +930,13 @@ Tokenizer_handle_heading_end(Tokenizer* self)
     current = heading_level_from_context(self->topstack->context);
     level = current > best ? (best > 6 ? 6 : best) :
                             (current > 6 ? 6 : current);
-    after = (HeadingData*) Tokenizer_parse(self, self->topstack->context);
+    after = (HeadingData*) Tokenizer_parse(self, self->topstack->context, 1);
     if (BAD_ROUTE) {
         RESET_ROUTE();
         if (level < best) {
             diff = best - level;
             for (i = 0; i < diff; i++) {
-                if (Tokenizer_write_text(self, *"="))
+                if (Tokenizer_emit_text(self, *"="))
                     return NULL;
             }
         }
@@ -885,13 +944,13 @@ Tokenizer_handle_heading_end(Tokenizer* self)
     }
     else {
         for (i = 0; i < best; i++) {
-            if (Tokenizer_write_text(self, *"=")) {
+            if (Tokenizer_emit_text(self, *"=")) {
                 Py_DECREF(after->title);
                 free(after);
                 return NULL;
             }
         }
-        if (Tokenizer_write_all(self, after->title)) {
+        if (Tokenizer_emit_all(self, after->title)) {
             Py_DECREF(after->title);
             free(after);
             return NULL;
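Note: the level-capping logic above mirrors MediaWiki itself -- a heading's level is
the smaller of the two marker runs, and leftover "=" characters are re-emitted as
plain text. A hypothetical session (output shape may vary by branch state):

    >>> import mwparserfromhell
    >>> heading = mwparserfromhell.parse("=== Foo ==").filter_headings()[0]
    >>> heading.level
    2
    >>> str(heading.title)
    '= Foo '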
@@ -916,8 +975,7 @@ Tokenizer_handle_heading_end(Tokenizer* self)
 /*
     Actually parse an HTML entity and ensure that it is valid.
 */
-static int
-Tokenizer_really_parse_entity(Tokenizer* self)
+static int Tokenizer_really_parse_entity(Tokenizer* self)
 {
     PyObject *token, *kwargs, *textobj;
     Py_UNICODE this;
@@ -933,7 +991,7 @@ Tokenizer_really_parse_entity(Tokenizer* self)
     token = PyObject_CallObject(HTMLEntityStart, NULL);
     if (!token)
         return -1;
-    if (Tokenizer_write(self, token)) {
+    if (Tokenizer_emit(self, token)) {
         Py_DECREF(token);
         return -1;
     }
@@ -949,7 +1007,7 @@ Tokenizer_really_parse_entity(Tokenizer* self)
         token = PyObject_CallObject(HTMLEntityNumeric, NULL);
         if (!token)
             return -1;
-        if (Tokenizer_write(self, token)) {
+        if (Tokenizer_emit(self, token)) {
             Py_DECREF(token);
             return -1;
         }
@@ -970,7 +1028,7 @@ Tokenizer_really_parse_entity(Tokenizer* self)
         Py_DECREF(kwargs);
         if (!token)
             return -1;
-        if (Tokenizer_write(self, token)) {
+        if (Tokenizer_emit(self, token)) {
             Py_DECREF(token);
             return -1;
         }
@@ -1071,7 +1129,7 @@ Tokenizer_really_parse_entity(Tokenizer* self)
     Py_DECREF(kwargs);
     if (!token)
         return -1;
-    if (Tokenizer_write(self, token)) {
+    if (Tokenizer_emit(self, token)) {
         Py_DECREF(token);
         return -1;
     }
@@ -1079,7 +1137,7 @@ Tokenizer_really_parse_entity(Tokenizer* self)
     token = PyObject_CallObject(HTMLEntityEnd, NULL);
     if (!token)
         return -1;
-    if (Tokenizer_write(self, token)) {
+    if (Tokenizer_emit(self, token)) {
         Py_DECREF(token);
         return -1;
     }
@@ -1090,8 +1148,7 @@ Tokenizer_really_parse_entity(Tokenizer* self)
 /*
     Parse an HTML entity at the head of the wikicode string.
 */
-static int
-Tokenizer_parse_entity(Tokenizer* self)
+static int Tokenizer_parse_entity(Tokenizer* self)
 {
     Py_ssize_t reset = self->head;
     PyObject *tokenlist;
@@ -1103,14 +1160,14 @@ Tokenizer_parse_entity(Tokenizer* self)
     if (BAD_ROUTE) {
         RESET_ROUTE();
         self->head = reset;
-        if (Tokenizer_write_text(self, *"&"))
+        if (Tokenizer_emit_text(self, *"&"))
             return -1;
         return 0;
     }
     tokenlist = Tokenizer_pop(self);
     if (!tokenlist)
         return -1;
-    if (Tokenizer_write_all(self, tokenlist)) {
+    if (Tokenizer_emit_all(self, tokenlist)) {
         Py_DECREF(tokenlist);
         return -1;
     }
@@ -1121,15 +1178,14 @@ Tokenizer_parse_entity(Tokenizer* self)
 /*
     Parse an HTML comment at the head of the wikicode string.
 */
-static int
-Tokenizer_parse_comment(Tokenizer* self)
+static int Tokenizer_parse_comment(Tokenizer* self)
 {
     Py_ssize_t reset = self->head + 3;
     PyObject *token, *comment;
     int i;
 
     self->head += 4;
-    comment = Tokenizer_parse(self, LC_COMMENT);
+    comment = Tokenizer_parse(self, LC_COMMENT, 1);
     if (BAD_ROUTE) {
         const char* text = "<!--";