diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py
index d63af02..1f2b048 100644
--- a/mwparserfromhell/nodes/tag.py
+++ b/mwparserfromhell/nodes/tag.py
@@ -24,7 +24,7 @@ from __future__ import unicode_literals
 
 from . import Node, Text
 from ..compat import str
-from ..tag_defs import get_wikicode, is_visible
+from ..tag_defs import is_visible
 from ..utils import parse_anything
 
 __all__ = ["Tag"]
@@ -32,7 +32,7 @@ __all__ = ["Tag"]
 class Tag(Node):
     """Represents an HTML-style tag in wikicode, like ``<ref>``."""
 
-    def __init__(self, tag, contents=None, attrs=None, showtag=True,
+    def __init__(self, tag, contents=None, attrs=None, wiki_markup=None,
                  self_closing=False, invalid=False, implicit=False, padding="",
                  closing_tag=None):
         super(Tag, self).__init__()
@@ -42,7 +42,7 @@ class Tag(Node):
         else:
             self._contents = contents
         self._attrs = attrs if attrs else []
-        self._showtag = showtag
+        self._wiki_markup = wiki_markup
         self._self_closing = self_closing
         self._invalid = invalid
         self._implicit = implicit
@@ -53,12 +53,11 @@ class Tag(Node):
             self._closing_tag = tag
 
     def __unicode__(self):
-        if not self.showtag:
-            open_, close = get_wikicode(self.tag)
+        if self.wiki_markup:
             if self.self_closing:
-                return open_
+                return self.wiki_markup
             else:
-                return open_ + str(self.contents) + close
+                return self.wiki_markup + str(self.contents) + self.wiki_markup
 
         result = ("</" if self.invalid else "<") + str(self.tag)
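To make the new rendering rule concrete, here is a minimal sketch of what `__unicode__` now produces, assuming the constructor signature from the hunk above (the tag names and markup strings are illustrative):

```python
# Hypothetical values; Tag is the node class patched above.
italic = Tag("i", contents="text", wiki_markup="''")
str(italic)  # "''text''"    -- one markup string wraps both sides

rule = Tag("hr", wiki_markup="----", self_closing=True)
str(rule)    # "----"        -- self-closing tags render the markup alone

plain = Tag("i", contents="text")
str(plain)   # "<i>text</i>" -- wiki_markup unset, HTML brackets as before
```

Note the asymmetry this removes: the old `get_wikicode(self.tag)` returned separate opening and closing strings, while `wiki_markup` is a single string reused on both sides, matching how `''` and `'''` behave in wikicode.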
@@ -115,9 +114,14 @@ class Tag(Node):
         return self._attrs
 
     @property
-    def showtag(self):
-        """Whether to show the tag itself instead of a wikicode version."""
-        return self._showtag
+    def wiki_markup(self):
+        """The wikified version of a tag to show instead of HTML.
+
+        If set to a value, this will be displayed instead of the brackets.
+        For example, set to ``''`` to replace ``<i>`` or ``----`` to replace
+        ``<hr>``.
+        """
+        return self._wiki_markup
 
     @property
     def self_closing(self):
@@ -183,9 +187,9 @@ class Tag(Node):
     def contents(self, value):
         self._contents = parse_anything(value)
 
-    @showtag.setter
-    def showtag(self, value):
-        self._showtag = bool(value)
+    @wiki_markup.setter
+    def wiki_markup(self, value):
+        self._wiki_markup = str(value) if value else None
 
     @self_closing.setter
     def self_closing(self, value):
diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py
index 9366742..196ef14 100644
--- a/mwparserfromhell/parser/builder.py
+++ b/mwparserfromhell/parser/builder.py
@@ -207,15 +207,14 @@ class Builder(object):
         """Handle a case where a tag is at the head of the tokens."""
         close_tokens = (tokens.TagCloseSelfclose, tokens.TagCloseClose)
         implicit, attrs, contents, closing_tag = False, [], None, None
-        showtag = token.get("showtag", True)
-        invalid = token.get("invalid", False)
+        wiki_markup, invalid = token.wiki_markup, token.invalid or False
         self._push()
         while self._tokens:
             token = self._tokens.pop()
             if isinstance(token, tokens.TagAttrStart):
                 attrs.append(self._handle_attribute(token))
             elif isinstance(token, tokens.TagCloseOpen):
-                padding = token.padding
+                padding = token.padding or ""
                 tag = self._pop()
                 self._push()
             elif isinstance(token, tokens.TagOpenClose):
@@ -225,12 +224,12 @@ class Builder(object):
                 if isinstance(token, tokens.TagCloseSelfclose):
                     tag = self._pop()
                     self_closing = True
-                    padding = token.padding
-                    implicit = token.get("implicit", False)
+                    padding = token.padding or ""
+                    implicit = token.implicit or False
                 else:
                     self_closing = False
                     closing_tag = self._pop()
-                return Tag(tag, contents, attrs, showtag, self_closing,
+                return Tag(tag, contents, attrs, wiki_markup, self_closing,
                            invalid, implicit, padding, closing_tag)
             else:
                 self._write(self._handle_token(token))
diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py
index 211136c..d3f0254 100644
--- a/mwparserfromhell/parser/contexts.py
+++ b/mwparserfromhell/parser/contexts.py
@@ -69,6 +69,15 @@ Local (stack-specific) contexts:
 
     * :py:const:`TAG_BODY`
     * :py:const:`TAG_CLOSE`
 
+* :py:const:`STYLE`
+
+    * :py:const:`STYLE_ITALICS`
+    * :py:const:`STYLE_BOLD`
+    * :py:const:`STYLE_PASS_AGAIN`
+    * :py:const:`STYLE_SECOND_PASS`
+
+* :py:const:`DL_TERM`
+
 * :py:const:`SAFETY_CHECK`
 
     * :py:const:`HAS_TEXT`
@@ -115,12 +124,20 @@
 TAG_BODY = 1 << 16
 TAG_CLOSE = 1 << 17
 TAG = TAG_OPEN + TAG_ATTR + TAG_BODY + TAG_CLOSE
 
-HAS_TEXT = 1 << 18
-FAIL_ON_TEXT = 1 << 19
-FAIL_NEXT = 1 << 20
-FAIL_ON_LBRACE = 1 << 21
-FAIL_ON_RBRACE = 1 << 22
-FAIL_ON_EQUALS = 1 << 23
+STYLE_ITALICS = 1 << 18
+STYLE_BOLD = 1 << 19
+STYLE_PASS_AGAIN = 1 << 20
+STYLE_SECOND_PASS = 1 << 21
+STYLE = STYLE_ITALICS + STYLE_BOLD + STYLE_PASS_AGAIN + STYLE_SECOND_PASS
+
+DL_TERM = 1 << 22
+
+HAS_TEXT = 1 << 23
+FAIL_ON_TEXT = 1 << 24
+FAIL_NEXT = 1 << 25
+FAIL_ON_LBRACE = 1 << 26
+FAIL_ON_RBRACE = 1 << 27
+FAIL_ON_EQUALS = 1 << 28
 SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE +
                 FAIL_ON_RBRACE + FAIL_ON_EQUALS)
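Since every context constant is a single bit, the aggregates (`STYLE`, `TAG`, `SAFETY_CHECK`) are just sums of disjoint flags and membership is a bitwise test. A small sketch using the values defined above:

```python
STYLE_ITALICS = 1 << 18
STYLE_BOLD = 1 << 19
STYLE_PASS_AGAIN = 1 << 20
STYLE_SECOND_PASS = 1 << 21
STYLE = STYLE_ITALICS + STYLE_BOLD + STYLE_PASS_AGAIN + STYLE_SECOND_PASS

context = STYLE_ITALICS | STYLE_PASS_AGAIN   # a stack parsing ''...''
assert context & STYLE                       # inside some style construct
assert context & STYLE_ITALICS and not context & STYLE_BOLD

# XOR clears a set flag; the C tokenizer uses the same idiom for GL_HEADING.
context ^= STYLE_PASS_AGAIN
assert not context & STYLE_PASS_AGAIN
```

Building the aggregates with `+` is safe only because the members never share bits; `|` would be equivalent here.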
diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c
index bae5ec2..eff000a 100644
--- a/mwparserfromhell/parser/tokenizer.c
+++ b/mwparserfromhell/parser/tokenizer.c
@@ -29,6 +29,7 @@ SOFTWARE.
 static int heading_level_from_context(int n)
 {
     int level;
+
     n /= LC_HEADING_LEVEL_1;
     for (level = 1; n > 1; n >>= 1)
         level++;
@@ -72,6 +73,7 @@ static PyObject* strip_tag_name(PyObject* token)
 static Textbuffer* Textbuffer_new(void)
 {
     Textbuffer* buffer = malloc(sizeof(Textbuffer));
+
     if (!buffer) {
         PyErr_NoMemory();
         return NULL;
@@ -90,6 +92,7 @@ static Textbuffer* Textbuffer_new(void)
 static void Textbuffer_dealloc(Textbuffer* self)
 {
     Textbuffer* next;
+
     while (self) {
         free(self->data);
         next = self->next;
@@ -99,11 +102,12 @@
 }
 
 /*
-    Write text to the given textbuffer.
+    Write a Unicode codepoint to the given textbuffer.
 */
-static int Textbuffer_write(Textbuffer** this, Py_UNICODE text)
+static int Textbuffer_write(Textbuffer** this, Py_UNICODE code)
 {
     Textbuffer* self = *this;
+
     if (self->size == TEXTBUFFER_BLOCKSIZE) {
         Textbuffer* new = Textbuffer_new();
         if (!new)
@@ -111,7 +115,7 @@ static int Textbuffer_write(Textbuffer** this, Py_UNICODE text)
         new->next = self;
         *this = self = new;
     }
-    self->data[self->size] = text;
+    self->data[self->size] = code;
     self->size++;
     return 0;
 }
@@ -123,6 +127,7 @@ static PyObject* Textbuffer_render(Textbuffer* self)
 {
     PyObject *result = PyUnicode_FromUnicode(self->data, self->size);
     PyObject *left, *concat;
+
     while (self->next) {
         self = self->next;
         left = PyUnicode_FromUnicode(self->data, self->size);
@@ -208,6 +213,7 @@ static void Tokenizer_dealloc(Tokenizer* self)
 static int Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds)
 {
     static char* kwlist[] = {NULL};
+
     if (!PyArg_ParseTupleAndKeywords(args, kwds, "", kwlist))
         return -1;
     self->text = Py_None;
@@ -223,6 +229,7 @@ static int Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds)
 static int Tokenizer_push(Tokenizer* self, int context)
 {
     Stack* top = malloc(sizeof(Stack));
+
     if (!top) {
         PyErr_NoMemory();
         return -1;
@@ -246,6 +253,7 @@ static int Tokenizer_push_textbuffer(Tokenizer* self)
 {
     PyObject *text, *kwargs, *token;
     Textbuffer* buffer = self->topstack->textbuffer;
+
     if (buffer->size == 0 && !buffer->next)
         return 0;
     text = Textbuffer_render(buffer);
@@ -280,6 +288,7 @@ static int Tokenizer_push_textbuffer(Tokenizer* self)
 static void Tokenizer_delete_top_of_stack(Tokenizer* self)
 {
     Stack* top = self->topstack;
+
     Py_DECREF(top->stack);
     Textbuffer_dealloc(top->textbuffer);
     self->topstack = top->next;
@@ -293,6 +302,7 @@ static void Tokenizer_delete_top_of_stack(Tokenizer* self)
 static PyObject* Tokenizer_pop(Tokenizer* self)
 {
     PyObject* stack;
+
     if (Tokenizer_push_textbuffer(self))
         return NULL;
     stack = self->topstack->stack;
@@ -309,6 +319,7 @@ static PyObject* Tokenizer_pop_keeping_context(Tokenizer* self)
 {
     PyObject* stack;
     int context;
+
     if (Tokenizer_push_textbuffer(self))
         return NULL;
     stack = self->topstack->stack;
@@ -325,9 +336,11 @@ static PyObject* Tokenizer_pop_keeping_context(Tokenizer* self)
     stack/context/textbuffer and raises a BadRoute exception.
 */
 static void* Tokenizer_fail_route(Tokenizer* self)
 {
+    int context = self->topstack->context;
     PyObject* stack = Tokenizer_pop(self);
+
     Py_XDECREF(stack);
-    FAIL_ROUTE();
+    FAIL_ROUTE(context);
     return NULL;
 }
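For orientation, the `Textbuffer` these hunks keep touching is a linked chain of fixed-size `Py_UNICODE` blocks with the newest block at the head. A rough Python analogue of the write/render pair, with a toy block size standing in for `TEXTBUFFER_BLOCKSIZE`:

```python
# Rough analogue of the C Textbuffer chain; block size of 4 is only for the
# demo. write() pushes a fresh head block when the current one fills up,
# mirroring what Textbuffer_write does through its Textbuffer** argument.
BLOCKSIZE = 4

class Textbuffer:
    def __init__(self, nxt=None):
        self.data = []   # stands in for the Py_UNICODE array
        self.next = nxt

def write(head, code):
    if len(head.data) == BLOCKSIZE:
        head = Textbuffer(nxt=head)  # new head; older blocks trail behind
    head.data.append(code)
    return head

def render(head):
    # Like Textbuffer_render: start from the newest block and prepend the
    # contents of each older block while walking the chain.
    result = "".join(head.data)
    while head.next:
        head = head.next
        result = "".join(head.data) + result
    return result

buf = Textbuffer()
for ch in "''italic''":
    buf = write(buf, ch)
assert render(buf) == "''italic''"
```

This is also why `Textbuffer_write` takes a `Textbuffer**` rather than a plain pointer: filling a block must replace the caller's head reference.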
@@ -356,11 +369,26 @@ static int Tokenizer_emit_first(Tokenizer* self, PyObject* token)
 }
 
 /*
-    Write text to the current textbuffer.
+    Write a Unicode codepoint to the current textbuffer.
 */
-static int Tokenizer_emit_text(Tokenizer* self, Py_UNICODE text)
+static int Tokenizer_emit_char(Tokenizer* self, Py_UNICODE code)
 {
-    return Textbuffer_write(&(self->topstack->textbuffer), text);
+    return Textbuffer_write(&(self->topstack->textbuffer), code);
+}
+
+/*
+    Write a string of text to the current textbuffer.
+*/
+static int Tokenizer_emit_text(Tokenizer* self, const char* text)
+{
+    int i = 0;
+
+    while (text[i]) {
+        if (Tokenizer_emit_char(self, text[i]))
+            return -1;
+        i++;
+    }
+    return 0;
 }
 
 /*
@@ -427,15 +455,10 @@
 static int Tokenizer_emit_text_then_stack(Tokenizer* self, const char* text)
 {
     PyObject* stack = Tokenizer_pop(self);
-    int i = 0;
-    while (1) {
-        if (!text[i])
-            break;
-        if (Tokenizer_emit_text(self, (Py_UNICODE) text[i])) {
-            Py_XDECREF(stack);
-            return -1;
-        }
-        i++;
+
+    if (Tokenizer_emit_text(self, text)) {
+        Py_DECREF(stack);
+        return -1;
     }
     if (stack) {
         if (PyList_GET_SIZE(stack) > 0) {
@@ -456,6 +479,7 @@
 static PyObject* Tokenizer_read(Tokenizer* self, Py_ssize_t delta)
 {
     Py_ssize_t index = self->head + delta;
+
     if (index >= self->length)
         return EMPTY;
     return PyList_GET_ITEM(self->text, index);
@@ -467,6 +491,7 @@
 static PyObject* Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta)
 {
     Py_ssize_t index;
+
     if (delta > self->head)
         return EMPTY;
     index = self->head - delta;
@@ -751,7 +776,6 @@ static int Tokenizer_parse_wikilink(Tokenizer* self)
 {
     Py_ssize_t reset;
     PyObject *wikilink, *token;
-    int i;
 
     self->head += 2;
     reset = self->head - 1;
@@ -759,10 +783,8 @@ static int Tokenizer_parse_wikilink(Tokenizer* self)
     if (BAD_ROUTE) {
         RESET_ROUTE();
         self->head = reset;
-        for (i = 0; i < 2; i++) {
-            if (Tokenizer_emit_text(self, *"["))
-                return -1;
-        }
+        if (Tokenizer_emit_text(self, "[["))
+            return -1;
         return 0;
     }
     if (!wikilink)
@@ -847,7 +869,7 @@ static int Tokenizer_parse_heading(Tokenizer* self)
         RESET_ROUTE();
         self->head = reset + best - 1;
         for (i = 0; i < best; i++) {
-            if (Tokenizer_emit_text(self, *"="))
+            if (Tokenizer_emit_char(self, *"="))
                 return -1;
         }
         self->global ^= GL_HEADING;
@@ -885,7 +907,7 @@ static int Tokenizer_parse_heading(Tokenizer* self)
     if (heading->level < best) {
         diff = best - heading->level;
         for (i = 0; i < diff; i++) {
-            if (Tokenizer_emit_text(self, *"=")) {
+            if (Tokenizer_emit_char(self, *"=")) {
                 Py_DECREF(heading->title);
                 free(heading);
                 return -1;
@@ -936,7 +958,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self)
         if (level < best) {
             diff = best - level;
             for (i = 0; i < diff; i++) {
-                if (Tokenizer_emit_text(self, *"="))
+                if (Tokenizer_emit_char(self, *"="))
                     return NULL;
             }
         }
@@ -944,7 +966,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self)
     }
     else {
         for (i = 0; i < best; i++) {
-            if (Tokenizer_emit_text(self, *"=")) {
+            if (Tokenizer_emit_char(self, *"=")) {
                 Py_DECREF(after->title);
                 free(after);
                 return NULL;
@@ -1160,7 +1182,7 @@ static int Tokenizer_parse_entity(Tokenizer* self)
     if (BAD_ROUTE) {
         RESET_ROUTE();
         self->head = reset;
-        if (Tokenizer_emit_text(self, *"&"))
+        if (Tokenizer_emit_char(self, *"&"))
             return -1;
         return 0;
     }
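The payoff of splitting `Tokenizer_emit_char` from the new string-based `Tokenizer_emit_text` is visible at the call sites above: fixed strings like `"[["` are emitted in one call, while the heading handlers keep `emit_char` because they emit a variable number of `=` characters. An illustrative Python mirror of the two-level API (the class and names are for the sketch, not the project's Python tokenizer):

```python
# Illustrative two-level emit API mirroring Tokenizer_emit_char/_emit_text.
# A plain list stands in for the C textbuffer; 0/-1 returns mimic the C
# error convention.
class MiniTokenizer:
    def __init__(self):
        self.textbuffer = []

    def emit_char(self, code):
        self.textbuffer.append(code)
        return 0

    def emit_text(self, text):
        # Same shape as the C loop: bail out on the first failed write.
        for ch in text:
            if self.emit_char(ch):
                return -1
        return 0

tok = MiniTokenizer()
tok.emit_text("[[")   # the bad-route fallback becomes a single call
assert "".join(tok.textbuffer) == "[["
```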
"