@@ -24,7 +24,7 @@ from __future__ import unicode_literals | |||
from . import Node, Text | |||
from ..compat import str | |||
from ..tag_defs import get_wikicode, is_visible | |||
from ..tag_defs import is_visible | |||
from ..utils import parse_anything | |||
__all__ = ["Tag"] | |||
@@ -32,7 +32,7 @@ __all__ = ["Tag"] | |||
class Tag(Node): | |||
"""Represents an HTML-style tag in wikicode, like ``<ref>``.""" | |||
def __init__(self, tag, contents=None, attrs=None, showtag=True, | |||
def __init__(self, tag, contents=None, attrs=None, wiki_markup=None, | |||
self_closing=False, invalid=False, implicit=False, padding="", | |||
closing_tag=None): | |||
super(Tag, self).__init__() | |||
@@ -42,7 +42,7 @@ class Tag(Node): | |||
else: | |||
self._contents = contents | |||
self._attrs = attrs if attrs else [] | |||
self._showtag = showtag | |||
self._wiki_markup = wiki_markup | |||
self._self_closing = self_closing | |||
self._invalid = invalid | |||
self._implicit = implicit | |||
@@ -53,12 +53,11 @@ class Tag(Node): | |||
self._closing_tag = tag | |||
def __unicode__(self): | |||
if not self.showtag: | |||
open_, close = get_wikicode(self.tag) | |||
if self.wiki_markup: | |||
if self.self_closing: | |||
return open_ | |||
return self.wiki_markup | |||
else: | |||
return open_ + str(self.contents) + close | |||
return self.wiki_markup + str(self.contents) + self.wiki_markup | |||
result = ("</" if self.invalid else "<") + str(self.tag) | |||
if self.attributes: | |||
@@ -72,7 +71,7 @@ class Tag(Node): | |||
def __iternodes__(self, getter): | |||
yield None, self | |||
if self.showtag: | |||
if not self.wiki_markup: | |||
for child in getter(self.tag): | |||
yield self.tag, child | |||
for attr in self.attributes: | |||
@@ -84,7 +83,7 @@ class Tag(Node): | |||
if self.contents: | |||
for child in getter(self.contents): | |||
yield self.contents, child | |||
if not self.self_closing and self.showtag and self.closing_tag: | |||
if not self.self_closing and not self.wiki_markup and self.closing_tag: | |||
for child in getter(self.closing_tag): | |||
yield self.closing_tag, child | |||
@@ -131,9 +130,14 @@ class Tag(Node): | |||
return self._attrs | |||
@property | |||
def showtag(self): | |||
"""Whether to show the tag itself instead of a wikicode version.""" | |||
return self._showtag | |||
def wiki_markup(self): | |||
"""The wikified version of a tag to show instead of HTML. | |||
If set to a value, this will be displayed instead of the brackets. | |||
For example, set to ``''`` to replace ``<i>`` or ``----`` to replace | |||
``<hr>``. | |||
""" | |||
return self._wiki_markup | |||
@property | |||
def self_closing(self): | |||
@@ -183,9 +187,9 @@ class Tag(Node): | |||
def contents(self, value): | |||
self._contents = parse_anything(value) | |||
@showtag.setter | |||
def showtag(self, value): | |||
self._showtag = bool(value) | |||
@wiki_markup.setter | |||
def wiki_markup(self, value): | |||
self._wiki_markup = str(value) if value else None | |||
@self_closing.setter | |||
def self_closing(self, value): | |||
@@ -207,15 +207,14 @@ class Builder(object): | |||
"""Handle a case where a tag is at the head of the tokens.""" | |||
close_tokens = (tokens.TagCloseSelfclose, tokens.TagCloseClose) | |||
implicit, attrs, contents, closing_tag = False, [], None, None | |||
showtag = token.get("showtag", True) | |||
invalid = token.get("invalid", False) | |||
wiki_markup, invalid = token.wiki_markup, token.invalid or False | |||
self._push() | |||
while self._tokens: | |||
token = self._tokens.pop() | |||
if isinstance(token, tokens.TagAttrStart): | |||
attrs.append(self._handle_attribute(token)) | |||
elif isinstance(token, tokens.TagCloseOpen): | |||
padding = token.padding | |||
padding = token.padding or "" | |||
tag = self._pop() | |||
self._push() | |||
elif isinstance(token, tokens.TagOpenClose): | |||
@@ -225,12 +224,12 @@ class Builder(object): | |||
if isinstance(token, tokens.TagCloseSelfclose): | |||
tag = self._pop() | |||
self_closing = True | |||
padding = token.padding | |||
implicit = token.get("implicit", False) | |||
padding = token.padding or "" | |||
implicit = token.implicit or False | |||
else: | |||
self_closing = False | |||
closing_tag = self._pop() | |||
return Tag(tag, contents, attrs, showtag, self_closing, | |||
return Tag(tag, contents, attrs, wiki_markup, self_closing, | |||
invalid, implicit, padding, closing_tag) | |||
else: | |||
self._write(self._handle_token(token)) | |||
@@ -69,6 +69,15 @@ Local (stack-specific) contexts: | |||
* :py:const:`TAG_BODY` | |||
* :py:const:`TAG_CLOSE` | |||
* :py:const:`STYLE` | |||
* :py:const:`STYLE_ITALICS` | |||
* :py:const:`STYLE_BOLD` | |||
* :py:const:`STYLE_PASS_AGAIN` | |||
* :py:const:`STYLE_SECOND_PASS` | |||
* :py:const:`DL_TERM` | |||
* :py:const:`SAFETY_CHECK` | |||
* :py:const:`HAS_TEXT` | |||
@@ -115,12 +124,20 @@ TAG_BODY = 1 << 16 | |||
TAG_CLOSE = 1 << 17 | |||
TAG = TAG_OPEN + TAG_ATTR + TAG_BODY + TAG_CLOSE | |||
HAS_TEXT = 1 << 18 | |||
FAIL_ON_TEXT = 1 << 19 | |||
FAIL_NEXT = 1 << 20 | |||
FAIL_ON_LBRACE = 1 << 21 | |||
FAIL_ON_RBRACE = 1 << 22 | |||
FAIL_ON_EQUALS = 1 << 23 | |||
STYLE_ITALICS = 1 << 18 | |||
STYLE_BOLD = 1 << 19 | |||
STYLE_PASS_AGAIN = 1 << 20 | |||
STYLE_SECOND_PASS = 1 << 21 | |||
STYLE = STYLE_ITALICS + STYLE_BOLD + STYLE_PASS_AGAIN + STYLE_SECOND_PASS | |||
DL_TERM = 1 << 22 | |||
HAS_TEXT = 1 << 23 | |||
FAIL_ON_TEXT = 1 << 24 | |||
FAIL_NEXT = 1 << 25 | |||
FAIL_ON_LBRACE = 1 << 26 | |||
FAIL_ON_RBRACE = 1 << 27 | |||
FAIL_ON_EQUALS = 1 << 28 | |||
SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE + | |||
FAIL_ON_RBRACE + FAIL_ON_EQUALS) | |||
@@ -29,6 +29,7 @@ SOFTWARE. | |||
static int heading_level_from_context(int n) | |||
{ | |||
int level; | |||
n /= LC_HEADING_LEVEL_1; | |||
for (level = 1; n > 1; n >>= 1) | |||
level++; | |||
@@ -72,6 +73,7 @@ static PyObject* strip_tag_name(PyObject* token) | |||
static Textbuffer* Textbuffer_new(void) | |||
{ | |||
Textbuffer* buffer = malloc(sizeof(Textbuffer)); | |||
if (!buffer) { | |||
PyErr_NoMemory(); | |||
return NULL; | |||
@@ -90,6 +92,7 @@ static Textbuffer* Textbuffer_new(void) | |||
static void Textbuffer_dealloc(Textbuffer* self) | |||
{ | |||
Textbuffer* next; | |||
while (self) { | |||
free(self->data); | |||
next = self->next; | |||
@@ -99,11 +102,12 @@ static void Textbuffer_dealloc(Textbuffer* self) | |||
} | |||
/* | |||
Write text to the given textbuffer. | |||
Write a Unicode codepoint to the given textbuffer. | |||
*/ | |||
static int Textbuffer_write(Textbuffer** this, Py_UNICODE text) | |||
static int Textbuffer_write(Textbuffer** this, Py_UNICODE code) | |||
{ | |||
Textbuffer* self = *this; | |||
if (self->size == TEXTBUFFER_BLOCKSIZE) { | |||
Textbuffer* new = Textbuffer_new(); | |||
if (!new) | |||
@@ -111,7 +115,7 @@ static int Textbuffer_write(Textbuffer** this, Py_UNICODE text) | |||
new->next = self; | |||
*this = self = new; | |||
} | |||
self->data[self->size] = text; | |||
self->data[self->size] = code; | |||
self->size++; | |||
return 0; | |||
} | |||
@@ -123,6 +127,7 @@ static PyObject* Textbuffer_render(Textbuffer* self) | |||
{ | |||
PyObject *result = PyUnicode_FromUnicode(self->data, self->size); | |||
PyObject *left, *concat; | |||
while (self->next) { | |||
self = self->next; | |||
left = PyUnicode_FromUnicode(self->data, self->size); | |||
@@ -208,6 +213,7 @@ static void Tokenizer_dealloc(Tokenizer* self) | |||
static int Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds) | |||
{ | |||
static char* kwlist[] = {NULL}; | |||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "", kwlist)) | |||
return -1; | |||
self->text = Py_None; | |||
@@ -223,6 +229,7 @@ static int Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds) | |||
static int Tokenizer_push(Tokenizer* self, int context) | |||
{ | |||
Stack* top = malloc(sizeof(Stack)); | |||
if (!top) { | |||
PyErr_NoMemory(); | |||
return -1; | |||
@@ -246,6 +253,7 @@ static int Tokenizer_push_textbuffer(Tokenizer* self) | |||
{ | |||
PyObject *text, *kwargs, *token; | |||
Textbuffer* buffer = self->topstack->textbuffer; | |||
if (buffer->size == 0 && !buffer->next) | |||
return 0; | |||
text = Textbuffer_render(buffer); | |||
@@ -280,6 +288,7 @@ static int Tokenizer_push_textbuffer(Tokenizer* self) | |||
static void Tokenizer_delete_top_of_stack(Tokenizer* self) | |||
{ | |||
Stack* top = self->topstack; | |||
Py_DECREF(top->stack); | |||
Textbuffer_dealloc(top->textbuffer); | |||
self->topstack = top->next; | |||
@@ -293,6 +302,7 @@ static void Tokenizer_delete_top_of_stack(Tokenizer* self) | |||
static PyObject* Tokenizer_pop(Tokenizer* self) | |||
{ | |||
PyObject* stack; | |||
if (Tokenizer_push_textbuffer(self)) | |||
return NULL; | |||
stack = self->topstack->stack; | |||
@@ -309,6 +319,7 @@ static PyObject* Tokenizer_pop_keeping_context(Tokenizer* self) | |||
{ | |||
PyObject* stack; | |||
int context; | |||
if (Tokenizer_push_textbuffer(self)) | |||
return NULL; | |||
stack = self->topstack->stack; | |||
@@ -325,9 +336,11 @@ static PyObject* Tokenizer_pop_keeping_context(Tokenizer* self) | |||
*/ | |||
static void* Tokenizer_fail_route(Tokenizer* self) | |||
{ | |||
int context = self->topstack->context; | |||
PyObject* stack = Tokenizer_pop(self); | |||
Py_XDECREF(stack); | |||
FAIL_ROUTE(); | |||
FAIL_ROUTE(context); | |||
return NULL; | |||
} | |||
@@ -356,11 +369,26 @@ static int Tokenizer_emit_first(Tokenizer* self, PyObject* token) | |||
} | |||
/* | |||
Write text to the current textbuffer. | |||
Write a Unicode codepoint to the current textbuffer. | |||
*/ | |||
static int Tokenizer_emit_text(Tokenizer* self, Py_UNICODE text) | |||
static int Tokenizer_emit_char(Tokenizer* self, Py_UNICODE code) | |||
{ | |||
return Textbuffer_write(&(self->topstack->textbuffer), text); | |||
return Textbuffer_write(&(self->topstack->textbuffer), code); | |||
} | |||
/* | |||
Write a string of text to the current textbuffer. | |||
*/ | |||
static int Tokenizer_emit_text(Tokenizer* self, const char* text) | |||
{ | |||
int i = 0; | |||
while (text[i]) { | |||
if (Tokenizer_emit_char(self, text[i])) | |||
return -1; | |||
i++; | |||
} | |||
return 0; | |||
} | |||
/* | |||
@@ -427,15 +455,10 @@ static int Tokenizer_emit_all(Tokenizer* self, PyObject* tokenlist) | |||
static int Tokenizer_emit_text_then_stack(Tokenizer* self, const char* text) | |||
{ | |||
PyObject* stack = Tokenizer_pop(self); | |||
int i = 0; | |||
while (1) { | |||
if (!text[i]) | |||
break; | |||
if (Tokenizer_emit_text(self, (Py_UNICODE) text[i])) { | |||
Py_XDECREF(stack); | |||
return -1; | |||
} | |||
i++; | |||
if (Tokenizer_emit_text(self, text)) { | |||
Py_DECREF(stack); | |||
return -1; | |||
} | |||
if (stack) { | |||
if (PyList_GET_SIZE(stack) > 0) { | |||
@@ -456,6 +479,7 @@ static int Tokenizer_emit_text_then_stack(Tokenizer* self, const char* text) | |||
static PyObject* Tokenizer_read(Tokenizer* self, Py_ssize_t delta) | |||
{ | |||
Py_ssize_t index = self->head + delta; | |||
if (index >= self->length) | |||
return EMPTY; | |||
return PyList_GET_ITEM(self->text, index); | |||
@@ -467,6 +491,7 @@ static PyObject* Tokenizer_read(Tokenizer* self, Py_ssize_t delta) | |||
static PyObject* Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) | |||
{ | |||
Py_ssize_t index; | |||
if (delta > self->head) | |||
return EMPTY; | |||
index = self->head - delta; | |||
@@ -751,7 +776,6 @@ static int Tokenizer_parse_wikilink(Tokenizer* self) | |||
{ | |||
Py_ssize_t reset; | |||
PyObject *wikilink, *token; | |||
int i; | |||
self->head += 2; | |||
reset = self->head - 1; | |||
@@ -759,10 +783,8 @@ static int Tokenizer_parse_wikilink(Tokenizer* self) | |||
if (BAD_ROUTE) { | |||
RESET_ROUTE(); | |||
self->head = reset; | |||
for (i = 0; i < 2; i++) { | |||
if (Tokenizer_emit_text(self, *"[")) | |||
return -1; | |||
} | |||
if (Tokenizer_emit_text(self, "[[")) | |||
return -1; | |||
return 0; | |||
} | |||
if (!wikilink) | |||
@@ -847,7 +869,7 @@ static int Tokenizer_parse_heading(Tokenizer* self) | |||
RESET_ROUTE(); | |||
self->head = reset + best - 1; | |||
for (i = 0; i < best; i++) { | |||
if (Tokenizer_emit_text(self, *"=")) | |||
if (Tokenizer_emit_char(self, *"=")) | |||
return -1; | |||
} | |||
self->global ^= GL_HEADING; | |||
@@ -885,7 +907,7 @@ static int Tokenizer_parse_heading(Tokenizer* self) | |||
if (heading->level < best) { | |||
diff = best - heading->level; | |||
for (i = 0; i < diff; i++) { | |||
if (Tokenizer_emit_text(self, *"=")) { | |||
if (Tokenizer_emit_char(self, *"=")) { | |||
Py_DECREF(heading->title); | |||
free(heading); | |||
return -1; | |||
@@ -936,7 +958,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self) | |||
if (level < best) { | |||
diff = best - level; | |||
for (i = 0; i < diff; i++) { | |||
if (Tokenizer_emit_text(self, *"=")) | |||
if (Tokenizer_emit_char(self, *"=")) | |||
return NULL; | |||
} | |||
} | |||
@@ -944,7 +966,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self) | |||
} | |||
else { | |||
for (i = 0; i < best; i++) { | |||
if (Tokenizer_emit_text(self, *"=")) { | |||
if (Tokenizer_emit_char(self, *"=")) { | |||
Py_DECREF(after->title); | |||
free(after); | |||
return NULL; | |||
@@ -1160,7 +1182,7 @@ static int Tokenizer_parse_entity(Tokenizer* self) | |||
if (BAD_ROUTE) { | |||
RESET_ROUTE(); | |||
self->head = reset; | |||
if (Tokenizer_emit_text(self, *"&")) | |||
if (Tokenizer_emit_char(self, *"&")) | |||
return -1; | |||
return 0; | |||
} | |||
@@ -1182,24 +1204,14 @@ static int Tokenizer_parse_comment(Tokenizer* self) | |||
{ | |||
Py_ssize_t reset = self->head + 3; | |||
PyObject *token, *comment; | |||
int i; | |||
self->head += 4; | |||
comment = Tokenizer_parse(self, LC_COMMENT, 1); | |||
if (BAD_ROUTE) { | |||
const char* text = "<!--"; | |||
RESET_ROUTE(); | |||
self->head = reset; | |||
i = 0; | |||
while (1) { | |||
if (!text[i]) | |||
return 0; | |||
if (Tokenizer_emit_text(self, (Py_UNICODE) text[i])) { | |||
Py_XDECREF(text); | |||
return -1; | |||
} | |||
i++; | |||
} | |||
if (Tokenizer_emit_text(self, "<!--")) | |||
return -1; | |||
return 0; | |||
} | |||
if (!comment) | |||
@@ -1317,7 +1329,7 @@ Tokenizer_handle_tag_space(Tokenizer* self, TagData* data, Py_UNICODE text) | |||
return -1; | |||
} | |||
if (ctx & TAG_QUOTED && !(ctx & TAG_NOTE_SPACE)) { | |||
if (Tokenizer_emit_text(self, text)) | |||
if (Tokenizer_emit_char(self, text)) | |||
return -1; | |||
} | |||
else if (data->context & TAG_ATTR_READY) | |||
@@ -1342,14 +1354,14 @@ static int Tokenizer_handle_tag_text(Tokenizer* self, Py_UNICODE text) | |||
} | |||
} | |||
if (!is_marker || !Tokenizer_CAN_RECURSE(self)) | |||
return Tokenizer_emit_text(self, text); | |||
return Tokenizer_emit_char(self, text); | |||
else if (text == next && next == *"{") | |||
return Tokenizer_parse_template_or_argument(self); | |||
else if (text == next && next == *"[") | |||
return Tokenizer_parse_wikilink(self); | |||
else if (text == *"<") | |||
return Tokenizer_parse_tag(self); | |||
return Tokenizer_emit_text(self, text); | |||
return Tokenizer_emit_char(self, text); | |||
} | |||
/* | |||
@@ -1574,7 +1586,7 @@ static PyObject* Tokenizer_handle_blacklisted_tag(Tokenizer* self) | |||
return NULL; | |||
return Tokenizer_parse(self, 0, 0); | |||
} | |||
if (Tokenizer_emit_text(self, this)) | |||
if (Tokenizer_emit_char(self, this)) | |||
return NULL; | |||
} | |||
} | |||
@@ -1776,7 +1788,7 @@ static int Tokenizer_handle_invalid_tag_start(Tokenizer* self) | |||
return -1; | |||
} | |||
if (!IS_SINGLE_ONLY(name)) | |||
FAIL_ROUTE(); | |||
FAIL_ROUTE(0); | |||
break; | |||
} | |||
Textbuffer_write(&buf, this); | |||
@@ -1790,8 +1802,7 @@ static int Tokenizer_handle_invalid_tag_start(Tokenizer* self) | |||
if (BAD_ROUTE) { | |||
RESET_ROUTE(); | |||
self->head = reset; | |||
return (Tokenizer_emit_text(self, *"<") || | |||
Tokenizer_emit_text(self, *"/")); | |||
return Tokenizer_emit_text(self, "</"); | |||
} | |||
// Set invalid=True flag of TagOpenOpen | |||
if (PyObject_SetAttrString(PyList_GET_ITEM(tag, 0), "invalid", Py_True)) | |||
@@ -1812,7 +1823,7 @@ static int Tokenizer_parse_tag(Tokenizer* self) | |||
if (BAD_ROUTE) { | |||
RESET_ROUTE(); | |||
self->head = reset; | |||
return Tokenizer_emit_text(self, *"<"); | |||
return Tokenizer_emit_char(self, *"<"); | |||
} | |||
if (!tag) { | |||
return -1; | |||
@@ -1823,12 +1834,382 @@ static int Tokenizer_parse_tag(Tokenizer* self) | |||
} | |||
/* | |||
Write the body of a tag and the tokens that should surround it. | |||
*/ | |||
static int Tokenizer_emit_style_tag(Tokenizer* self, const char* tag, | |||
const char* ticks, PyObject* body) | |||
{ | |||
PyObject *markup, *kwargs, *token; | |||
markup = PyBytes_FromString(ticks); | |||
if (!markup) | |||
return -1; | |||
kwargs = PyDict_New(); | |||
if (!kwargs) { | |||
Py_DECREF(markup); | |||
return -1; | |||
} | |||
PyDict_SetItemString(kwargs, "wiki_markup", markup); | |||
Py_DECREF(markup); | |||
token = PyObject_Call(TagOpenOpen, NOARGS, kwargs); | |||
if (!token) { | |||
Py_DECREF(kwargs); | |||
return -1; | |||
} | |||
Py_DECREF(kwargs); | |||
if (Tokenizer_emit(self, token)) { | |||
Py_DECREF(token); | |||
return -1; | |||
} | |||
Py_DECREF(token); | |||
if (Tokenizer_emit_text(self, tag)) | |||
return -1; | |||
token = PyObject_CallObject(TagCloseOpen, NULL); | |||
if (!token) | |||
return -1; | |||
if (Tokenizer_emit(self, token)) { | |||
Py_DECREF(token); | |||
return -1; | |||
} | |||
Py_DECREF(token); | |||
if (Tokenizer_emit_all(self, body)) | |||
return -1; | |||
token = PyObject_CallObject(TagOpenClose, NULL); | |||
if (!token) | |||
return -1; | |||
if (Tokenizer_emit(self, token)) { | |||
Py_DECREF(token); | |||
return -1; | |||
} | |||
Py_DECREF(token); | |||
if (Tokenizer_emit_text(self, tag)) | |||
return -1; | |||
token = PyObject_CallObject(TagCloseClose, NULL); | |||
if (!token) | |||
return -1; | |||
if (Tokenizer_emit(self, token)) { | |||
Py_DECREF(token); | |||
return -1; | |||
} | |||
Py_DECREF(token); | |||
Py_DECREF(body); | |||
return 0; | |||
} | |||
/* | |||
Parse wiki-style italics. | |||
*/ | |||
static int Tokenizer_parse_italics(Tokenizer* self) | |||
{ | |||
Py_ssize_t reset = self->head; | |||
int context; | |||
PyObject *stack; | |||
stack = Tokenizer_parse(self, LC_STYLE_ITALICS, 1); | |||
if (BAD_ROUTE) { | |||
RESET_ROUTE(); | |||
self->head = reset; | |||
if (BAD_ROUTE_CONTEXT & LC_STYLE_PASS_AGAIN) { | |||
context = LC_STYLE_ITALICS | LC_STYLE_SECOND_PASS; | |||
stack = Tokenizer_parse(self, context, 1); | |||
} | |||
else | |||
return Tokenizer_emit_text(self, "''"); | |||
} | |||
if (!stack) | |||
return -1; | |||
return Tokenizer_emit_style_tag(self, "i", "''", stack); | |||
} | |||
/* | |||
Parse wiki-style bold. | |||
*/ | |||
static int Tokenizer_parse_bold(Tokenizer* self) | |||
{ | |||
Py_ssize_t reset = self->head; | |||
PyObject *stack; | |||
stack = Tokenizer_parse(self, LC_STYLE_BOLD, 1); | |||
if (BAD_ROUTE) { | |||
RESET_ROUTE(); | |||
self->head = reset; | |||
if (self->topstack->context & LC_STYLE_SECOND_PASS) | |||
return Tokenizer_emit_char(self, *"'") ? -1 : 1; | |||
if (self->topstack->context & LC_STYLE_ITALICS) { | |||
self->topstack->context |= LC_STYLE_PASS_AGAIN; | |||
return Tokenizer_emit_text(self, "'''"); | |||
} | |||
if (Tokenizer_emit_char(self, *"'")) | |||
return -1; | |||
return Tokenizer_parse_italics(self); | |||
} | |||
if (!stack) | |||
return -1; | |||
return Tokenizer_emit_style_tag(self, "b", "'''", stack); | |||
} | |||
/* | |||
Parse wiki-style italics and bold together (i.e., five ticks). | |||
*/ | |||
static int Tokenizer_parse_italics_and_bold(Tokenizer* self) | |||
{ | |||
Py_ssize_t reset = self->head; | |||
PyObject *stack, *stack2; | |||
stack = Tokenizer_parse(self, LC_STYLE_BOLD, 1); | |||
if (BAD_ROUTE) { | |||
RESET_ROUTE(); | |||
self->head = reset; | |||
stack = Tokenizer_parse(self, LC_STYLE_ITALICS, 1); | |||
if (BAD_ROUTE) { | |||
RESET_ROUTE(); | |||
self->head = reset; | |||
return Tokenizer_emit_text(self, "'''''"); | |||
} | |||
if (!stack) | |||
return -1; | |||
reset = self->head; | |||
stack2 = Tokenizer_parse(self, LC_STYLE_BOLD, 1); | |||
if (BAD_ROUTE) { | |||
RESET_ROUTE(); | |||
self->head = reset; | |||
if (Tokenizer_emit_text(self, "'''")) | |||
return -1; | |||
return Tokenizer_emit_style_tag(self, "i", "''", stack); | |||
} | |||
if (!stack2) | |||
return -1; | |||
if (Tokenizer_push(self, 0)) | |||
return -1; | |||
if (Tokenizer_emit_style_tag(self, "i", "''", stack)) | |||
return -1; | |||
if (Tokenizer_emit_all(self, stack2)) | |||
return -1; | |||
Py_DECREF(stack2); | |||
stack2 = Tokenizer_pop(self); | |||
if (!stack2) | |||
return -1; | |||
return Tokenizer_emit_style_tag(self, "b", "'''", stack2); | |||
} | |||
if (!stack) | |||
return -1; | |||
reset = self->head; | |||
stack2 = Tokenizer_parse(self, LC_STYLE_ITALICS, 1); | |||
if (BAD_ROUTE) { | |||
RESET_ROUTE(); | |||
self->head = reset; | |||
if (Tokenizer_emit_text(self, "''")) | |||
return -1; | |||
return Tokenizer_emit_style_tag(self, "b", "'''", stack); | |||
} | |||
if (!stack2) | |||
return -1; | |||
if (Tokenizer_push(self, 0)) | |||
return -1; | |||
if (Tokenizer_emit_style_tag(self, "b", "'''", stack)) | |||
return -1; | |||
if (Tokenizer_emit_all(self, stack2)) | |||
return -1; | |||
Py_DECREF(stack2); | |||
stack2 = Tokenizer_pop(self); | |||
if (!stack2) | |||
return -1; | |||
return Tokenizer_emit_style_tag(self, "i", "''", stack2); | |||
} | |||
/* | |||
Parse wiki-style formatting (''/''' for italics/bold). | |||
*/ | |||
static PyObject* Tokenizer_parse_style(Tokenizer* self) | |||
{ | |||
int context = self->topstack->context, ticks = 2, i; | |||
self->head += 2; | |||
while (Tokenizer_READ(self, 0) == *"'") { | |||
self->head++; | |||
ticks++; | |||
} | |||
if (ticks > 5) { | |||
for (i = 0; i < ticks - 5; i++) { | |||
if (Tokenizer_emit_char(self, *"'")) | |||
return NULL; | |||
} | |||
ticks = 5; | |||
} | |||
else if (ticks == 4) { | |||
if (Tokenizer_emit_char(self, *"'")) | |||
return NULL; | |||
ticks = 3; | |||
} | |||
if ((context & LC_STYLE_ITALICS && (ticks == 2 || ticks == 5)) || | |||
(context & LC_STYLE_BOLD && (ticks == 3 || ticks == 5))) { | |||
if (ticks == 5) | |||
self->head -= context & LC_STYLE_ITALICS ? 3 : 2; | |||
return Tokenizer_pop(self); | |||
} | |||
if (!Tokenizer_CAN_RECURSE(self)) { | |||
if (ticks == 3) { | |||
if (context & LC_STYLE_SECOND_PASS) { | |||
if (Tokenizer_emit_char(self, *"'")) | |||
return NULL; | |||
return Tokenizer_pop(self); | |||
} | |||
self->topstack->context |= LC_STYLE_PASS_AGAIN; | |||
} | |||
for (i = 0; i < ticks; i++) { | |||
if (Tokenizer_emit_char(self, *"'")) | |||
return NULL; | |||
} | |||
} | |||
else if (ticks == 2) { | |||
if (Tokenizer_parse_italics(self)) | |||
return NULL; | |||
} | |||
else if (ticks == 3) { | |||
switch (Tokenizer_parse_bold(self)) { | |||
case 1: | |||
return Tokenizer_pop(self); | |||
case -1: | |||
return NULL; | |||
} | |||
} | |||
else { | |||
if (Tokenizer_parse_italics_and_bold(self)) | |||
return NULL; | |||
} | |||
self->head--; | |||
return Py_None; | |||
} | |||
/* | |||
Handle a list marker at the head (#, *, ;, :). | |||
*/ | |||
static int Tokenizer_handle_list_marker(Tokenizer* self) | |||
{ | |||
PyObject *markup = Tokenizer_read(self, 0), *kwargs, *token; | |||
Py_UNICODE code = *PyUnicode_AS_UNICODE(markup); | |||
if (code == *";") | |||
self->topstack->context |= LC_DLTERM; | |||
kwargs = PyDict_New(); | |||
if (!kwargs) | |||
return -1; | |||
PyDict_SetItemString(kwargs, "wiki_markup", markup); | |||
token = PyObject_Call(TagOpenOpen, NOARGS, kwargs); | |||
if (!token) { | |||
Py_DECREF(kwargs); | |||
return -1; | |||
} | |||
Py_DECREF(kwargs); | |||
if (Tokenizer_emit(self, token)) { | |||
Py_DECREF(token); | |||
return -1; | |||
} | |||
Py_DECREF(token); | |||
if (Tokenizer_emit_text(self, GET_HTML_TAG(code))) | |||
return -1; | |||
token = PyObject_CallObject(TagCloseSelfclose, NULL); | |||
if (!token) | |||
return -1; | |||
if (Tokenizer_emit(self, token)) { | |||
Py_DECREF(token); | |||
return -1; | |||
} | |||
Py_DECREF(token); | |||
return 0; | |||
} | |||
/* | |||
Handle a wiki-style list (#, *, ;, :). | |||
*/ | |||
static int Tokenizer_handle_list(Tokenizer* self) | |||
{ | |||
Py_UNICODE marker = Tokenizer_READ(self, 1); | |||
if (Tokenizer_handle_list_marker(self)) | |||
return -1; | |||
while (marker == *"#" || marker == *"*" || marker == *";" || | |||
marker == *":") { | |||
self->head++; | |||
if (Tokenizer_handle_list_marker(self)) | |||
return -1; | |||
marker = Tokenizer_READ(self, 1); | |||
} | |||
return 0; | |||
} | |||
/* | |||
Handle a wiki-style horizontal rule (----) in the string. | |||
*/ | |||
static int Tokenizer_handle_hr(Tokenizer* self) | |||
{ | |||
PyObject *markup, *kwargs, *token; | |||
Textbuffer *buffer = Textbuffer_new(); | |||
int i; | |||
if (!buffer) | |||
return -1; | |||
self->head += 3; | |||
for (i = 0; i < 4; i++) { | |||
if (Textbuffer_write(&buffer, *"-")) | |||
return -1; | |||
} | |||
while (Tokenizer_READ(self, 1) == *"-") { | |||
if (Textbuffer_write(&buffer, *"-")) | |||
return -1; | |||
self->head++; | |||
} | |||
markup = Textbuffer_render(buffer); | |||
if (!markup) | |||
return -1; | |||
Textbuffer_dealloc(buffer); | |||
kwargs = PyDict_New(); | |||
if (!kwargs) | |||
return -1; | |||
PyDict_SetItemString(kwargs, "wiki_markup", markup); | |||
Py_DECREF(markup); | |||
token = PyObject_Call(TagOpenOpen, NOARGS, kwargs); | |||
if (!token) { | |||
Py_DECREF(kwargs); | |||
return -1; | |||
} | |||
Py_DECREF(kwargs); | |||
if (Tokenizer_emit(self, token)) { | |||
Py_DECREF(token); | |||
return -1; | |||
} | |||
Py_DECREF(token); | |||
if (Tokenizer_emit_text(self, "hr")) | |||
return -1; | |||
token = PyObject_CallObject(TagCloseSelfclose, NULL); | |||
if (!token) | |||
return -1; | |||
if (Tokenizer_emit(self, token)) { | |||
Py_DECREF(token); | |||
return -1; | |||
} | |||
Py_DECREF(token); | |||
return 0; | |||
} | |||
/* | |||
Handle the term in a description list ('foo' in ';foo:bar'). | |||
*/ | |||
static int Tokenizer_handle_dl_term(Tokenizer* self) | |||
{ | |||
self->topstack->context ^= LC_DLTERM; | |||
if (Tokenizer_READ(self, 0) == *":") | |||
return Tokenizer_handle_list_marker(self); | |||
return Tokenizer_emit_char(self, *"\n"); | |||
} | |||
/* | |||
Handle the end of the stream of wikitext. | |||
*/ | |||
static PyObject* Tokenizer_handle_end(Tokenizer* self, int context) | |||
{ | |||
static int fail_contexts = (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | | |||
LC_HEADING | LC_COMMENT | LC_TAG); | |||
LC_HEADING | LC_COMMENT | LC_TAG | LC_STYLE); | |||
static int double_fail = (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE); | |||
PyObject *token, *text, *trash; | |||
int single; | |||
@@ -1943,7 +2324,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||
static int double_unsafe = (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE); | |||
int this_context, is_marker, i; | |||
Py_UNICODE this, next, next_next, last; | |||
PyObject* trash; | |||
PyObject* temp; | |||
if (push) { | |||
if (Tokenizer_push(self, context)) | |||
@@ -1955,8 +2336,8 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||
if (this_context & unsafe_contexts) { | |||
if (Tokenizer_verify_safe(self, this_context, this) < 0) { | |||
if (this_context & double_unsafe) { | |||
trash = Tokenizer_pop(self); | |||
Py_XDECREF(trash); | |||
temp = Tokenizer_pop(self); | |||
Py_XDECREF(temp); | |||
} | |||
return Tokenizer_fail_route(self); | |||
} | |||
@@ -1969,7 +2350,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||
} | |||
} | |||
if (!is_marker) { | |||
if (Tokenizer_emit_text(self, this)) | |||
if (Tokenizer_emit_char(self, this)) | |||
return NULL; | |||
self->head++; | |||
continue; | |||
@@ -1977,12 +2358,13 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||
if (this == *"") | |||
return Tokenizer_handle_end(self, this_context); | |||
next = Tokenizer_READ(self, 1); | |||
last = Tokenizer_READ_BACKWARDS(self, 1); | |||
if (this_context & LC_COMMENT) { | |||
if (this == next && next == *"-") { | |||
if (Tokenizer_READ(self, 2) == *">") | |||
return Tokenizer_pop(self); | |||
} | |||
if (Tokenizer_emit_text(self, this)) | |||
if (Tokenizer_emit_char(self, this)) | |||
return NULL; | |||
} | |||
else if (this == next && next == *"{") { | |||
@@ -1990,7 +2372,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||
if (Tokenizer_parse_template_or_argument(self)) | |||
return NULL; | |||
} | |||
else if (Tokenizer_emit_text(self, this)) | |||
else if (Tokenizer_emit_char(self, this)) | |||
return NULL; | |||
} | |||
else if (this == *"|" && this_context & LC_TEMPLATE) { | |||
@@ -2011,7 +2393,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||
if (Tokenizer_READ(self, 2) == *"}") { | |||
return Tokenizer_handle_argument_end(self); | |||
} | |||
if (Tokenizer_emit_text(self, this)) | |||
if (Tokenizer_emit_char(self, this)) | |||
return NULL; | |||
} | |||
else if (this == next && next == *"[") { | |||
@@ -2020,7 +2402,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||
if (Tokenizer_parse_wikilink(self)) | |||
return NULL; | |||
} | |||
else if (Tokenizer_emit_text(self, this)) | |||
else if (Tokenizer_emit_char(self, this)) | |||
return NULL; | |||
} | |||
else if (this == *"|" && this_context & LC_WIKILINK_TITLE) { | |||
@@ -2030,12 +2412,11 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||
else if (this == next && next == *"]" && this_context & LC_WIKILINK) | |||
return Tokenizer_handle_wikilink_end(self); | |||
else if (this == *"=" && !(self->global & GL_HEADING)) { | |||
last = Tokenizer_READ_BACKWARDS(self, 1); | |||
if (last == *"\n" || last == *"") { | |||
if (Tokenizer_parse_heading(self)) | |||
return NULL; | |||
} | |||
else if (Tokenizer_emit_text(self, this)) | |||
else if (Tokenizer_emit_char(self, this)) | |||
return NULL; | |||
} | |||
else if (this == *"=" && this_context & LC_HEADING) | |||
@@ -2052,7 +2433,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||
if (Tokenizer_parse_comment(self)) | |||
return NULL; | |||
} | |||
else if (Tokenizer_emit_text(self, this)) | |||
else if (Tokenizer_emit_char(self, this)) | |||
return NULL; | |||
} | |||
else if (this == *"<" && next == *"/" && | |||
@@ -2072,12 +2453,35 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||
if (Tokenizer_parse_tag(self)) | |||
return NULL; | |||
} | |||
else if (Tokenizer_emit_text(self, this)) | |||
else if (Tokenizer_emit_char(self, this)) | |||
return NULL; | |||
} | |||
else if (this == *">" && this_context & LC_TAG_CLOSE) | |||
return Tokenizer_handle_tag_close_close(self); | |||
else if (Tokenizer_emit_text(self, this)) | |||
else if (this == next && next == *"'") { | |||
temp = Tokenizer_parse_style(self); | |||
if (temp != Py_None) | |||
return temp; | |||
} | |||
else if (last == *"\n" || last == *"") { | |||
if (this == *"#" || this == *"*" || this == *";" || this == *":") { | |||
if (Tokenizer_handle_list(self)) | |||
return NULL; | |||
} | |||
else if (this == *"-" && this == next && | |||
this == Tokenizer_READ(self, 2) && | |||
this == Tokenizer_READ(self, 3)) { | |||
if (Tokenizer_handle_hr(self)) | |||
return NULL; | |||
} | |||
else if (Tokenizer_emit_char(self, this)) | |||
return NULL; | |||
} | |||
else if ((this == *"\n" || this == *":") && this_context & LC_DLTERM) { | |||
if (Tokenizer_handle_dl_term(self)) | |||
return NULL; | |||
} | |||
else if (Tokenizer_emit_char(self, this)) | |||
return NULL; | |||
self->head++; | |||
} | |||
@@ -41,20 +41,21 @@ SOFTWARE. | |||
#define ALPHANUM "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |||
static const char* MARKERS[] = { | |||
"{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", "/", "-", | |||
"\n", ""}; | |||
"{", "}", "[", "]", "<", ">", "|", "=", "&", "'", "#", "*", ";", ":", "/", | |||
"-", "\n", ""}; | |||
#define NUM_MARKERS 17 | |||
#define NUM_MARKERS 18 | |||
#define TEXTBUFFER_BLOCKSIZE 1024 | |||
#define MAX_DEPTH 40 | |||
#define MAX_CYCLES 100000 | |||
#define MAX_BRACES 255 | |||
#define MAX_ENTITY_SIZE 8 | |||
static int route_state = 0; | |||
#define BAD_ROUTE (route_state) | |||
#define FAIL_ROUTE() (route_state = 1) | |||
#define RESET_ROUTE() (route_state = 0) | |||
static int route_state = 0, route_context = 0; | |||
#define BAD_ROUTE route_state | |||
#define BAD_ROUTE_CONTEXT route_context | |||
#define FAIL_ROUTE(context) route_state = 1; route_context = context | |||
#define RESET_ROUTE() route_state = 0 | |||
static char** entitydefs; | |||
@@ -102,42 +103,50 @@ static PyObject* TagCloseClose; | |||
/* Local contexts: */ | |||
#define LC_TEMPLATE 0x000007 | |||
#define LC_TEMPLATE_NAME 0x000001 | |||
#define LC_TEMPLATE_PARAM_KEY 0x000002 | |||
#define LC_TEMPLATE_PARAM_VALUE 0x000004 | |||
#define LC_ARGUMENT 0x000018 | |||
#define LC_ARGUMENT_NAME 0x000008 | |||
#define LC_ARGUMENT_DEFAULT 0x000010 | |||
#define LC_WIKILINK 0x000060 | |||
#define LC_WIKILINK_TITLE 0x000020 | |||
#define LC_WIKILINK_TEXT 0x000040 | |||
#define LC_HEADING 0x001F80 | |||
#define LC_HEADING_LEVEL_1 0x000080 | |||
#define LC_HEADING_LEVEL_2 0x000100 | |||
#define LC_HEADING_LEVEL_3 0x000200 | |||
#define LC_HEADING_LEVEL_4 0x000400 | |||
#define LC_HEADING_LEVEL_5 0x000800 | |||
#define LC_HEADING_LEVEL_6 0x001000 | |||
#define LC_COMMENT 0x002000 | |||
#define LC_TAG 0x03C000 | |||
#define LC_TAG_OPEN 0x004000 | |||
#define LC_TAG_ATTR 0x008000 | |||
#define LC_TAG_BODY 0x010000 | |||
#define LC_TAG_CLOSE 0x020000 | |||
#define LC_SAFETY_CHECK 0xFC0000 | |||
#define LC_HAS_TEXT 0x040000 | |||
#define LC_FAIL_ON_TEXT 0x080000 | |||
#define LC_FAIL_NEXT 0x100000 | |||
#define LC_FAIL_ON_LBRACE 0x200000 | |||
#define LC_FAIL_ON_RBRACE 0x400000 | |||
#define LC_FAIL_ON_EQUALS 0x800000 | |||
#define LC_TEMPLATE 0x00000007 | |||
#define LC_TEMPLATE_NAME 0x00000001 | |||
#define LC_TEMPLATE_PARAM_KEY 0x00000002 | |||
#define LC_TEMPLATE_PARAM_VALUE 0x00000004 | |||
#define LC_ARGUMENT 0x00000018 | |||
#define LC_ARGUMENT_NAME 0x00000008 | |||
#define LC_ARGUMENT_DEFAULT 0x00000010 | |||
#define LC_WIKILINK 0x00000060 | |||
#define LC_WIKILINK_TITLE 0x00000020 | |||
#define LC_WIKILINK_TEXT 0x00000040 | |||
#define LC_HEADING 0x00001F80 | |||
#define LC_HEADING_LEVEL_1 0x00000080 | |||
#define LC_HEADING_LEVEL_2 0x00000100 | |||
#define LC_HEADING_LEVEL_3 0x00000200 | |||
#define LC_HEADING_LEVEL_4 0x00000400 | |||
#define LC_HEADING_LEVEL_5 0x00000800 | |||
#define LC_HEADING_LEVEL_6 0x00001000 | |||
#define LC_COMMENT 0x00002000 | |||
#define LC_TAG 0x0003C000 | |||
#define LC_TAG_OPEN 0x00004000 | |||
#define LC_TAG_ATTR 0x00008000 | |||
#define LC_TAG_BODY 0x00010000 | |||
#define LC_TAG_CLOSE 0x00020000 | |||
#define LC_STYLE 0x003C0000 | |||
#define LC_STYLE_ITALICS 0x00040000 | |||
#define LC_STYLE_BOLD 0x00080000 | |||
#define LC_STYLE_PASS_AGAIN 0x00100000 | |||
#define LC_STYLE_SECOND_PASS 0x00200000 | |||
#define LC_DLTERM 0x00400000 | |||
#define LC_SAFETY_CHECK 0x1F800000 | |||
#define LC_HAS_TEXT 0x00800000 | |||
#define LC_FAIL_ON_TEXT 0x01000000 | |||
#define LC_FAIL_NEXT 0x02000000 | |||
#define LC_FAIL_ON_LBRACE 0x04000000 | |||
#define LC_FAIL_ON_RBRACE 0x08000000 | |||
#define LC_FAIL_ON_EQUALS 0x10000000 | |||
/* Global contexts: */ | |||
@@ -211,6 +220,7 @@ typedef struct { | |||
/* Macros for accessing HTML tag definitions: */ | |||
#define GET_HTML_TAG(markup) (markup == *":" ? "dd" : markup == *";" ? "dt" : "li") | |||
#define IS_PARSABLE(tag) (call_tag_def_func("is_parsable", tag)) | |||
#define IS_SINGLE(tag) (call_tag_def_func("is_single", tag)) | |||
#define IS_SINGLE_ONLY(tag) (call_tag_def_func("is_single_only", tag)) | |||
@@ -26,13 +26,15 @@ import re | |||
from . import contexts, tokens | |||
from ..compat import htmlentities | |||
from ..tag_defs import is_parsable, is_single, is_single_only | |||
from ..tag_defs import get_html_tag, is_parsable, is_single, is_single_only | |||
__all__ = ["Tokenizer"] | |||
class BadRoute(Exception): | |||
"""Raised internally when the current tokenization route is invalid.""" | |||
pass | |||
def __init__(self, context=0): | |||
self.context = context | |||
class _TagOpenData(object): | |||
@@ -57,11 +59,11 @@ class Tokenizer(object): | |||
USES_C = False | |||
START = object() | |||
END = object() | |||
MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", | |||
"/", "-", "\n", END] | |||
MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "'", "#", "*", ";", | |||
":", "/", "-", "\n", END] | |||
MAX_DEPTH = 40 | |||
MAX_CYCLES = 100000 | |||
regex = re.compile(r"([{}\[\]<>|=&#*;:/\\\"\-!\n])", flags=re.IGNORECASE) | |||
regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE) | |||
tag_splitter = re.compile(r"([\s\"\\]+)") | |||
def __init__(self): | |||
@@ -132,8 +134,9 @@ class Tokenizer(object): | |||
Discards the current stack/context/textbuffer and raises | |||
:py:exc:`~.BadRoute`. | |||
""" | |||
context = self._context | |||
self._pop() | |||
raise BadRoute() | |||
raise BadRoute(context) | |||
def _emit(self, token): | |||
"""Write a token to the end of the current token stack.""" | |||
@@ -629,10 +632,164 @@ class Tokenizer(object): | |||
else: | |||
self._emit_all(tag) | |||
def _emit_style_tag(self, tag, markup, body): | |||
"""Write the body of a tag and the tokens that should surround it.""" | |||
self._emit(tokens.TagOpenOpen(wiki_markup=markup)) | |||
self._emit_text(tag) | |||
self._emit(tokens.TagCloseOpen()) | |||
self._emit_all(body) | |||
self._emit(tokens.TagOpenClose()) | |||
self._emit_text(tag) | |||
self._emit(tokens.TagCloseClose()) | |||
def _parse_italics(self): | |||
"""Parse wiki-style italics.""" | |||
reset = self._head | |||
try: | |||
stack = self._parse(contexts.STYLE_ITALICS) | |||
except BadRoute as route: | |||
self._head = reset | |||
if route.context & contexts.STYLE_PASS_AGAIN: | |||
stack = self._parse(route.context | contexts.STYLE_SECOND_PASS) | |||
else: | |||
return self._emit_text("''") | |||
self._emit_style_tag("i", "''", stack) | |||
def _parse_bold(self): | |||
"""Parse wiki-style bold.""" | |||
reset = self._head | |||
try: | |||
stack = self._parse(contexts.STYLE_BOLD) | |||
except BadRoute: | |||
self._head = reset | |||
if self._context & contexts.STYLE_SECOND_PASS: | |||
self._emit_text("'") | |||
return True | |||
elif self._context & contexts.STYLE_ITALICS: | |||
self._context |= contexts.STYLE_PASS_AGAIN | |||
self._emit_text("'''") | |||
else: | |||
self._emit_text("'") | |||
self._parse_italics() | |||
else: | |||
self._emit_style_tag("b", "'''", stack) | |||
def _parse_italics_and_bold(self): | |||
"""Parse wiki-style italics and bold together (i.e., five ticks).""" | |||
reset = self._head | |||
try: | |||
stack = self._parse(contexts.STYLE_BOLD) | |||
except BadRoute: | |||
self._head = reset | |||
try: | |||
stack = self._parse(contexts.STYLE_ITALICS) | |||
except BadRoute: | |||
self._head = reset | |||
self._emit_text("'''''") | |||
else: | |||
reset = self._head | |||
try: | |||
stack2 = self._parse(contexts.STYLE_BOLD) | |||
except BadRoute: | |||
self._head = reset | |||
self._emit_text("'''") | |||
self._emit_style_tag("i", "''", stack) | |||
else: | |||
self._push() | |||
self._emit_style_tag("i", "''", stack) | |||
self._emit_all(stack2) | |||
self._emit_style_tag("b", "'''", self._pop()) | |||
else: | |||
reset = self._head | |||
try: | |||
stack2 = self._parse(contexts.STYLE_ITALICS) | |||
except BadRoute: | |||
self._head = reset | |||
self._emit_text("''") | |||
self._emit_style_tag("b", "'''", stack) | |||
else: | |||
self._push() | |||
self._emit_style_tag("b", "'''", stack) | |||
self._emit_all(stack2) | |||
self._emit_style_tag("i", "''", self._pop()) | |||
def _parse_style(self): | |||
"""Parse wiki-style formatting (``''``/``'''`` for italics/bold).""" | |||
self._head += 2 | |||
ticks = 2 | |||
while self._read() == "'": | |||
self._head += 1 | |||
ticks += 1 | |||
italics = self._context & contexts.STYLE_ITALICS | |||
bold = self._context & contexts.STYLE_BOLD | |||
if ticks > 5: | |||
self._emit_text("'" * (ticks - 5)) | |||
ticks = 5 | |||
elif ticks == 4: | |||
self._emit_text("'") | |||
ticks = 3 | |||
if (italics and ticks in (2, 5)) or (bold and ticks in (3, 5)): | |||
if ticks == 5: | |||
self._head -= 3 if italics else 2 | |||
return self._pop() | |||
elif not self._can_recurse(): | |||
if ticks == 3: | |||
if self._context & contexts.STYLE_SECOND_PASS: | |||
self._emit_text("'") | |||
return self._pop() | |||
self._context |= contexts.STYLE_PASS_AGAIN | |||
self._emit_text("'" * ticks) | |||
elif ticks == 2: | |||
self._parse_italics() | |||
elif ticks == 3: | |||
if self._parse_bold(): | |||
return self._pop() | |||
elif ticks == 5: | |||
self._parse_italics_and_bold() | |||
self._head -= 1 | |||
def _handle_list_marker(self): | |||
"""Handle a list marker at the head (``#``, ``*``, ``;``, ``:``).""" | |||
markup = self._read() | |||
if markup == ";": | |||
self._context |= contexts.DL_TERM | |||
self._emit(tokens.TagOpenOpen(wiki_markup=markup)) | |||
self._emit_text(get_html_tag(markup)) | |||
self._emit(tokens.TagCloseSelfclose()) | |||
def _handle_list(self): | |||
"""Handle a wiki-style list (``#``, ``*``, ``;``, ``:``).""" | |||
self._handle_list_marker() | |||
while self._read(1) in ("#", "*", ";", ":"): | |||
self._head += 1 | |||
self._handle_list_marker() | |||
def _handle_hr(self): | |||
"""Handle a wiki-style horizontal rule (``----``) in the string.""" | |||
length = 4 | |||
self._head += 3 | |||
while self._read(1) == "-": | |||
length += 1 | |||
self._head += 1 | |||
self._emit(tokens.TagOpenOpen(wiki_markup="-" * length)) | |||
self._emit_text("hr") | |||
self._emit(tokens.TagCloseSelfclose()) | |||
def _handle_dl_term(self): | |||
"""Handle the term in a description list (``foo`` in ``;foo:bar``).""" | |||
self._context ^= contexts.DL_TERM | |||
if self._read() == ":": | |||
self._handle_list_marker() | |||
else: | |||
self._emit_text("\n") | |||
def _handle_end(self): | |||
"""Handle the end of the stream of wikitext.""" | |||
fail = (contexts.TEMPLATE | contexts.ARGUMENT | contexts.WIKILINK | | |||
contexts.HEADING | contexts.COMMENT | contexts.TAG) | |||
contexts.HEADING | contexts.COMMENT | contexts.TAG | | |||
contexts.STYLE) | |||
double_fail = (contexts.TEMPLATE_PARAM_KEY | contexts.TAG_CLOSE) | |||
if self._context & fail: | |||
if self._context & contexts.TAG_BODY: | |||
@@ -782,6 +939,19 @@ class Tokenizer(object): | |||
self._emit_text("<") | |||
elif this == ">" and self._context & contexts.TAG_CLOSE: | |||
return self._handle_tag_close_close() | |||
elif this == next == "'": | |||
result = self._parse_style() | |||
if result is not None: | |||
return result | |||
elif self._read(-1) in ("\n", self.START): | |||
if this in ("#", "*", ";", ":"): | |||
self._handle_list() | |||
elif this == next == self._read(2) == self._read(3) == "-": | |||
self._handle_hr() | |||
else: | |||
self._emit_text(this) | |||
elif this in ("\n", ":") and self._context & contexts.DL_TERM: | |||
self._handle_dl_term() | |||
else: | |||
self._emit_text(this) | |||
self._head += 1 | |||
@@ -55,7 +55,7 @@ class Token(object): | |||
return False | |||
def __getattr__(self, key): | |||
return self._kwargs[key] | |||
return self._kwargs.get(key) | |||
def __setattr__(self, key, value): | |||
self._kwargs[key] = value | |||
@@ -63,10 +63,6 @@ class Token(object): | |||
def __delattr__(self, key): | |||
del self._kwargs[key] | |||
def get(self, key, default=None): | |||
"""Same as :py:meth:`__getattr__`, but has a *default* if missing.""" | |||
return self._kwargs.get(key, default) | |||
def make(name): | |||
"""Create a new Token class using ``type()`` and add it to ``__all__``.""" | |||
@@ -24,7 +24,7 @@ | |||
from __future__ import unicode_literals | |||
__all__ = ["get_wikicode", "is_parsable", "is_visible", "is_single", | |||
__all__ = ["get_html_tag", "is_parsable", "is_visible", "is_single", | |||
"is_single_only"] | |||
PARSER_BLACKLIST = [ | |||
@@ -44,20 +44,16 @@ INVISIBLE_TAGS = [ | |||
SINGLE_ONLY = ["br", "hr", "meta", "link", "img"] | |||
SINGLE = SINGLE_ONLY + ["li", "dt", "dd"] | |||
WIKICODE = { | |||
"i": {"open": "''", "close": "''"}, | |||
"b": {"open": "'''", "close": "'''"}, | |||
"ul": {"open": "*"}, | |||
"ol": {"open": "#"}, | |||
"dt": {"open": ";"}, | |||
"dd": {"open": ":"}, | |||
"hr": {"open": "----"}, | |||
MARKUP_TO_HTML = { | |||
"#": "li", | |||
"*": "li", | |||
";": "dt", | |||
":": "dd" | |||
} | |||
def get_wikicode(tag): | |||
"""Return the appropriate wikicode before and after the given *tag*.""" | |||
data = WIKICODE[tag.lower()] | |||
return (data.get("open"), data.get("close")) | |||
def get_html_tag(markup): | |||
"""Return the HTML tag associated with the given wiki-markup.""" | |||
return MARKUP_TO_HTML[markup] | |||
def is_parsable(tag): | |||
"""Return if the given *tag*'s contents should be passed to the parser.""" | |||
@@ -106,7 +106,7 @@ class TreeEqualityTestCase(TestCase): | |||
self.assertEqual(exp_attr.pad_first, act_attr.pad_first) | |||
self.assertEqual(exp_attr.pad_before_eq, act_attr.pad_before_eq) | |||
self.assertEqual(exp_attr.pad_after_eq, act_attr.pad_after_eq) | |||
self.assertIs(expected.showtag, actual.showtag) | |||
self.assertIs(expected.wiki_markup, actual.wiki_markup) | |||
self.assertIs(expected.self_closing, actual.self_closing) | |||
self.assertIs(expected.invalid, actual.invalid) | |||
self.assertIs(expected.implicit, actual.implicit) | |||
@@ -303,6 +303,20 @@ class TestBuilder(TreeEqualityTestCase): | |||
Text(" "), Wikilink(wraptext("q")), Text(" "), | |||
Template(wraptext("r"))]), True, " \n ", " ", | |||
" ")])])), | |||
# "''italic text''" | |||
([tokens.TagOpenOpen(wiki_markup="''"), tokens.Text(text="i"), | |||
tokens.TagCloseOpen(), tokens.Text(text="italic text"), | |||
tokens.TagOpenClose(), tokens.Text(text="i"), | |||
tokens.TagCloseClose()], | |||
wrap([Tag(wraptext("i"), wraptext("italic text"), | |||
wiki_markup="''")])), | |||
# * bullet | |||
([tokens.TagOpenOpen(wiki_markup="*"), tokens.Text(text="li"), | |||
tokens.TagCloseSelfclose(), tokens.Text(text=" bullet")], | |||
wrap([Tag(wraptext("li"), wiki_markup="*", self_closing=True), | |||
Text(" bullet")])), | |||
] | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
@@ -50,8 +50,8 @@ class TestTag(TreeEqualityTestCase): | |||
implicit=True) | |||
node7 = Tag(wraptext("br"), self_closing=True, invalid=True, | |||
padding=" ") | |||
node8 = Tag(wraptext("hr"), showtag=False, self_closing=True) | |||
node9 = Tag(wraptext("i"), wraptext("italics!"), showtag=False) | |||
node8 = Tag(wraptext("hr"), wiki_markup="----", self_closing=True) | |||
node9 = Tag(wraptext("i"), wraptext("italics!"), wiki_markup="''") | |||
self.assertEqual("<ref></ref>", str(node1)) | |||
self.assertEqual('<span style="color: red;">foo</span>', str(node2)) | |||
@@ -72,7 +72,7 @@ class TestTag(TreeEqualityTestCase): | |||
# <ref>foobar</ref> | |||
node1 = Tag(wrap([node1n1]), wrap([node1n2])) | |||
# '''bold text''' | |||
node2 = Tag(wraptext("i"), wrap([node2n1]), showtag=False) | |||
node2 = Tag(wraptext("b"), wrap([node2n1]), wiki_markup="'''") | |||
# <img id="foo" class="bar" /> | |||
node3 = Tag(wrap([node3n1]), | |||
attrs=[Attribute(wrap([node3n2]), wrap([node3n3])), | |||
@@ -158,15 +158,15 @@ class TestTag(TreeEqualityTestCase): | |||
self.assertEqual([], node1.attributes) | |||
self.assertIs(attrs, node2.attributes) | |||
def test_showtag(self): | |||
"""test getter/setter for the showtag attribute""" | |||
def test_wiki_markup(self): | |||
"""test getter/setter for the wiki_markup attribute""" | |||
node = Tag(wraptext("i"), wraptext("italic text")) | |||
self.assertTrue(node.showtag) | |||
node.showtag = False | |||
self.assertFalse(node.showtag) | |||
self.assertIs(None, node.wiki_markup) | |||
node.wiki_markup = "''" | |||
self.assertEqual("''", node.wiki_markup) | |||
self.assertEqual("''italic text''", node) | |||
node.showtag = 1 | |||
self.assertTrue(node.showtag) | |||
node.wiki_markup = False | |||
self.assertFalse(node.wiki_markup) | |||
self.assertEqual("<i>italic text</i>", node) | |||
def test_self_closing(self): | |||
@@ -44,8 +44,8 @@ class TestTokens(unittest.TestCase): | |||
self.assertEqual("bar", token2.foo) | |||
self.assertEqual(123, token2.baz) | |||
self.assertRaises(KeyError, lambda: token1.foo) | |||
self.assertRaises(KeyError, lambda: token2.bar) | |||
self.assertFalse(token1.foo) | |||
self.assertFalse(token2.bar) | |||
token1.spam = "eggs" | |||
token2.foo = "ham" | |||
@@ -53,7 +53,7 @@ class TestTokens(unittest.TestCase): | |||
self.assertEqual("eggs", token1.spam) | |||
self.assertEqual("ham", token2.foo) | |||
self.assertRaises(KeyError, lambda: token2.baz) | |||
self.assertFalse(token2.baz) | |||
self.assertRaises(KeyError, delattr, token2, "baz") | |||
def test_repr(self): | |||
@@ -0,0 +1,516 @@ | |||
name: basic_italics | |||
label: basic italic text | |||
input: "''text''" | |||
output: [TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="text"), TagOpenClose(), Text(text="i"), TagCloseClose()] | |||
--- | |||
name: basic_bold | |||
label: basic bold text | |||
input: "'''text'''" | |||
output: [TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="text"), TagOpenClose(), Text(text="b"), TagCloseClose()] | |||
--- | |||
name: basic_ul | |||
label: basic unordered list | |||
input: "*text" | |||
output: [TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), Text(text="text")] | |||
--- | |||
name: basic_ol | |||
label: basic ordered list | |||
input: "#text" | |||
output: [TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), Text(text="text")] | |||
--- | |||
name: basic_dt | |||
label: basic description term | |||
input: ";text" | |||
output: [TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="text")] | |||
--- | |||
name: basic_dd | |||
label: basic description item | |||
input: ":text" | |||
output: [TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="text")] | |||
--- | |||
name: basic_hr | |||
label: basic horizontal rule | |||
input: "----" | |||
output: [TagOpenOpen(wiki_markup="----"), Text(text="hr"), TagCloseSelfclose()] | |||
--- | |||
name: complex_italics | |||
label: italics with a lot in them | |||
input: "''this is a&nbsp;test of [[Italic text|italics]] with {{plenty|of|stuff}}''" | |||
output: [TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="this is a"), HTMLEntityStart(), Text(text="nbsp"), HTMLEntityEnd(), Text(text="test of "), WikilinkOpen(), Text(text="Italic text"), WikilinkSeparator(), Text(text="italics"), WikilinkClose(), Text(text=" with "), TemplateOpen(), Text(text="plenty"), TemplateParamSeparator(), Text(text="of"), TemplateParamSeparator(), Text(text="stuff"), TemplateClose(), TagOpenClose(), Text(text="i"), TagCloseClose()] | |||
--- | |||
name: multiline_italics | |||
label: italics spanning multiple lines | |||
input: "foo\nbar''testing\ntext\nspanning\n\n\n\n\nmultiple\nlines''foo\n\nbar" | |||
output: [Text(text="foo\nbar"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="testing\ntext\nspanning\n\n\n\n\nmultiple\nlines"), TagOpenClose(), Text(text="i"), TagCloseClose(), Text(text="foo\n\nbar")] | |||
--- | |||
name: unending_italics | |||
label: italics without an ending tag | |||
input: "''unending formatting!" | |||
output: [Text(text="''unending formatting!")] | |||
--- | |||
name: misleading_italics_end | |||
label: italics with something that looks like an end but isn't | |||
input: "''this is 'not' the en'd'<nowiki>''</nowiki>" | |||
output: [Text(text="''this is 'not' the en'd'"), TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="''"), TagOpenClose(), Text(text="nowiki"), TagCloseClose()] | |||
--- | |||
name: italics_start_outside_end_inside | |||
label: italics that start outside a link and end inside it | |||
input: "''foo[[bar|baz'']]spam" | |||
output: [Text(text="''foo"), WikilinkOpen(), Text(text="bar"), WikilinkSeparator(), Text(text="baz''"), WikilinkClose(), Text(text="spam")] | |||
--- | |||
name: italics_start_inside_end_outside | |||
label: italics that start inside a link and end outside it | |||
input: "[[foo|''bar]]baz''spam" | |||
output: [Text(text="[[foo|"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="bar]]baz"), TagOpenClose(), Text(text="i"), TagCloseClose(), Text(text="spam")] | |||
--- | |||
name: complex_bold | |||
label: bold with a lot in it | |||
input: "'''this is a&nbsp;test of [[Bold text|bold]] with {{plenty|of|stuff}}'''" | |||
output: [TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="this is a"), HTMLEntityStart(), Text(text="nbsp"), HTMLEntityEnd(), Text(text="test of "), WikilinkOpen(), Text(text="Bold text"), WikilinkSeparator(), Text(text="bold"), WikilinkClose(), Text(text=" with "), TemplateOpen(), Text(text="plenty"), TemplateParamSeparator(), Text(text="of"), TemplateParamSeparator(), Text(text="stuff"), TemplateClose(), TagOpenClose(), Text(text="b"), TagCloseClose()] | |||
--- | |||
name: multiline_bold | |||
label: bold spanning multiple lines | |||
input: "foo\nbar'''testing\ntext\nspanning\n\n\n\n\nmultiple\nlines'''foo\n\nbar" | |||
output: [Text(text="foo\nbar"), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="testing\ntext\nspanning\n\n\n\n\nmultiple\nlines"), TagOpenClose(), Text(text="b"), TagCloseClose(), Text(text="foo\n\nbar")] | |||
--- | |||
name: unending_bold | |||
label: bold without an ending tag | |||
input: "'''unending formatting!" | |||
output: [Text(text="'''unending formatting!")] | |||
--- | |||
name: misleading_bold_end | |||
label: bold with something that looks like an end but isn't | |||
input: "'''this is 'not' the en''d'<nowiki>'''</nowiki>" | |||
output: [Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="this is 'not' the en"), TagOpenClose(), Text(text="i"), TagCloseClose(), Text(text="d'"), TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="'''"), TagOpenClose(), Text(text="nowiki"), TagCloseClose()] | |||
--- | |||
name: bold_start_outside_end_inside | |||
label: bold that start outside a link and end inside it | |||
input: "'''foo[[bar|baz''']]spam" | |||
output: [Text(text="'''foo"), WikilinkOpen(), Text(text="bar"), WikilinkSeparator(), Text(text="baz'''"), WikilinkClose(), Text(text="spam")] | |||
--- | |||
name: bold_start_inside_end_outside | |||
label: bold that start inside a link and end outside it | |||
input: "[[foo|'''bar]]baz'''spam" | |||
output: [Text(text="[[foo|"), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="bar]]baz"), TagOpenClose(), Text(text="b"), TagCloseClose(), Text(text="spam")] | |||
--- | |||
name: bold_and_italics | |||
label: bold and italics together | |||
input: "this is '''''bold and italic text'''''!" | |||
output: [Text(text="this is "), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="bold and italic text"), TagOpenClose(), Text(text="b"), TagCloseClose(), TagOpenClose(), Text(text="i"), TagCloseClose(), Text(text="!")] | |||
--- | |||
name: both_then_bold | |||
label: text that starts bold/italic, then is just bold | |||
input: "'''''both''bold'''" | |||
output: [TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="both"), TagOpenClose(), Text(text="i"), TagCloseClose(), Text(text="bold"), TagOpenClose(), Text(text="b"), TagCloseClose()] | |||
--- | |||
name: both_then_italics | |||
label: text that starts bold/italic, then is just italic | |||
input: "'''''both'''italics''" | |||
output: [TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="both"), TagOpenClose(), Text(text="b"), TagCloseClose(), Text(text="italics"), TagOpenClose(), Text(text="i"), TagCloseClose()] | |||
--- | |||
name: bold_then_both | |||
label: text that starts just bold, then is bold/italic | |||
input: "'''bold''both'''''" | |||
output: [TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="bold"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="both"), TagOpenClose(), Text(text="i"), TagCloseClose(), TagOpenClose(), Text(text="b"), TagCloseClose()] | |||
--- | |||
name: italics_then_both | |||
label: text that starts just italic, then is bold/italic | |||
input: "''italics'''both'''''" | |||
output: [TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="italics"), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="both"), TagOpenClose(), Text(text="b"), TagCloseClose(), TagOpenClose(), Text(text="i"), TagCloseClose()] | |||
--- | |||
name: italics_then_bold | |||
label: text that starts italic, then is bold | |||
input: "none''italics'''''bold'''none" | |||
output: [Text(text="none"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="italics"), TagOpenClose(), Text(text="i"), TagCloseClose(), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="bold"), TagOpenClose(), Text(text="b"), TagCloseClose(), Text(text="none")] | |||
--- | |||
name: bold_then_italics | |||
label: text that starts bold, then is italic | |||
input: "none'''bold'''''italics''none" | |||
output: [Text(text="none"), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="bold"), TagOpenClose(), Text(text="b"), TagCloseClose(), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="italics"), TagOpenClose(), Text(text="i"), TagCloseClose(), Text(text="none")] | |||
--- | |||
name: five_three | |||
label: five ticks to open, three to close (bold) | |||
input: "'''''foobar'''" | |||
output: [Text(text="''"), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="foobar"), TagOpenClose(), Text(text="b"), TagCloseClose()] | |||
--- | |||
name: five_two | |||
label: five ticks to open, two to close (bold) | |||
input: "'''''foobar''" | |||
output: [Text(text="'''"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="foobar"), TagOpenClose(), Text(text="i"), TagCloseClose()] | |||
--- | |||
name: four | |||
label: four ticks | |||
input: "foo ''''bar'''' baz" | |||
output: [Text(text="foo '"), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="bar'"), TagOpenClose(), Text(text="b"), TagCloseClose(), Text(text=" baz")] | |||
--- | |||
name: four_two | |||
label: four ticks to open, two to close | |||
input: "foo ''''bar'' baz" | |||
output: [Text(text="foo ''"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="bar"), TagOpenClose(), Text(text="i"), TagCloseClose(), Text(text=" baz")] | |||
--- | |||
name: two_three | |||
label: two ticks to open, three to close | |||
input: "foo ''bar''' baz" | |||
output: [Text(text="foo "), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="bar'"), TagOpenClose(), Text(text="i"), TagCloseClose(), Text(text=" baz")] | |||
--- | |||
name: two_four | |||
label: two ticks to open, four to close | |||
input: "foo ''bar'''' baz" | |||
output: [Text(text="foo "), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="bar''"), TagOpenClose(), Text(text="i"), TagCloseClose(), Text(text=" baz")] | |||
--- | |||
name: two_three_two | |||
label: two ticks to open, three to close, two afterwards | |||
input: "foo ''bar''' baz''" | |||
output: [Text(text="foo "), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="bar''' baz"), TagOpenClose(), Text(text="i"), TagCloseClose()] | |||
--- | |||
name: two_four_four | |||
label: two ticks to open, four to close, four afterwards | |||
input: "foo ''bar'''' baz''''" | |||
output: [Text(text="foo ''bar'"), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text=" baz'"), TagOpenClose(), Text(text="b"), TagCloseClose()] | |||
--- | |||
name: seven | |||
label: seven ticks | |||
input: "'''''''seven'''''''" | |||
output: [Text(text="''"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="seven''"), TagOpenClose(), Text(text="b"), TagCloseClose(), TagOpenClose(), Text(text="i"), TagCloseClose()] | |||
--- | |||
name: complex_ul | |||
label: ul with a lot in it | |||
input: "* this is a&nbsp;test of an [[Unordered list|ul]] with {{plenty|of|stuff}}" | |||
output: [TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), Text(text=" this is a"), HTMLEntityStart(), Text(text="nbsp"), HTMLEntityEnd(), Text(text="test of an "), WikilinkOpen(), Text(text="Unordered list"), WikilinkSeparator(), Text(text="ul"), WikilinkClose(), Text(text=" with "), TemplateOpen(), Text(text="plenty"), TemplateParamSeparator(), Text(text="of"), TemplateParamSeparator(), Text(text="stuff"), TemplateClose()] | |||
--- | |||
name: ul_multiline_template | |||
label: ul with a template that spans multiple lines | |||
input: "* this has a template with a {{line|\nbreak}}\nthis is not part of the list" | |||
output: [TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), Text(text=" this has a template with a "), TemplateOpen(), Text(text="line"), TemplateParamSeparator(), Text(text="\nbreak"), TemplateClose(), Text(text="\nthis is not part of the list")] | |||
--- | |||
name: ul_adjacent | |||
label: multiple adjacent uls | |||
input: "a\n*b\n*c\nd\n*e\nf" | |||
output: [Text(text="a\n"), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), Text(text="b\n"), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), Text(text="c\nd\n"), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), Text(text="e\nf")] | |||
--- | |||
name: ul_depths | |||
label: multiple adjacent uls, with differing depths | |||
input: "*a\n**b\n***c\n********d\n**e\nf\n***g" | |||
output: [TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), Text(text="a\n"), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), Text(text="b\n"), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), Text(text="c\n"), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), Text(text="d\n"), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), Text(text="e\nf\n"), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), Text(text="g")] | |||
--- | |||
name: ul_space_before | |||
label: uls with space before them | |||
input: "foo *bar\n *baz\n*buzz" | |||
output: [Text(text="foo *bar\n *baz\n"), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), Text(text="buzz")] | |||
--- | |||
name: ul_interruption | |||
label: high-depth ul with something blocking it | |||
input: "**f*oobar" | |||
output: [TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), Text(text="f*oobar")] | |||
--- | |||
name: complex_ol | |||
label: ol with a lot in it | |||
input: "# this is a&nbsp;test of an [[Ordered list|ol]] with {{plenty|of|stuff}}"
output: [TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), Text(text=" this is a"), HTMLEntityStart(), Text(text="nbsp"), HTMLEntityEnd(), Text(text="test of an "), WikilinkOpen(), Text(text="Ordered list"), WikilinkSeparator(), Text(text="ol"), WikilinkClose(), Text(text=" with "), TemplateOpen(), Text(text="plenty"), TemplateParamSeparator(), Text(text="of"), TemplateParamSeparator(), Text(text="stuff"), TemplateClose()] | |||
--- | |||
name: ol_multiline_template | |||
label: ol with a template that spans multiple lines
input: "# this has a template with a {{line|\nbreak}}\nthis is not part of the list" | |||
output: [TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), Text(text=" this has a template with a "), TemplateOpen(), Text(text="line"), TemplateParamSeparator(), Text(text="\nbreak"), TemplateClose(), Text(text="\nthis is not part of the list")] | |||
--- | |||
name: ol_adjacent | |||
label: multiple adjacent ols
input: "a\n#b\n#c\nd\n#e\nf" | |||
output: [Text(text="a\n"), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), Text(text="b\n"), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), Text(text="c\nd\n"), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), Text(text="e\nf")] | |||
--- | |||
name: ol_depths | |||
label: multiple adjacent ols, with differing depths
input: "#a\n##b\n###c\n########d\n##e\nf\n###g" | |||
output: [TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), Text(text="a\n"), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), Text(text="b\n"), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), Text(text="c\n"), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), Text(text="d\n"), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), Text(text="e\nf\n"), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), Text(text="g")] | |||
--- | |||
name: ol_space_before | |||
label: ols with space before them | |||
input: "foo #bar\n #baz\n#buzz" | |||
output: [Text(text="foo #bar\n #baz\n"), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), Text(text="buzz")] | |||
--- | |||
name: ol_interruption | |||
label: high-depth ol with something blocking it | |||
input: "##f#oobar" | |||
output: [TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), Text(text="f#oobar")] | |||
--- | |||
name: ul_ol_mix | |||
label: a mix of adjacent uls and ols | |||
input: "*a\n*#b\n*##c\n*##*#*#*d\n*#e\nf\n##*g" | |||
output: [TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), Text(text="a\n"), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), Text(text="b\n"), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), Text(text="c\n"), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), Text(text="d\n"), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), Text(text="e\nf\n"), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), Text(text="g")] | |||
--- | |||
name: complex_dt | |||
label: dt with a lot in it | |||
input: "; this is a&nbsp;test of an [[description term|dt]] with {{plenty|of|stuff}}"
output: [TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text=" this is a"), HTMLEntityStart(), Text(text="nbsp"), HTMLEntityEnd(), Text(text="test of an "), WikilinkOpen(), Text(text="description term"), WikilinkSeparator(), Text(text="dt"), WikilinkClose(), Text(text=" with "), TemplateOpen(), Text(text="plenty"), TemplateParamSeparator(), Text(text="of"), TemplateParamSeparator(), Text(text="stuff"), TemplateClose()] | |||
--- | |||
name: dt_multiline_template | |||
label: dt with a template that spans multiple lines
input: "; this has a template with a {{line|\nbreak}}\nthis is not part of the list" | |||
output: [TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text=" this has a template with a "), TemplateOpen(), Text(text="line"), TemplateParamSeparator(), Text(text="\nbreak"), TemplateClose(), Text(text="\nthis is not part of the list")] | |||
--- | |||
name: dt_adjacent | |||
label: multiple adjacent dts
input: "a\n;b\n;c\nd\n;e\nf" | |||
output: [Text(text="a\n"), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="b\n"), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="c\nd\n"), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="e\nf")] | |||
--- | |||
name: dt_depths | |||
label: multiple adjacent dts, with differing depths
input: ";a\n;;b\n;;;c\n;;;;;;;;d\n;;e\nf\n;;;g" | |||
output: [TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="a\n"), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="b\n"), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="c\n"), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="d\n"), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="e\nf\n"), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="g")] | |||
--- | |||
name: dt_space_before | |||
label: dts with space before them | |||
input: "foo ;bar\n ;baz\n;buzz" | |||
output: [Text(text="foo ;bar\n ;baz\n"), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="buzz")] | |||
--- | |||
name: dt_interruption | |||
label: high-depth dt with something blocking it | |||
input: ";;f;oobar" | |||
output: [TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="f;oobar")] | |||
--- | |||
name: complex_dd | |||
label: dd with a lot in it | |||
input: ": this is a&nbsp;test of an [[description item|dd]] with {{plenty|of|stuff}}"
output: [TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text=" this is a"), HTMLEntityStart(), Text(text="nbsp"), HTMLEntityEnd(), Text(text="test of an "), WikilinkOpen(), Text(text="description item"), WikilinkSeparator(), Text(text="dd"), WikilinkClose(), Text(text=" with "), TemplateOpen(), Text(text="plenty"), TemplateParamSeparator(), Text(text="of"), TemplateParamSeparator(), Text(text="stuff"), TemplateClose()] | |||
--- | |||
name: dd_multiline_template | |||
label: dd with a template that spans multiple lines
input: ": this has a template with a {{line|\nbreak}}\nthis is not part of the list" | |||
output: [TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text=" this has a template with a "), TemplateOpen(), Text(text="line"), TemplateParamSeparator(), Text(text="\nbreak"), TemplateClose(), Text(text="\nthis is not part of the list")] | |||
--- | |||
name: dd_adjacent | |||
label: multiple adjacent dds
input: "a\n:b\n:c\nd\n:e\nf" | |||
output: [Text(text="a\n"), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="b\n"), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="c\nd\n"), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="e\nf")] | |||
--- | |||
name: dd_depths | |||
label: multiple adjacent dds, with differing depths
input: ":a\n::b\n:::c\n::::::::d\n::e\nf\n:::g" | |||
output: [TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="a\n"), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="b\n"), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="c\n"), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="d\n"), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="e\nf\n"), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="g")] | |||
--- | |||
name: dd_space_before | |||
label: dds with space before them | |||
input: "foo :bar\n :baz\n:buzz" | |||
output: [Text(text="foo :bar\n :baz\n"), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="buzz")] | |||
--- | |||
name: dd_interruption | |||
label: high-depth dd with something blocking it | |||
input: "::f:oobar" | |||
output: [TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="f:oobar")] | |||
--- | |||
name: dt_dd_mix | |||
label: a mix of adjacent dts and dds | |||
input: ";a\n;:b\n;::c\n;::;:;:;d\n;:e\nf\n::;g" | |||
output: [TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="a\n"), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="b\n"), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="c\n"), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="d\n"), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="e\nf\n"), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="g")] | |||
--- | |||
name: dt_dd_mix2 | |||
label: the correct usage of a dt/dd unit, as in a dl | |||
input: ";foo:bar:baz" | |||
output: [TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="foo"), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="bar:baz")] | |||
--- | |||
name: dt_dd_mix3 | |||
label: another example of correct (but strange) dt/dd usage | |||
input: ":;;::foo:bar:baz" | |||
output: [TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="foo"), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="bar:baz")] | |||
--- | |||
name: ul_ol_dt_dd_mix | |||
label: an assortment of uls, ols, dds, and dts | |||
input: ";:#*foo\n:#*;foo\n#*;:foo\n*;:#foo" | |||
output: [TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), Text(text="foo\n"), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="foo\n"), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="foo\n"), TagOpenOpen(wiki_markup="*"), Text(text="li"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), TagOpenOpen(wiki_markup="#"), Text(text="li"), TagCloseSelfclose(), Text(text="foo")] | |||
--- | |||
name: hr_text_before | |||
label: text before an otherwise-valid hr | |||
input: "foo----" | |||
output: [Text(text="foo----")] | |||
--- | |||
name: hr_text_after | |||
label: text after a valid hr | |||
input: "----bar" | |||
output: [TagOpenOpen(wiki_markup="----"), Text(text="hr"), TagCloseSelfclose(), Text(text="bar")] | |||
--- | |||
name: hr_text_before_after | |||
label: text at both ends of an otherwise-valid hr | |||
input: "foo----bar" | |||
output: [Text(text="foo----bar")] | |||
--- | |||
name: hr_newlines | |||
label: newlines surrounding a valid hr | |||
input: "foo\n----\nbar" | |||
output: [Text(text="foo\n"), TagOpenOpen(wiki_markup="----"), Text(text="hr"), TagCloseSelfclose(), Text(text="\nbar")] | |||
--- | |||
name: hr_adjacent | |||
label: two adjacent hrs | |||
input: "----\n----" | |||
output: [TagOpenOpen(wiki_markup="----"), Text(text="hr"), TagCloseSelfclose(), Text(text="\n"), TagOpenOpen(wiki_markup="----"), Text(text="hr"), TagCloseSelfclose()] | |||
--- | |||
name: hr_adjacent_space | |||
label: two adjacent hrs, with a space before the second one, making it invalid | |||
input: "----\n ----" | |||
output: [TagOpenOpen(wiki_markup="----"), Text(text="hr"), TagCloseSelfclose(), Text(text="\n ----")] | |||
--- | |||
name: hr_short | |||
label: an invalid three-hyphen-long hr | |||
input: "---" | |||
output: [Text(text="---")] | |||
--- | |||
name: hr_long | |||
label: a very long, valid hr | |||
input: "------------------------------------------" | |||
output: [TagOpenOpen(wiki_markup="------------------------------------------"), Text(text="hr"), TagCloseSelfclose()] | |||
--- | |||
name: hr_interruption_short | |||
label: a hr that is interrupted, making it invalid | |||
input: "---x-" | |||
output: [Text(text="---x-")] | |||
--- | |||
name: hr_interruption_long | |||
label: a hr that is interrupted, but the first part remains valid because it is long enough | |||
input: "----x--" | |||
output: [TagOpenOpen(wiki_markup="----"), Text(text="hr"), TagCloseSelfclose(), Text(text="x--")] |