@@ -46,6 +46,7 @@ nodes Package | |||
.. automodule:: mwparserfromhell.nodes.tag | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`template` Module | |||
@@ -30,6 +30,12 @@ mwparserfromhell Package | |||
:members: | |||
:undoc-members: | |||
:mod:`tag_defs` Module | |||
---------------------- | |||
.. automodule:: mwparserfromhell.tag_defs | |||
:members: | |||
:mod:`utils` Module | |||
------------------- | |||
@@ -36,18 +36,23 @@ class Attribute(StringMixIn): | |||
whose value is ``"foo"``. | |||
""" | |||
def __init__(self, name, value=None, quoted=True): | |||
def __init__(self, name, value=None, quoted=True, pad_first="", | |||
pad_before_eq="", pad_after_eq=""): | |||
super(Attribute, self).__init__() | |||
self._name = name | |||
self._value = value | |||
self._quoted = quoted | |||
self._pad_first = pad_first | |||
self._pad_before_eq = pad_before_eq | |||
self._pad_after_eq = pad_after_eq | |||
def __unicode__(self): | |||
base = self.pad_first + str(self.name) + self.pad_before_eq | |||
if self.value: | |||
if self.quoted: | |||
return str(self.name) + '="' + str(self.value) + '"' | |||
return str(self.name) + "=" + str(self.value) | |||
return str(self.name) | |||
return base + '="' + self.pad_after_eq + str(self.value) + '"' | |||
return base + "=" + self.pad_after_eq + str(self.value) | |||
return base | |||
@property | |||
def name(self): | |||
@@ -64,14 +69,41 @@ class Attribute(StringMixIn): | |||
"""Whether the attribute's value is quoted with double quotes.""" | |||
return self._quoted | |||
@property | |||
def pad_first(self): | |||
"""Spacing to insert right before the attribute.""" | |||
return self._pad_first | |||
@property | |||
def pad_before_eq(self): | |||
"""Spacing to insert right before the equal sign.""" | |||
return self._pad_before_eq | |||
@property | |||
def pad_after_eq(self): | |||
"""Spacing to insert right after the equal sign.""" | |||
return self._pad_after_eq | |||
@name.setter | |||
def name(self, newval): | |||
self._name = parse_anything(newval) | |||
def name(self, value): | |||
self._name = parse_anything(value) | |||
@value.setter | |||
def value(self, newval): | |||
self._value = parse_anything(newval) | |||
@quoted.setter | |||
def quoted(self, newval): | |||
self._quoted = bool(newval) | |||
def quoted(self, value): | |||
self._quoted = bool(value) | |||
@pad_first.setter | |||
def pad_first(self, value): | |||
self._pad_first = str(value) | |||
@pad_before_eq.setter | |||
def pad_before_eq(self, value): | |||
self._pad_before_eq = str(value) | |||
@pad_after_eq.setter | |||
def pad_after_eq(self, value): | |||
self._pad_after_eq = str(value) |
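For illustration, here is how the new padding fields feed into ``__unicode__`` (a minimal hand-built sketch using ``parse_anything``; in practice the builder constructs these objects):

>>> from mwparserfromhell.nodes.extras import Attribute
>>> from mwparserfromhell.utils import parse_anything
>>> attr = Attribute(parse_anything("name"), parse_anything("foo bar"),
...                  quoted=True, pad_first=" ")
>>> str(attr)
' name="foo bar"'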
@@ -24,6 +24,7 @@ from __future__ import unicode_literals | |||
from . import Node, Text | |||
from ..compat import str | |||
from ..tag_defs import get_wikicode, is_visible | |||
from ..utils import parse_anything | |||
__all__ = ["Tag"] | |||
@@ -31,79 +32,39 @@ __all__ = ["Tag"] | |||
class Tag(Node): | |||
"""Represents an HTML-style tag in wikicode, like ``<ref>``.""" | |||
TAG_UNKNOWN = 0 | |||
# Basic HTML: | |||
TAG_ITALIC = 1 | |||
TAG_BOLD = 2 | |||
TAG_UNDERLINE = 3 | |||
TAG_STRIKETHROUGH = 4 | |||
TAG_UNORDERED_LIST = 5 | |||
TAG_ORDERED_LIST = 6 | |||
TAG_DEF_TERM = 7 | |||
TAG_DEF_ITEM = 8 | |||
TAG_BLOCKQUOTE = 9 | |||
TAG_RULE = 10 | |||
TAG_BREAK = 11 | |||
TAG_ABBR = 12 | |||
TAG_PRE = 13 | |||
TAG_MONOSPACE = 14 | |||
TAG_CODE = 15 | |||
TAG_SPAN = 16 | |||
TAG_DIV = 17 | |||
TAG_FONT = 18 | |||
TAG_SMALL = 19 | |||
TAG_BIG = 20 | |||
TAG_CENTER = 21 | |||
# MediaWiki parser hooks: | |||
TAG_REF = 101 | |||
TAG_GALLERY = 102 | |||
TAG_MATH = 103 | |||
TAG_NOWIKI = 104 | |||
TAG_NOINCLUDE = 105 | |||
TAG_INCLUDEONLY = 106 | |||
TAG_ONLYINCLUDE = 107 | |||
# Additional parser hooks: | |||
TAG_SYNTAXHIGHLIGHT = 201 | |||
TAG_POEM = 202 | |||
# Lists of tags: | |||
TAGS_INVISIBLE = set((TAG_REF, TAG_GALLERY, TAG_MATH, TAG_NOINCLUDE)) | |||
TAGS_VISIBLE = set(range(300)) - TAGS_INVISIBLE | |||
def __init__(self, type_, tag, contents=None, attrs=None, showtag=True, | |||
self_closing=False, open_padding=0, close_padding=0): | |||
def __init__(self, tag, contents=None, attrs=None, showtag=True, | |||
self_closing=False, invalid=False, implicit=False, padding="", | |||
closing_tag=None): | |||
super(Tag, self).__init__() | |||
self._type = type_ | |||
self._tag = tag | |||
self._contents = contents | |||
if attrs: | |||
self._attrs = attrs | |||
else: | |||
self._attrs = [] | |||
self._attrs = attrs if attrs else [] | |||
self._showtag = showtag | |||
self._self_closing = self_closing | |||
self._open_padding = open_padding | |||
self._close_padding = close_padding | |||
self._invalid = invalid | |||
self._implicit = implicit | |||
self._padding = padding | |||
if closing_tag: | |||
self._closing_tag = closing_tag | |||
elif not self_closing: | |||
self._closing_tag = tag | |||
def __unicode__(self): | |||
if not self.showtag: | |||
open_, close = self._translate() | |||
open_, close = get_wikicode(self.tag)
if self.self_closing: | |||
return open_ | |||
else: | |||
return open_ + str(self.contents) + close | |||
result = "<" + str(self.tag) | |||
if self.attrs: | |||
result += " " + " ".join([str(attr) for attr in self.attrs]) | |||
result = ("</" if self.invalid else "<") + str(self.tag) | |||
if self.attributes: | |||
result += "".join([str(attr) for attr in self.attributes]) | |||
if self.self_closing: | |||
result += " " * self.open_padding + "/>" | |||
result += self.padding + (">" if self.implicit else "/>") | |||
else: | |||
result += " " * self.open_padding + ">" + str(self.contents) | |||
result += "</" + str(self.tag) + " " * self.close_padding + ">" | |||
result += self.padding + ">" + str(self.contents) | |||
result += "</" + str(self.closing_tag) + ">" | |||
return result | |||
def __iternodes__(self, getter): | |||
@@ -111,66 +72,43 @@ class Tag(Node): | |||
if self.showtag: | |||
for child in getter(self.tag): | |||
yield self.tag, child | |||
for attr in self.attrs: | |||
for attr in self.attributes: | |||
for child in getter(attr.name): | |||
yield attr.name, child | |||
if attr.value: | |||
for child in getter(attr.value): | |||
yield attr.value, child | |||
for child in getter(self.contents): | |||
yield self.contents, child | |||
if self.contents: | |||
for child in getter(self.contents): | |||
yield self.contents, child | |||
if not self.self_closing and self.closing_tag: | |||
for child in getter(self.closing_tag): | |||
yield self.closing_tag, child | |||
def __strip__(self, normalize, collapse): | |||
if self.type in self.TAGS_VISIBLE: | |||
if is_visible(self.tag): | |||
return self.contents.strip_code(normalize, collapse) | |||
return None | |||
def __showtree__(self, write, get, mark): | |||
tagnodes = self.tag.nodes | |||
if (not self.attrs and len(tagnodes) == 1 and isinstance(tagnodes[0], Text)): | |||
write("<" + str(tagnodes[0]) + ">") | |||
write("</" if self.invalid else "<") | |||
get(self.tag) | |||
for attr in self.attributes: | |||
get(attr.name) | |||
if not attr.value: | |||
continue | |||
write(" = ") | |||
mark() | |||
get(attr.value) | |||
if self.self_closing: | |||
write(">" if self.implicit else "/>") | |||
else: | |||
write("<") | |||
get(self.tag) | |||
for attr in self.attrs: | |||
get(attr.name) | |||
if not attr.value: | |||
continue | |||
write(" = ") | |||
mark() | |||
get(attr.value) | |||
write(">") | |||
get(self.contents) | |||
if len(tagnodes) == 1 and isinstance(tagnodes[0], Text): | |||
write("</" + str(tagnodes[0]) + ">") | |||
else: | |||
get(self.contents) | |||
write("</") | |||
get(self.tag) | |||
get(self.closing_tag) | |||
write(">") | |||
def _translate(self): | |||
"""If the HTML-style tag has a wikicode representation, return that. | |||
For example, ``<b>Foo</b>`` can be represented as ``'''Foo'''``. This | |||
returns a tuple of the character starting the sequence and the | |||
character ending it. | |||
""" | |||
translations = { | |||
self.TAG_ITALIC: ("''", "''"), | |||
self.TAG_BOLD: ("'''", "'''"), | |||
self.TAG_UNORDERED_LIST: ("*", ""), | |||
self.TAG_ORDERED_LIST: ("#", ""), | |||
self.TAG_DEF_TERM: (";", ""), | |||
self.TAG_DEF_ITEM: (":", ""), | |||
self.TAG_RULE: ("----", ""), | |||
} | |||
return translations[self.type] | |||
@property | |||
def type(self): | |||
"""The tag type.""" | |||
return self._type | |||
@property | |||
def tag(self): | |||
"""The tag itself, as a :py:class:`~.Wikicode` object.""" | |||
@@ -182,7 +120,7 @@ class Tag(Node): | |||
return self._contents | |||
@property | |||
def attrs(self): | |||
def attributes(self): | |||
"""The list of attributes affecting the tag. | |||
Each attribute is an instance of :py:class:`~.Attribute`. | |||
@@ -196,29 +134,47 @@ class Tag(Node): | |||
@property | |||
def self_closing(self): | |||
"""Whether the tag is self-closing with no content.""" | |||
"""Whether the tag is self-closing with no content (like ``<br/>``).""" | |||
return self._self_closing | |||
@property | |||
def open_padding(self): | |||
"""How much spacing to insert before the first closing >.""" | |||
return self._open_padding | |||
def invalid(self): | |||
"""Whether the tag starts with a backslash after the opening bracket. | |||
This makes the tag look like a lone close tag. It is technically | |||
invalid and is only parsable Wikicode when the tag itself is | |||
single-only, like ``<br>`` and ``<img>``. See | |||
:py:func:`tag_defs.is_single_only`. | |||
""" | |||
return self._invalid | |||
@property | |||
def close_padding(self): | |||
"""How much spacing to insert before the last closing >.""" | |||
return self._close_padding | |||
def implicit(self): | |||
"""Whether the tag is implicitly self-closing, with no ending slash. | |||
@type.setter | |||
def type(self, value): | |||
value = int(value) | |||
if value not in self.TAGS_INVISIBLE | self.TAGS_VISIBLE: | |||
raise ValueError(value) | |||
self._type = value | |||
This is only possible for specific "single" tags like ``<br>`` and | |||
``<li>``. See :py:func:`tag_defs.is_single`. This field only has an | |||
effect if :py:attr:`self_closing` is also ``True``. | |||
""" | |||
return self._implicit | |||
@property | |||
def padding(self): | |||
"""Spacing to insert before the first closing ``>``.""" | |||
return self._padding | |||
@property | |||
def closing_tag(self): | |||
"""The closing tag, as a :py:class:`~.Wikicode` object. | |||
This will usually equal :py:attr:`tag`, unless there is additional | |||
spacing, comments, or the like. | |||
""" | |||
return self._closing_tag | |||
@tag.setter | |||
def tag(self, value): | |||
self._tag = parse_anything(value) | |||
self._tag = self._closing_tag = parse_anything(value) | |||
@contents.setter | |||
def contents(self, value): | |||
@@ -232,10 +188,18 @@ class Tag(Node): | |||
def self_closing(self, value): | |||
self._self_closing = bool(value) | |||
@open_padding.setter | |||
def open_padding(self, value): | |||
self._open_padding = int(value) | |||
@invalid.setter | |||
def invalid(self, value): | |||
self._invalid = bool(value) | |||
@implicit.setter | |||
def implicit(self, value): | |||
self._implicit = bool(value) | |||
@padding.setter | |||
def padding(self, value): | |||
self._padding = str(value) | |||
@close_padding.setter | |||
def close_padding(self, value): | |||
self._close_padding = int(value) | |||
@closing_tag.setter | |||
def closing_tag(self, value): | |||
self._closing_tag = parse_anything(value) |
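A rough sketch of the reworked constructor and renderer (hand-built nodes for illustration only; the parser normally creates them):

>>> from mwparserfromhell.nodes import Tag
>>> from mwparserfromhell.utils import parse_anything
>>> print(Tag(parse_anything("ref"), contents=parse_anything("cite")))
<ref>cite</ref>
>>> print(Tag(parse_anything("br"), self_closing=True, implicit=True))
<br>
>>> print(Tag(parse_anything("hr"), self_closing=True, padding=" "))
<hr />

The last two show the ``implicit`` flag and string ``padding`` replacing the old ``open_padding``/``close_padding`` integers.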
@@ -170,7 +170,7 @@ class Builder(object): | |||
self._write(self._handle_token(token)) | |||
def _handle_comment(self): | |||
"""Handle a case where a hidden comment is at the head of the tokens.""" | |||
"""Handle a case where an HTML comment is at the head of the tokens.""" | |||
self._push() | |||
while self._tokens: | |||
token = self._tokens.pop() | |||
@@ -180,7 +180,7 @@ class Builder(object): | |||
else: | |||
self._write(self._handle_token(token)) | |||
def _handle_attribute(self): | |||
def _handle_attribute(self, start): | |||
"""Handle a case where a tag attribute is at the head of the tokens.""" | |||
name, quoted = None, False | |||
self._push() | |||
@@ -191,37 +191,47 @@ class Builder(object): | |||
self._push() | |||
elif isinstance(token, tokens.TagAttrQuote): | |||
quoted = True | |||
elif isinstance(token, (tokens.TagAttrStart, | |||
tokens.TagCloseOpen)): | |||
elif isinstance(token, (tokens.TagAttrStart, tokens.TagCloseOpen, | |||
tokens.TagCloseSelfclose)): | |||
self._tokens.append(token) | |||
if name is not None: | |||
return Attribute(name, self._pop(), quoted) | |||
return Attribute(self._pop(), quoted=quoted) | |||
if name: | |||
value = self._pop() | |||
else: | |||
name, value = self._pop(), None | |||
return Attribute(name, value, quoted, start.pad_first, | |||
start.pad_before_eq, start.pad_after_eq) | |||
else: | |||
self._write(self._handle_token(token)) | |||
def _handle_tag(self, token): | |||
"""Handle a case where a tag is at the head of the tokens.""" | |||
type_, showtag = token.type, token.showtag | |||
attrs = [] | |||
close_tokens = (tokens.TagCloseSelfclose, tokens.TagCloseClose) | |||
implicit, attrs, contents, closing_tag = False, [], None, None | |||
showtag = token.get("showtag", True) | |||
invalid = token.get("invalid", False) | |||
self._push() | |||
while self._tokens: | |||
token = self._tokens.pop() | |||
if isinstance(token, tokens.TagAttrStart): | |||
attrs.append(self._handle_attribute()) | |||
attrs.append(self._handle_attribute(token)) | |||
elif isinstance(token, tokens.TagCloseOpen): | |||
open_pad = token.padding | |||
padding = token.padding | |||
tag = self._pop() | |||
self._push() | |||
elif isinstance(token, tokens.TagCloseSelfclose): | |||
tag = self._pop() | |||
return Tag(type_, tag, attrs=attrs, showtag=showtag, | |||
self_closing=True, open_padding=token.padding) | |||
elif isinstance(token, tokens.TagOpenClose): | |||
contents = self._pop() | |||
elif isinstance(token, tokens.TagCloseClose): | |||
return Tag(type_, tag, contents, attrs, showtag, False, | |||
open_pad, token.padding) | |||
self._push() | |||
elif isinstance(token, close_tokens): | |||
if isinstance(token, tokens.TagCloseSelfclose): | |||
tag = self._pop() | |||
self_closing = True | |||
padding = token.padding | |||
implicit = token.get("implicit", False) | |||
else: | |||
self_closing = False | |||
closing_tag = self._pop() | |||
return Tag(tag, contents, attrs, showtag, self_closing, | |||
invalid, implicit, padding, closing_tag) | |||
else: | |||
self._write(self._handle_token(token)) | |||
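The stream handled here is the same one exercised by the ``test_tag`` case in ``test_builder.py`` further down; building it by hand looks roughly like this:

>>> from mwparserfromhell.parser import tokens
>>> from mwparserfromhell.parser.builder import Builder
>>> toks = [tokens.TagOpenOpen(), tokens.Text(text="ref"),
...         tokens.TagCloseOpen(padding=""), tokens.TagOpenClose(),
...         tokens.Text(text="ref"), tokens.TagCloseClose()]
>>> print(Builder().build(toks))
<ref></ref>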
@@ -62,6 +62,13 @@ Local (stack-specific) contexts: | |||
* :py:const:`COMMENT` | |||
* :py:const:`TAG` | |||
* :py:const:`TAG_OPEN` | |||
* :py:const:`TAG_ATTR` | |||
* :py:const:`TAG_BODY` | |||
* :py:const:`TAG_CLOSE` | |||
* :py:const:`SAFETY_CHECK` | |||
* :py:const:`HAS_TEXT` | |||
@@ -78,37 +85,45 @@ Global contexts: | |||
# Local contexts: | |||
TEMPLATE = 0b00000000000000000111 | |||
TEMPLATE_NAME = 0b00000000000000000001 | |||
TEMPLATE_PARAM_KEY = 0b00000000000000000010 | |||
TEMPLATE_PARAM_VALUE = 0b00000000000000000100 | |||
ARGUMENT = 0b00000000000000011000 | |||
ARGUMENT_NAME = 0b00000000000000001000 | |||
ARGUMENT_DEFAULT = 0b00000000000000010000 | |||
WIKILINK = 0b00000000000001100000 | |||
WIKILINK_TITLE = 0b00000000000000100000 | |||
WIKILINK_TEXT = 0b00000000000001000000 | |||
HEADING = 0b00000001111110000000 | |||
HEADING_LEVEL_1 = 0b00000000000010000000 | |||
HEADING_LEVEL_2 = 0b00000000000100000000 | |||
HEADING_LEVEL_3 = 0b00000000001000000000 | |||
HEADING_LEVEL_4 = 0b00000000010000000000 | |||
HEADING_LEVEL_5 = 0b00000000100000000000 | |||
HEADING_LEVEL_6 = 0b00000001000000000000 | |||
COMMENT = 0b00000010000000000000 | |||
SAFETY_CHECK = 0b11111100000000000000 | |||
HAS_TEXT = 0b00000100000000000000 | |||
FAIL_ON_TEXT = 0b00001000000000000000 | |||
FAIL_NEXT = 0b00010000000000000000 | |||
FAIL_ON_LBRACE = 0b00100000000000000000 | |||
FAIL_ON_RBRACE = 0b01000000000000000000 | |||
FAIL_ON_EQUALS = 0b10000000000000000000 | |||
TEMPLATE_NAME = 1 << 0 | |||
TEMPLATE_PARAM_KEY = 1 << 1 | |||
TEMPLATE_PARAM_VALUE = 1 << 2 | |||
TEMPLATE = TEMPLATE_NAME + TEMPLATE_PARAM_KEY + TEMPLATE_PARAM_VALUE | |||
ARGUMENT_NAME = 1 << 3 | |||
ARGUMENT_DEFAULT = 1 << 4 | |||
ARGUMENT = ARGUMENT_NAME + ARGUMENT_DEFAULT | |||
WIKILINK_TITLE = 1 << 5 | |||
WIKILINK_TEXT = 1 << 6 | |||
WIKILINK = WIKILINK_TITLE + WIKILINK_TEXT | |||
HEADING_LEVEL_1 = 1 << 7 | |||
HEADING_LEVEL_2 = 1 << 8 | |||
HEADING_LEVEL_3 = 1 << 9 | |||
HEADING_LEVEL_4 = 1 << 10 | |||
HEADING_LEVEL_5 = 1 << 11 | |||
HEADING_LEVEL_6 = 1 << 12 | |||
HEADING = (HEADING_LEVEL_1 + HEADING_LEVEL_2 + HEADING_LEVEL_3 + | |||
HEADING_LEVEL_4 + HEADING_LEVEL_5 + HEADING_LEVEL_6) | |||
COMMENT = 1 << 13 | |||
TAG_OPEN = 1 << 14 | |||
TAG_ATTR = 1 << 15 | |||
TAG_BODY = 1 << 16 | |||
TAG_CLOSE = 1 << 17 | |||
TAG = TAG_OPEN + TAG_ATTR + TAG_BODY + TAG_CLOSE | |||
HAS_TEXT = 1 << 18 | |||
FAIL_ON_TEXT = 1 << 19 | |||
FAIL_NEXT = 1 << 20 | |||
FAIL_ON_LBRACE = 1 << 21 | |||
FAIL_ON_RBRACE = 1 << 22 | |||
FAIL_ON_EQUALS = 1 << 23 | |||
SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE + | |||
FAIL_ON_RBRACE + FAIL_ON_EQUALS) | |||
# Global contexts: | |||
GL_HEADING = 0b1 | |||
GL_HEADING = 1 << 0 |
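Because every constant is now a distinct bit, the aggregates are plain sums of their members and context tests remain simple bitwise ANDs, e.g.:

>>> from mwparserfromhell.parser import contexts
>>> ctx = contexts.TAG_OPEN | contexts.HAS_TEXT
>>> bool(ctx & contexts.TAG), bool(ctx & contexts.TAG_BODY)
(True, False)
>>> contexts.TAG == (contexts.TAG_OPEN | contexts.TAG_ATTR |
...                  contexts.TAG_BODY | contexts.TAG_CLOSE)
True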
@@ -41,10 +41,10 @@ SOFTWARE. | |||
#define ALPHANUM "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |||
static const char* MARKERS[] = { | |||
"{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", "/", "-", | |||
"!", "\n", ""}; | |||
"{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", "/", "-", | |||
"\n", ""}; | |||
#define NUM_MARKERS 18 | |||
#define NUM_MARKERS 17 | |||
#define TEXTBUFFER_BLOCKSIZE 1024 | |||
#define MAX_DEPTH 40 | |||
#define MAX_CYCLES 100000 | |||
@@ -60,10 +60,10 @@ static char** entitydefs; | |||
static PyObject* EMPTY; | |||
static PyObject* NOARGS; | |||
static PyObject* tokens; | |||
static PyObject* tag_defs; | |||
/* Tokens */ | |||
/* Tokens: */ | |||
static PyObject* Text; | |||
@@ -102,41 +102,58 @@ static PyObject* TagCloseClose; | |||
/* Local contexts: */ | |||
#define LC_TEMPLATE 0x00007 | |||
#define LC_TEMPLATE_NAME 0x00001 | |||
#define LC_TEMPLATE_PARAM_KEY 0x00002 | |||
#define LC_TEMPLATE_PARAM_VALUE 0x00004 | |||
#define LC_ARGUMENT 0x00018 | |||
#define LC_ARGUMENT_NAME 0x00008 | |||
#define LC_ARGUMENT_DEFAULT 0x00010 | |||
#define LC_WIKILINK 0x00060 | |||
#define LC_WIKILINK_TITLE 0x00020 | |||
#define LC_WIKILINK_TEXT 0x00040 | |||
#define LC_HEADING 0x01F80 | |||
#define LC_HEADING_LEVEL_1 0x00080 | |||
#define LC_HEADING_LEVEL_2 0x00100 | |||
#define LC_HEADING_LEVEL_3 0x00200 | |||
#define LC_HEADING_LEVEL_4 0x00400 | |||
#define LC_HEADING_LEVEL_5 0x00800 | |||
#define LC_HEADING_LEVEL_6 0x01000 | |||
#define LC_COMMENT 0x02000 | |||
#define LC_SAFETY_CHECK 0xFC000 | |||
#define LC_HAS_TEXT 0x04000 | |||
#define LC_FAIL_ON_TEXT 0x08000 | |||
#define LC_FAIL_NEXT 0x10000 | |||
#define LC_FAIL_ON_LBRACE 0x20000 | |||
#define LC_FAIL_ON_RBRACE 0x40000 | |||
#define LC_FAIL_ON_EQUALS 0x80000 | |||
#define LC_TEMPLATE 0x000007 | |||
#define LC_TEMPLATE_NAME 0x000001 | |||
#define LC_TEMPLATE_PARAM_KEY 0x000002 | |||
#define LC_TEMPLATE_PARAM_VALUE 0x000004 | |||
#define LC_ARGUMENT 0x000018 | |||
#define LC_ARGUMENT_NAME 0x000008 | |||
#define LC_ARGUMENT_DEFAULT 0x000010 | |||
#define LC_WIKILINK 0x000060 | |||
#define LC_WIKILINK_TITLE 0x000020 | |||
#define LC_WIKILINK_TEXT 0x000040 | |||
#define LC_HEADING 0x001F80 | |||
#define LC_HEADING_LEVEL_1 0x000080 | |||
#define LC_HEADING_LEVEL_2 0x000100 | |||
#define LC_HEADING_LEVEL_3 0x000200 | |||
#define LC_HEADING_LEVEL_4 0x000400 | |||
#define LC_HEADING_LEVEL_5 0x000800 | |||
#define LC_HEADING_LEVEL_6 0x001000 | |||
#define LC_COMMENT 0x002000 | |||
#define LC_TAG 0x03C000 | |||
#define LC_TAG_OPEN 0x004000 | |||
#define LC_TAG_ATTR 0x008000 | |||
#define LC_TAG_BODY 0x010000 | |||
#define LC_TAG_CLOSE 0x020000 | |||
#define LC_SAFETY_CHECK 0xFC0000 | |||
#define LC_HAS_TEXT 0x040000 | |||
#define LC_FAIL_ON_TEXT 0x080000 | |||
#define LC_FAIL_NEXT 0x100000 | |||
#define LC_FAIL_ON_LBRACE 0x200000 | |||
#define LC_FAIL_ON_RBRACE 0x400000 | |||
#define LC_FAIL_ON_EQUALS 0x800000 | |||
/* Global contexts: */ | |||
#define GL_HEADING 0x1 | |||
/* Tag contexts: */ | |||
#define TAG_NAME 0x01 | |||
#define TAG_ATTR_READY 0x02 | |||
#define TAG_ATTR_NAME 0x04 | |||
#define TAG_ATTR_VALUE 0x08 | |||
#define TAG_QUOTED 0x10 | |||
#define TAG_NOTE_SPACE 0x20 | |||
#define TAG_NOTE_EQUALS 0x40 | |||
#define TAG_NOTE_QUOTE 0x80 | |||
/* Miscellaneous structs: */ | |||
@@ -158,13 +175,24 @@ typedef struct { | |||
int level; | |||
} HeadingData; | |||
typedef struct { | |||
int context; | |||
struct Textbuffer* pad_first; | |||
struct Textbuffer* pad_before_eq; | |||
struct Textbuffer* pad_after_eq; | |||
Py_ssize_t reset; | |||
} TagData; | |||
typedef struct Textbuffer Textbuffer; | |||
typedef struct Stack Stack; | |||
/* Tokenizer object definition: */ | |||
typedef struct { | |||
PyObject_HEAD | |||
PyObject* text; /* text to tokenize */ | |||
struct Stack* topstack; /* topmost stack */ | |||
Stack* topstack; /* topmost stack */ | |||
Py_ssize_t head; /* current position in text */ | |||
Py_ssize_t length; /* length of text */ | |||
int global; /* global context */ | |||
@@ -176,49 +204,31 @@ typedef struct { | |||
/* Macros for accessing Tokenizer data: */ | |||
#define Tokenizer_READ(self, delta) (*PyUnicode_AS_UNICODE(Tokenizer_read(self, delta))) | |||
#define Tokenizer_READ_BACKWARDS(self, delta) \ | |||
(*PyUnicode_AS_UNICODE(Tokenizer_read_backwards(self, delta))) | |||
#define Tokenizer_CAN_RECURSE(self) (self->depth < MAX_DEPTH && self->cycles < MAX_CYCLES) | |||
/* Macros for accessing HTML tag definitions: */ | |||
#define IS_PARSABLE(tag) (call_tag_def_func("is_parsable", tag)) | |||
#define IS_SINGLE(tag) (call_tag_def_func("is_single", tag)) | |||
#define IS_SINGLE_ONLY(tag) (call_tag_def_func("is_single_only", tag)) | |||
/* Function prototypes: */ | |||
static int heading_level_from_context(int); | |||
static Textbuffer* Textbuffer_new(void); | |||
static void Textbuffer_dealloc(Textbuffer*); | |||
static TagData* TagData_new(void); | |||
static void TagData_dealloc(TagData*); | |||
static PyObject* Tokenizer_new(PyTypeObject*, PyObject*, PyObject*); | |||
static struct Textbuffer* Textbuffer_new(void); | |||
static void Tokenizer_dealloc(Tokenizer*); | |||
static void Textbuffer_dealloc(struct Textbuffer*); | |||
static int Tokenizer_init(Tokenizer*, PyObject*, PyObject*); | |||
static int Tokenizer_push(Tokenizer*, int); | |||
static PyObject* Textbuffer_render(struct Textbuffer*); | |||
static int Tokenizer_push_textbuffer(Tokenizer*); | |||
static void Tokenizer_delete_top_of_stack(Tokenizer*); | |||
static PyObject* Tokenizer_pop(Tokenizer*); | |||
static PyObject* Tokenizer_pop_keeping_context(Tokenizer*); | |||
static void* Tokenizer_fail_route(Tokenizer*); | |||
static int Tokenizer_write(Tokenizer*, PyObject*); | |||
static int Tokenizer_write_first(Tokenizer*, PyObject*); | |||
static int Tokenizer_write_text(Tokenizer*, Py_UNICODE); | |||
static int Tokenizer_write_all(Tokenizer*, PyObject*); | |||
static int Tokenizer_write_text_then_stack(Tokenizer*, const char*); | |||
static PyObject* Tokenizer_read(Tokenizer*, Py_ssize_t); | |||
static PyObject* Tokenizer_read_backwards(Tokenizer*, Py_ssize_t); | |||
static int Tokenizer_parse_template_or_argument(Tokenizer*); | |||
static int Tokenizer_parse_template(Tokenizer*); | |||
static int Tokenizer_parse_argument(Tokenizer*); | |||
static int Tokenizer_handle_template_param(Tokenizer*); | |||
static int Tokenizer_handle_template_param_value(Tokenizer*); | |||
static PyObject* Tokenizer_handle_template_end(Tokenizer*); | |||
static int Tokenizer_handle_argument_separator(Tokenizer*); | |||
static PyObject* Tokenizer_handle_argument_end(Tokenizer*); | |||
static int Tokenizer_parse_wikilink(Tokenizer*); | |||
static int Tokenizer_handle_wikilink_separator(Tokenizer*); | |||
static PyObject* Tokenizer_handle_wikilink_end(Tokenizer*); | |||
static int Tokenizer_parse_heading(Tokenizer*); | |||
static HeadingData* Tokenizer_handle_heading_end(Tokenizer*); | |||
static int Tokenizer_really_parse_entity(Tokenizer*); | |||
static int Tokenizer_parse_entity(Tokenizer*); | |||
static int Tokenizer_parse_comment(Tokenizer*); | |||
static int Tokenizer_verify_safe(Tokenizer*, int, Py_UNICODE); | |||
static PyObject* Tokenizer_parse(Tokenizer*, int); | |||
static int Tokenizer_parse_tag(Tokenizer*); | |||
static PyObject* Tokenizer_parse(Tokenizer*, int, int); | |||
static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); | |||
@@ -24,9 +24,9 @@ from __future__ import unicode_literals | |||
from math import log | |||
import re | |||
from . import contexts | |||
from . import tokens | |||
from . import contexts, tokens | |||
from ..compat import htmlentities | |||
from ..tag_defs import is_parsable, is_single, is_single_only | |||
__all__ = ["Tokenizer"] | |||
@@ -35,16 +35,34 @@ class BadRoute(Exception): | |||
pass | |||
class _TagOpenData(object): | |||
"""Stores data about an HTML open tag, like ``<ref name="foo">``.""" | |||
CX_NAME = 1 << 0 | |||
CX_ATTR_READY = 1 << 1 | |||
CX_ATTR_NAME = 1 << 2 | |||
CX_ATTR_VALUE = 1 << 3 | |||
CX_QUOTED = 1 << 4 | |||
CX_NOTE_SPACE = 1 << 5 | |||
CX_NOTE_EQUALS = 1 << 6 | |||
CX_NOTE_QUOTE = 1 << 7 | |||
def __init__(self): | |||
self.context = self.CX_NAME | |||
self.padding_buffer = {"first": "", "before_eq": "", "after_eq": ""} | |||
self.reset = 0 | |||
class Tokenizer(object): | |||
"""Creates a list of tokens from a string of wikicode.""" | |||
USES_C = False | |||
START = object() | |||
END = object() | |||
MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", | |||
"/", "-", "!", "\n", END] | |||
"/", "-", "\n", END] | |||
MAX_DEPTH = 40 | |||
MAX_CYCLES = 100000 | |||
regex = re.compile(r"([{}\[\]<>|=&#*;:/\-!\n])", flags=re.IGNORECASE) | |||
regex = re.compile(r"([{}\[\]<>|=&#*;:/\\\"\-!\n])", flags=re.IGNORECASE) | |||
tag_splitter = re.compile(r"([\s\"\\]+)") | |||
def __init__(self): | |||
self._text = None | |||
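``tag_splitter`` breaks open-tag text on runs of whitespace, quotes, and backslashes while keeping the separators, which is what ``_handle_tag_data`` below walks over chunk by chunk. A quick illustration of the regex on its own (markers are normally already split out before it sees any text):

>>> import re
>>> tag_splitter = re.compile(r"([\s\"\\]+)")
>>> tag_splitter.split('foo "bar baz"')
['foo', ' "', 'bar', ' ', 'baz', '"', '']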
@@ -117,33 +135,33 @@ class Tokenizer(object): | |||
self._pop() | |||
raise BadRoute() | |||
def _write(self, token): | |||
def _emit(self, token): | |||
"""Write a token to the end of the current token stack.""" | |||
self._push_textbuffer() | |||
self._stack.append(token) | |||
def _write_first(self, token): | |||
def _emit_first(self, token): | |||
"""Write a token to the beginning of the current token stack.""" | |||
self._push_textbuffer() | |||
self._stack.insert(0, token) | |||
def _write_text(self, text): | |||
def _emit_text(self, text): | |||
"""Write text to the current textbuffer.""" | |||
self._textbuffer.append(text) | |||
def _write_all(self, tokenlist): | |||
def _emit_all(self, tokenlist): | |||
"""Write a series of tokens to the current stack at once.""" | |||
if tokenlist and isinstance(tokenlist[0], tokens.Text): | |||
self._write_text(tokenlist.pop(0).text) | |||
self._emit_text(tokenlist.pop(0).text) | |||
self._push_textbuffer() | |||
self._stack.extend(tokenlist) | |||
def _write_text_then_stack(self, text): | |||
def _emit_text_then_stack(self, text): | |||
"""Pop the current stack, write *text*, and then write the stack.""" | |||
stack = self._pop() | |||
self._write_text(text) | |||
self._emit_text(text) | |||
if stack: | |||
self._write_all(stack) | |||
self._emit_all(stack) | |||
self._head -= 1 | |||
def _read(self, delta=0, wrap=False, strict=False): | |||
@@ -168,6 +186,30 @@ class Tokenizer(object): | |||
self._fail_route() | |||
return self.END | |||
def _parse_template(self): | |||
"""Parse a template at the head of the wikicode string.""" | |||
reset = self._head | |||
try: | |||
template = self._parse(contexts.TEMPLATE_NAME) | |||
except BadRoute: | |||
self._head = reset | |||
raise | |||
self._emit_first(tokens.TemplateOpen()) | |||
self._emit_all(template) | |||
self._emit(tokens.TemplateClose()) | |||
def _parse_argument(self): | |||
"""Parse an argument at the head of the wikicode string.""" | |||
reset = self._head | |||
try: | |||
argument = self._parse(contexts.ARGUMENT_NAME) | |||
except BadRoute: | |||
self._head = reset | |||
raise | |||
self._emit_first(tokens.ArgumentOpen()) | |||
self._emit_all(argument) | |||
self._emit(tokens.ArgumentClose()) | |||
def _parse_template_or_argument(self): | |||
"""Parse a template or argument at the head of the wikicode string.""" | |||
self._head += 2 | |||
@@ -179,12 +221,12 @@ class Tokenizer(object): | |||
while braces: | |||
if braces == 1: | |||
return self._write_text_then_stack("{") | |||
return self._emit_text_then_stack("{") | |||
if braces == 2: | |||
try: | |||
self._parse_template() | |||
except BadRoute: | |||
return self._write_text_then_stack("{{") | |||
return self._emit_text_then_stack("{{") | |||
break | |||
try: | |||
self._parse_argument() | |||
@@ -194,35 +236,13 @@ class Tokenizer(object): | |||
self._parse_template() | |||
braces -= 2 | |||
except BadRoute: | |||
return self._write_text_then_stack("{" * braces) | |||
return self._emit_text_then_stack("{" * braces) | |||
if braces: | |||
self._head += 1 | |||
self._write_all(self._pop()) | |||
def _parse_template(self): | |||
"""Parse a template at the head of the wikicode string.""" | |||
reset = self._head | |||
try: | |||
template = self._parse(contexts.TEMPLATE_NAME) | |||
except BadRoute: | |||
self._head = reset | |||
raise | |||
self._write_first(tokens.TemplateOpen()) | |||
self._write_all(template) | |||
self._write(tokens.TemplateClose()) | |||
def _parse_argument(self): | |||
"""Parse an argument at the head of the wikicode string.""" | |||
reset = self._head | |||
try: | |||
argument = self._parse(contexts.ARGUMENT_NAME) | |||
except BadRoute: | |||
self._head = reset | |||
raise | |||
self._write_first(tokens.ArgumentOpen()) | |||
self._write_all(argument) | |||
self._write(tokens.ArgumentClose()) | |||
self._emit_all(self._pop()) | |||
if self._context & contexts.FAIL_NEXT: | |||
self._context ^= contexts.FAIL_NEXT | |||
def _handle_template_param(self): | |||
"""Handle a template parameter at the head of the string.""" | |||
@@ -231,22 +251,22 @@ class Tokenizer(object): | |||
elif self._context & contexts.TEMPLATE_PARAM_VALUE: | |||
self._context ^= contexts.TEMPLATE_PARAM_VALUE | |||
elif self._context & contexts.TEMPLATE_PARAM_KEY: | |||
self._write_all(self._pop(keep_context=True)) | |||
self._emit_all(self._pop(keep_context=True)) | |||
self._context |= contexts.TEMPLATE_PARAM_KEY | |||
self._write(tokens.TemplateParamSeparator()) | |||
self._emit(tokens.TemplateParamSeparator()) | |||
self._push(self._context) | |||
def _handle_template_param_value(self): | |||
"""Handle a template parameter's value at the head of the string.""" | |||
self._write_all(self._pop(keep_context=True)) | |||
self._emit_all(self._pop(keep_context=True)) | |||
self._context ^= contexts.TEMPLATE_PARAM_KEY | |||
self._context |= contexts.TEMPLATE_PARAM_VALUE | |||
self._write(tokens.TemplateParamEquals()) | |||
self._emit(tokens.TemplateParamEquals()) | |||
def _handle_template_end(self): | |||
"""Handle the end of a template at the head of the string.""" | |||
if self._context & contexts.TEMPLATE_PARAM_KEY: | |||
self._write_all(self._pop(keep_context=True)) | |||
self._emit_all(self._pop(keep_context=True)) | |||
self._head += 1 | |||
return self._pop() | |||
@@ -254,7 +274,7 @@ class Tokenizer(object): | |||
"""Handle the separator between an argument's name and default.""" | |||
self._context ^= contexts.ARGUMENT_NAME | |||
self._context |= contexts.ARGUMENT_DEFAULT | |||
self._write(tokens.ArgumentSeparator()) | |||
self._emit(tokens.ArgumentSeparator()) | |||
def _handle_argument_end(self): | |||
"""Handle the end of an argument at the head of the string.""" | |||
@@ -269,17 +289,19 @@ class Tokenizer(object): | |||
wikilink = self._parse(contexts.WIKILINK_TITLE) | |||
except BadRoute: | |||
self._head = reset | |||
self._write_text("[[") | |||
self._emit_text("[[") | |||
else: | |||
self._write(tokens.WikilinkOpen()) | |||
self._write_all(wikilink) | |||
self._write(tokens.WikilinkClose()) | |||
if self._context & contexts.FAIL_NEXT: | |||
self._context ^= contexts.FAIL_NEXT | |||
self._emit(tokens.WikilinkOpen()) | |||
self._emit_all(wikilink) | |||
self._emit(tokens.WikilinkClose()) | |||
def _handle_wikilink_separator(self): | |||
"""Handle the separator between a wikilink's title and its text.""" | |||
self._context ^= contexts.WIKILINK_TITLE | |||
self._context |= contexts.WIKILINK_TEXT | |||
self._write(tokens.WikilinkSeparator()) | |||
self._emit(tokens.WikilinkSeparator()) | |||
def _handle_wikilink_end(self): | |||
"""Handle the end of a wikilink at the head of the string.""" | |||
@@ -301,13 +323,13 @@ class Tokenizer(object): | |||
title, level = self._parse(context) | |||
except BadRoute: | |||
self._head = reset + best - 1 | |||
self._write_text("=" * best) | |||
self._emit_text("=" * best) | |||
else: | |||
self._write(tokens.HeadingStart(level=level)) | |||
self._emit(tokens.HeadingStart(level=level)) | |||
if level < best: | |||
self._write_text("=" * (best - level)) | |||
self._write_all(title) | |||
self._write(tokens.HeadingEnd()) | |||
self._emit_text("=" * (best - level)) | |||
self._emit_all(title) | |||
self._emit(tokens.HeadingEnd()) | |||
finally: | |||
self._global ^= contexts.GL_HEADING | |||
@@ -326,28 +348,28 @@ class Tokenizer(object): | |||
after, after_level = self._parse(self._context) | |||
except BadRoute: | |||
if level < best: | |||
self._write_text("=" * (best - level)) | |||
self._emit_text("=" * (best - level)) | |||
self._head = reset + best - 1 | |||
return self._pop(), level | |||
else: # Found another closure | |||
self._write_text("=" * best) | |||
self._write_all(after) | |||
self._emit_text("=" * best) | |||
self._emit_all(after) | |||
return self._pop(), after_level | |||
def _really_parse_entity(self): | |||
"""Actually parse an HTML entity and ensure that it is valid.""" | |||
self._write(tokens.HTMLEntityStart()) | |||
self._emit(tokens.HTMLEntityStart()) | |||
self._head += 1 | |||
this = self._read(strict=True) | |||
if this == "#": | |||
numeric = True | |||
self._write(tokens.HTMLEntityNumeric()) | |||
self._emit(tokens.HTMLEntityNumeric()) | |||
self._head += 1 | |||
this = self._read(strict=True) | |||
if this[0].lower() == "x": | |||
hexadecimal = True | |||
self._write(tokens.HTMLEntityHex(char=this[0])) | |||
self._emit(tokens.HTMLEntityHex(char=this[0])) | |||
this = this[1:] | |||
if not this: | |||
self._fail_route() | |||
@@ -373,8 +395,8 @@ class Tokenizer(object): | |||
if this not in htmlentities.entitydefs: | |||
self._fail_route() | |||
self._write(tokens.Text(text=this)) | |||
self._write(tokens.HTMLEntityEnd()) | |||
self._emit(tokens.Text(text=this)) | |||
self._emit(tokens.HTMLEntityEnd()) | |||
def _parse_entity(self): | |||
"""Parse an HTML entity at the head of the wikicode string.""" | |||
@@ -384,9 +406,9 @@ class Tokenizer(object): | |||
self._really_parse_entity() | |||
except BadRoute: | |||
self._head = reset | |||
self._write_text(self._read()) | |||
self._emit_text(self._read()) | |||
else: | |||
self._write_all(self._pop()) | |||
self._emit_all(self._pop()) | |||
def _parse_comment(self): | |||
"""Parse an HTML comment at the head of the wikicode string.""" | |||
@@ -396,13 +418,231 @@ class Tokenizer(object): | |||
comment = self._parse(contexts.COMMENT) | |||
except BadRoute: | |||
self._head = reset | |||
self._write_text("<!--") | |||
self._emit_text("<!--") | |||
else: | |||
self._write(tokens.CommentStart()) | |||
self._write_all(comment) | |||
self._write(tokens.CommentEnd()) | |||
self._emit(tokens.CommentStart()) | |||
self._emit_all(comment) | |||
self._emit(tokens.CommentEnd()) | |||
self._head += 2 | |||
def _push_tag_buffer(self, data): | |||
"""Write a pending tag attribute from *data* to the stack.""" | |||
if data.context & data.CX_QUOTED: | |||
self._emit_first(tokens.TagAttrQuote()) | |||
self._emit_all(self._pop()) | |||
buf = data.padding_buffer | |||
self._emit_first(tokens.TagAttrStart(pad_first=buf["first"], | |||
pad_before_eq=buf["before_eq"], pad_after_eq=buf["after_eq"])) | |||
self._emit_all(self._pop()) | |||
data.padding_buffer = {key: "" for key in data.padding_buffer} | |||
def _handle_tag_space(self, data, text): | |||
"""Handle whitespace (*text*) inside of an HTML open tag.""" | |||
ctx = data.context | |||
end_of_value = ctx & data.CX_ATTR_VALUE and not ctx & (data.CX_QUOTED | data.CX_NOTE_QUOTE) | |||
if end_of_value or (ctx & data.CX_QUOTED and ctx & data.CX_NOTE_SPACE): | |||
self._push_tag_buffer(data) | |||
data.context = data.CX_ATTR_READY | |||
elif ctx & data.CX_NOTE_SPACE: | |||
data.context = data.CX_ATTR_READY | |||
elif ctx & data.CX_ATTR_NAME: | |||
data.context |= data.CX_NOTE_EQUALS | |||
data.padding_buffer["before_eq"] += text | |||
if ctx & data.CX_QUOTED and not ctx & data.CX_NOTE_SPACE: | |||
self._emit_text(text) | |||
elif data.context & data.CX_ATTR_READY: | |||
data.padding_buffer["first"] += text | |||
elif data.context & data.CX_ATTR_VALUE: | |||
data.padding_buffer["after_eq"] += text | |||
def _handle_tag_text(self, text): | |||
"""Handle regular *text* inside of an HTML open tag.""" | |||
next = self._read(1) | |||
if not self._can_recurse() or text not in self.MARKERS: | |||
self._emit_text(text) | |||
elif text == next == "{": | |||
self._parse_template_or_argument() | |||
elif text == next == "[": | |||
self._parse_wikilink() | |||
elif text == "<": | |||
self._parse_tag() | |||
else: | |||
self._emit_text(text) | |||
def _handle_tag_data(self, data, text): | |||
"""Handle all sorts of *text* data inside of an HTML open tag.""" | |||
for chunk in self.tag_splitter.split(text): | |||
if not chunk: | |||
continue | |||
if data.context & data.CX_NAME: | |||
if chunk in self.MARKERS or chunk.isspace(): | |||
self._fail_route() # Tags must start with text, not spaces | |||
data.context = data.CX_NOTE_SPACE | |||
elif chunk.isspace(): | |||
self._handle_tag_space(data, chunk) | |||
continue | |||
elif data.context & data.CX_NOTE_SPACE: | |||
if data.context & data.CX_QUOTED: | |||
data.context = data.CX_ATTR_VALUE | |||
self._pop() | |||
self._head = data.reset - 1 # Will be auto-incremented | |||
return # Break early | |||
self._fail_route() | |||
elif data.context & data.CX_ATTR_READY: | |||
data.context = data.CX_ATTR_NAME | |||
self._push(contexts.TAG_ATTR) | |||
elif data.context & data.CX_ATTR_NAME: | |||
if chunk == "=": | |||
data.context = data.CX_ATTR_VALUE | data.CX_NOTE_QUOTE | |||
self._emit(tokens.TagAttrEquals()) | |||
continue | |||
if data.context & data.CX_NOTE_EQUALS: | |||
self._push_tag_buffer(data) | |||
data.context = data.CX_ATTR_NAME | |||
self._push(contexts.TAG_ATTR) | |||
elif data.context & data.CX_ATTR_VALUE: | |||
escaped = self._read(-1) == "\\" and self._read(-2) != "\\" | |||
if data.context & data.CX_NOTE_QUOTE: | |||
data.context ^= data.CX_NOTE_QUOTE | |||
if chunk == '"' and not escaped: | |||
data.context |= data.CX_QUOTED | |||
self._push(self._context) | |||
data.reset = self._head | |||
continue | |||
elif data.context & data.CX_QUOTED: | |||
if chunk == '"' and not escaped: | |||
data.context |= data.CX_NOTE_SPACE | |||
continue | |||
self._handle_tag_text(chunk) | |||
def _handle_tag_close_open(self, data, token): | |||
"""Handle the closing of a open tag (``<foo>``).""" | |||
if data.context & (data.CX_ATTR_NAME | data.CX_ATTR_VALUE): | |||
self._push_tag_buffer(data) | |||
self._emit(token(padding=data.padding_buffer["first"])) | |||
self._head += 1 | |||
def _handle_tag_open_close(self): | |||
"""Handle the opening of a closing tag (``</foo>``).""" | |||
self._emit(tokens.TagOpenClose()) | |||
self._push(contexts.TAG_CLOSE) | |||
self._head += 1 | |||
def _handle_tag_close_close(self): | |||
"""Handle the ending of a closing tag (``</foo>``).""" | |||
strip = lambda tok: tok.text.rstrip().lower() | |||
closing = self._pop() | |||
if len(closing) != 1 or (not isinstance(closing[0], tokens.Text) or | |||
strip(closing[0]) != strip(self._stack[1])): | |||
self._fail_route() | |||
self._emit_all(closing) | |||
self._emit(tokens.TagCloseClose()) | |||
return self._pop() | |||
def _handle_blacklisted_tag(self): | |||
"""Handle the body of an HTML tag that is parser-blacklisted.""" | |||
while True: | |||
this, next = self._read(), self._read(1) | |||
self._head += 1 | |||
if this is self.END: | |||
self._fail_route() | |||
elif this == "<" and next == "/": | |||
self._handle_tag_open_close() | |||
return self._parse(push=False) | |||
else: | |||
self._emit_text(this) | |||
def _handle_single_only_tag_end(self): | |||
"""Handle the end of an implicitly closing single-only HTML tag.""" | |||
padding = self._stack.pop().padding | |||
self._emit(tokens.TagCloseSelfclose(padding=padding, implicit=True)) | |||
self._head -= 1 # Offset displacement done by _handle_tag_close_open | |||
return self._pop() | |||
def _handle_single_tag_end(self): | |||
"""Handle the stream end when inside a single-supporting HTML tag.""" | |||
gen = enumerate(self._stack) | |||
index = next(i for i, t in gen if isinstance(t, tokens.TagCloseOpen)) | |||
padding = self._stack[index].padding | |||
token = tokens.TagCloseSelfclose(padding=padding, implicit=True) | |||
self._stack[index] = token | |||
return self._pop() | |||
def _really_parse_tag(self): | |||
"""Actually parse an HTML tag, starting with the open (``<foo>``).""" | |||
data = _TagOpenData() | |||
self._push(contexts.TAG_OPEN) | |||
self._emit(tokens.TagOpenOpen()) | |||
while True: | |||
this, next = self._read(), self._read(1) | |||
can_exit = (not data.context & (data.CX_QUOTED | data.CX_NAME) or | |||
data.context & data.CX_NOTE_SPACE) | |||
if this is self.END: | |||
if self._context & contexts.TAG_ATTR: | |||
if data.context & data.CX_QUOTED: | |||
# Unclosed attribute quote: reset, don't die | |||
data.context = data.CX_ATTR_VALUE | |||
self._pop() | |||
self._head = data.reset | |||
continue | |||
self._pop() | |||
self._fail_route() | |||
elif this == ">" and can_exit: | |||
self._handle_tag_close_open(data, tokens.TagCloseOpen) | |||
self._context = contexts.TAG_BODY | |||
if is_single_only(self._stack[1].text): | |||
return self._handle_single_only_tag_end() | |||
if is_parsable(self._stack[1].text): | |||
return self._parse(push=False) | |||
return self._handle_blacklisted_tag() | |||
elif this == "/" and next == ">" and can_exit: | |||
self._handle_tag_close_open(data, tokens.TagCloseSelfclose) | |||
return self._pop() | |||
else: | |||
self._handle_tag_data(data, this) | |||
self._head += 1 | |||
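The three returns above dispatch on what kind of tag was just opened: single-only tags are closed implicitly on the spot, parsable tags recurse into ordinary wikicode, and blacklisted tags have their bodies consumed as plain text. Under that logic a bare ``<br>`` should tokenize as (written in the notation of the tokenizer test cases at the end of this diff; not one of the cases listed there):

output: [TagOpenOpen(), Text(text="br"), TagCloseSelfclose(padding="", implicit=True)]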
def _handle_invalid_tag_start(self): | |||
"""Handle the (possible) start of an implicitly closing single tag.""" | |||
reset = self._head + 1 | |||
self._head += 2 | |||
try: | |||
if not is_single_only(self.tag_splitter.split(self._read())[0]): | |||
raise BadRoute() | |||
tag = self._really_parse_tag() | |||
except BadRoute: | |||
self._head = reset | |||
self._emit_text("</") | |||
else: | |||
tag[0].invalid = True # Set flag of TagOpenOpen | |||
self._emit_all(tag) | |||
def _parse_tag(self): | |||
"""Parse an HTML tag at the head of the wikicode string.""" | |||
reset = self._head | |||
self._head += 1 | |||
try: | |||
tag = self._really_parse_tag() | |||
except BadRoute: | |||
self._head = reset | |||
self._emit_text("<") | |||
else: | |||
self._emit_all(tag) | |||
def _handle_end(self): | |||
"""Handle the end of the stream of wikitext.""" | |||
fail = (contexts.TEMPLATE | contexts.ARGUMENT | contexts.WIKILINK | | |||
contexts.HEADING | contexts.COMMENT | contexts.TAG) | |||
double_fail = (contexts.TEMPLATE_PARAM_KEY | contexts.TAG_CLOSE) | |||
if self._context & fail: | |||
if self._context & contexts.TAG_BODY: | |||
if is_single(self._stack[1].text): | |||
return self._handle_single_tag_end() | |||
if self._context & double_fail: | |||
self._pop() | |||
self._fail_route() | |||
return self._pop() | |||
def _verify_safe(self, this): | |||
"""Make sure we are not trying to write an invalid character.""" | |||
context = self._context | |||
@@ -414,7 +654,7 @@ class Tokenizer(object): | |||
elif this == "\n" or this == "[" or this == "}": | |||
return False | |||
return True | |||
if context & contexts.TEMPLATE_NAME: | |||
elif context & contexts.TEMPLATE_NAME: | |||
if this == "{" or this == "}" or this == "[": | |||
self._context |= contexts.FAIL_NEXT | |||
return True | |||
@@ -432,6 +672,8 @@ class Tokenizer(object): | |||
elif this is self.END or not this.isspace(): | |||
self._context |= contexts.HAS_TEXT | |||
return True | |||
elif context & contexts.TAG_CLOSE: | |||
return this != "<" | |||
else: | |||
if context & contexts.FAIL_ON_EQUALS: | |||
if this == "=": | |||
@@ -458,44 +700,38 @@ class Tokenizer(object): | |||
self._context |= contexts.FAIL_ON_RBRACE | |||
return True | |||
def _parse(self, context=0): | |||
def _parse(self, context=0, push=True): | |||
"""Parse the wikicode string, using *context* for when to stop.""" | |||
self._push(context) | |||
unsafe = (contexts.TEMPLATE_NAME | contexts.WIKILINK_TITLE | | |||
contexts.TEMPLATE_PARAM_KEY | contexts.ARGUMENT_NAME | | |||
contexts.TAG_CLOSE) | |||
double_unsafe = (contexts.TEMPLATE_PARAM_KEY | contexts.TAG_CLOSE) | |||
if push: | |||
self._push(context) | |||
while True: | |||
this = self._read() | |||
unsafe = (contexts.TEMPLATE_NAME | contexts.WIKILINK_TITLE | | |||
contexts.TEMPLATE_PARAM_KEY | contexts.ARGUMENT_NAME) | |||
if self._context & unsafe: | |||
if not self._verify_safe(this): | |||
if self._context & contexts.TEMPLATE_PARAM_KEY: | |||
if self._context & double_unsafe: | |||
self._pop() | |||
self._fail_route() | |||
if this not in self.MARKERS: | |||
self._write_text(this) | |||
self._emit_text(this) | |||
self._head += 1 | |||
continue | |||
if this is self.END: | |||
fail = (contexts.TEMPLATE | contexts.ARGUMENT | | |||
contexts.WIKILINK | contexts.HEADING | | |||
contexts.COMMENT) | |||
if self._context & contexts.TEMPLATE_PARAM_KEY: | |||
self._pop() | |||
if self._context & fail: | |||
self._fail_route() | |||
return self._pop() | |||
return self._handle_end() | |||
next = self._read(1) | |||
if self._context & contexts.COMMENT: | |||
if this == next == "-" and self._read(2) == ">": | |||
return self._pop() | |||
else: | |||
self._write_text(this) | |||
self._emit_text(this) | |||
elif this == next == "{": | |||
if self._can_recurse(): | |||
self._parse_template_or_argument() | |||
if self._context & contexts.FAIL_NEXT: | |||
self._context ^= contexts.FAIL_NEXT | |||
else: | |||
self._write_text("{") | |||
self._emit_text("{") | |||
elif this == "|" and self._context & contexts.TEMPLATE: | |||
self._handle_template_param() | |||
elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY: | |||
@@ -508,14 +744,12 @@ class Tokenizer(object): | |||
if self._read(2) == "}": | |||
return self._handle_argument_end() | |||
else: | |||
self._write_text("}") | |||
self._emit_text("}") | |||
elif this == next == "[": | |||
if not self._context & contexts.WIKILINK_TITLE and self._can_recurse(): | |||
self._parse_wikilink() | |||
if self._context & contexts.FAIL_NEXT: | |||
self._context ^= contexts.FAIL_NEXT | |||
else: | |||
self._write_text("[") | |||
self._emit_text("[") | |||
elif this == "|" and self._context & contexts.WIKILINK_TITLE: | |||
self._handle_wikilink_separator() | |||
elif this == next == "]" and self._context & contexts.WIKILINK: | |||
@@ -524,7 +758,7 @@ class Tokenizer(object): | |||
if self._read(-1) in ("\n", self.START): | |||
self._parse_heading() | |||
else: | |||
self._write_text("=") | |||
self._emit_text("=") | |||
elif this == "=" and self._context & contexts.HEADING: | |||
return self._handle_heading_end() | |||
elif this == "\n" and self._context & contexts.HEADING: | |||
@@ -535,9 +769,21 @@ class Tokenizer(object): | |||
if self._read(2) == self._read(3) == "-": | |||
self._parse_comment() | |||
else: | |||
self._write_text(this) | |||
self._emit_text(this) | |||
elif this == "<" and next == "/" and self._read(2) is not self.END: | |||
if self._context & contexts.TAG_BODY: | |||
self._handle_tag_open_close() | |||
else: | |||
self._handle_invalid_tag_start() | |||
elif this == "<": | |||
if not self._context & contexts.TAG_CLOSE and self._can_recurse(): | |||
self._parse_tag() | |||
else: | |||
self._emit_text("<") | |||
elif this == ">" and self._context & contexts.TAG_CLOSE: | |||
return self._handle_tag_close_close() | |||
else: | |||
self._write_text(this) | |||
self._emit_text(this) | |||
self._head += 1 | |||
def tokenize(self, text): | |||
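An end-to-end sketch tying the pure-Python tokenizer to the builder (the C tokenizer's tag support lives separately in ``tokenizer.h`` above); ``filter_tags()`` is the usual auto-generated ``Wikicode`` filter method:

>>> from mwparserfromhell.parser.builder import Builder
>>> from mwparserfromhell.parser.tokenizer import Tokenizer
>>> code = Builder().build(Tokenizer().tokenize("<ref name=foo>bar</ref>"))
>>> print(code)
<ref name=foo>bar</ref>
>>> tag = code.filter_tags()[0]
>>> print(tag.attributes[0].name)
name
>>> print(tag.attributes[0].value)
foo
>>> tag.self_closing
False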
@@ -63,6 +63,10 @@ class Token(object): | |||
def __delattr__(self, key): | |||
del self._kwargs[key] | |||
def get(self, key, default=None): | |||
"""Same as :py:meth:`__getattr__`, but has a *default* if missing.""" | |||
return self._kwargs.get(key, default) | |||
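``get()`` mirrors ``dict.get``, which lets the builder read optional token attributes such as ``showtag``, ``invalid``, and ``implicit`` without special-casing their absence:

>>> from mwparserfromhell.parser import tokens
>>> tok = tokens.TagOpenOpen(invalid=True)
>>> tok.get("invalid", False)
True
>>> tok.get("implicit", False)  # never set on this token
False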
def make(name): | |||
"""Create a new Token class using ``type()`` and add it to ``__all__``.""" | |||
@@ -0,0 +1,76 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
"""Contains data regarding certain HTML tags.""" | |||
from __future__ import unicode_literals | |||
__all__ = ["get_wikicode", "is_parsable", "is_visible", "is_single", | |||
"is_single_only"] | |||
PARSER_BLACKLIST = [ | |||
# enwiki extensions @ 2013-06-28 | |||
"categorytree", "gallery", "hiero", "imagemap", "inputbox", "math", | |||
"nowiki", "pre", "score", "section", "source", "syntaxhighlight", | |||
"templatedata", "timeline" | |||
] | |||
INVISIBLE_TAGS = [ | |||
# enwiki extensions @ 2013-06-28 | |||
"categorytree", "gallery", "imagemap", "inputbox", "math", "score", | |||
"section", "templatedata", "timeline" | |||
] | |||
# [mediawiki/core.git]/includes/Sanitizer.php @ 87a0aef762 | |||
SINGLE_ONLY = ["br", "hr", "meta", "link", "img"] | |||
SINGLE = SINGLE_ONLY + ["li", "dt", "dd"] | |||
WIKICODE = { | |||
"i": {"open": "''", "close": "''"}, | |||
"b": {"open": "'''", "close": "'''"}, | |||
"ul": {"open": "*"}, | |||
"ol": {"open": "#"}, | |||
"dt": {"open": ";"}, | |||
"dd": {"open": ":"}, | |||
"hr": {"open": "----"}, | |||
} | |||
def get_wikicode(tag): | |||
"""Return the appropriate wikicode before and after the given *tag*.""" | |||
data = WIKICODE[tag.lower()] | |||
return (data.get("open"), data.get("close")) | |||
def is_parsable(tag): | |||
"""Return if the given *tag*'s contents should be passed to the parser.""" | |||
return tag.lower() not in PARSER_BLACKLIST | |||
def is_visible(tag): | |||
"""Return whether or not the given *tag* contains visible text.""" | |||
return tag.lower() not in INVISIBLE_TAGS | |||
def is_single(tag): | |||
"""Return whether or not the given *tag* can exist without a close tag.""" | |||
return tag.lower() in SINGLE | |||
def is_single_only(tag): | |||
"""Return whether or not the given *tag* must exist without a close tag.""" | |||
return tag.lower() in SINGLE_ONLY |
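A short tour of the helpers, assuming the module is importable as ``mwparserfromhell.tag_defs``:

>>> from mwparserfromhell.tag_defs import get_wikicode, is_parsable, is_single_only
>>> get_wikicode("b")
("'''", "'''")
>>> is_parsable("nowiki")
False
>>> is_single_only("br"), is_single_only("li")
(True, False)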
@@ -31,6 +31,8 @@ from .compat import bytes, str | |||
from .nodes import Node | |||
from .smart_list import SmartList | |||
__all__ = ["parse_anything"] | |||
def parse_anything(value): | |||
"""Return a :py:class:`~.Wikicode` for *value*, allowing multiple types. | |||
@@ -91,7 +91,23 @@ class TreeEqualityTestCase(TestCase): | |||
def assertTagNodeEqual(self, expected, actual): | |||
"""Assert that two Tag nodes have the same data.""" | |||
self.fail("Holding this until feature/html_tags is ready.") | |||
self.assertWikicodeEqual(expected.tag, actual.tag) | |||
if expected.contents is not None: | |||
self.assertWikicodeEqual(expected.contents, actual.contents) | |||
length = len(expected.attributes) | |||
self.assertEqual(length, len(actual.attributes)) | |||
for i in range(length): | |||
exp_attr = expected.attributes[i] | |||
act_attr = actual.attributes[i] | |||
self.assertWikicodeEqual(exp_attr.name, act_attr.name) | |||
if exp_attr.value is not None: | |||
self.assertWikicodeEqual(exp_attr.value, act_attr.value) | |||
self.assertIs(exp_attr.quoted, act_attr.quoted) | |||
self.assertEqual(exp_attr.pad_first, act_attr.pad_first)
self.assertEqual(exp_attr.pad_before_eq, act_attr.pad_before_eq)
self.assertEqual(exp_attr.pad_after_eq, act_attr.pad_after_eq)
self.assertIs(expected.showtag, actual.showtag) | |||
self.assertIs(expected.self_closing, actual.self_closing) | |||
self.assertEqual(expected.padding, actual.padding) | |||
self.assertWikicodeEqual(expected.closing_tag, actual.closing_tag) | |||
def assertTemplateNodeEqual(self, expected, actual): | |||
"""Assert that two Template nodes have the same data.""" | |||
@@ -198,6 +198,18 @@ class TestBuilder(TreeEqualityTestCase): | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
def test_tag(self): | |||
"""tests for building Tag nodes""" | |||
tests = [ | |||
([tokens.TagOpenOpen(), tokens.Text(text="ref"), | |||
tokens.TagCloseOpen(padding=""), tokens.TagOpenClose(), | |||
tokens.Text(text="ref"), tokens.TagCloseClose()], | |||
wrap([Tag(wraptext("ref"), wrap([]), [], True, False, "", | |||
wraptext("ref"))])), | |||
] | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
def test_integration(self): | |||
"""a test for building a combination of templates together""" | |||
# {{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}} | |||
@@ -33,6 +33,13 @@ output: [Text(text="&n"), CommentStart(), Text(text="foo"), CommentEnd(), Text(t | |||
--- | |||
name: rich_tags | |||
label: an HTML tag with tons of other things in it
input: "{{dubious claim}}<ref name={{abc}} foo="bar {{baz}}" abc={{de}}f ghi=j{{k}}{{l}} \n mno = "{{p}} [[q]] {{r}}">[[Source]]</ref>" | |||
output: [TemplateOpen(), Text(text="dubious claim"), TemplateClose(), TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TemplateOpen(), Text(text="abc"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(), Text(text="bar "), TemplateOpen(), Text(text="baz"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="abc"), TagAttrEquals(), TemplateOpen(), Text(text="de"), TemplateClose(), Text(text="f"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="ghi"), TagAttrEquals(), Text(text="j"), TemplateOpen(), Text(text="k"), TemplateClose(), TemplateOpen(), Text(text="l"), TemplateClose(), TagAttrStart(pad_first=" \n ", pad_before_eq=" ", pad_after_eq=" "), Text(text="mno"), TagAttrEquals(), TagAttrQuote(), TemplateOpen(), Text(text="p"), TemplateClose(), Text(text=" "), WikilinkOpen(), Text(text="q"), WikilinkClose(), Text(text=" "), TemplateOpen(), Text(text="r"), TemplateClose(), TagCloseOpen(padding=""), WikilinkOpen(), Text(text="Source"), WikilinkClose(), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: wildcard | |||
label: a wildcard assortment of various things | |||
input: "{{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}" | |||
@@ -0,0 +1,529 @@ | |||
name: basic | |||
label: a basic tag with an open and close | |||
input: "<ref></ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: basic_selfclosing | |||
label: a basic self-closing tag | |||
input: "<ref/>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagCloseSelfclose(padding="")] | |||
--- | |||
name: content | |||
label: a tag with some content in the middle | |||
input: "<ref>this is a reference</ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagCloseOpen(padding=""), Text(text="this is a reference"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: padded_open | |||
label: a tag with some padding in the open tag | |||
input: "<ref ></ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagCloseOpen(padding=" "), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: padded_close | |||
label: a tag with some padding in the close tag | |||
input: "<ref></ref >" | |||
output: [TagOpenOpen(), Text(text="ref"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref "), TagCloseClose()] | |||
--- | |||
name: padded_selfclosing | |||
label: a self-closing tag with padding | |||
input: "<ref />" | |||
output: [TagOpenOpen(), Text(text="ref"), TagCloseSelfclose(padding=" ")] | |||
--- | |||
name: attribute | |||
label: a tag with a single attribute | |||
input: "<ref name></ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: attribute_value | |||
label: a tag with a single attribute with a value | |||
input: "<ref name=foo></ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), Text(text="foo"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: attribute_quoted | |||
label: a tag with a single quoted attribute | |||
input: "<ref name="foo bar"></ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: attribute_hyphen | |||
label: a tag with a single attribute, containing a hyphen | |||
input: "<ref name=foo-bar></ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), Text(text="foo-bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: attribute_quoted_hyphen | |||
label: a tag with a single quoted attribute, containing a hyphen | |||
input: "<ref name="foo-bar"></ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo-bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: attribute_selfclosing | |||
label: a self-closing tag with a single attribute | |||
input: "<ref name/>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseSelfclose(padding="")] | |||
--- | |||
name: attribute_selfclosing_value | |||
label: a self-closing tag with a single attribute with a value | |||
input: "<ref name=foo/>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), Text(text="foo"), TagCloseSelfclose(padding="")] | |||
--- | |||
name: attribute_selfclosing_value_quoted | |||
label: a self-closing tag with a single quoted attribute | |||
input: "<ref name="foo"/>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo"), TagCloseSelfclose(padding="")] | |||
--- | |||
name: nested_tag | |||
label: a tag nested within the attributes of another | |||
input: "<ref name=<span style="color: red;">foo</span>>citation</ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: nested_tag_quoted | |||
label: a tag nested within the attributes of another, quoted | |||
input: "<ref name="<span style="color: red;">foo</span>">citation</ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: nested_troll_tag | |||
label: a bogus tag that appears to be nested within the attributes of another | |||
input: "<ref name=</ ><//>>citation</ref>" | |||
output: [Text(text="<ref name=</ ><//>>citation</ref>")] | |||
--- | |||
name: nested_troll_tag_quoted | |||
label: a bogus tag that appears to be nested within the attributes of another, quoted | |||
input: "<ref name="</ ><//>">citation</ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="</ ><//>"), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: invalid_space_begin_open | |||
label: invalid tag: a space at the beginning of the open tag | |||
input: "< ref>test</ref>" | |||
output: [Text(text="< ref>test</ref>")] | |||
--- | |||
name: invalid_space_begin_close | |||
label: invalid tag: a space at the beginning of the close tag | |||
input: "<ref>test</ ref>" | |||
output: [Text(text="<ref>test</ ref>")] | |||
--- | |||
name: valid_space_end | |||
label: valid tag: spaces at the ends of both the open and close tags | |||
input: "<ref >test</ref >" | |||
output: [TagOpenOpen(), Text(text="ref"), TagCloseOpen(padding=" "), Text(text="test"), TagOpenClose(), Text(text="ref "), TagCloseClose()] | |||
--- | |||
name: invalid_template_ends | |||
label: invalid tag: a template at the ends of both the open and close tags | |||
input: "<ref {{foo}}>test</ref {{foo}}>" | |||
output: [Text(text="<ref "), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text=">test</ref "), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text=">")] | |||
--- | |||
name: invalid_template_ends_nospace | |||
label: invalid tag: a template at the ends of both the open and close tags, without spacing | |||
input: "<ref {{foo}}>test</ref{{foo}}>" | |||
output: [Text(text="<ref "), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text=">test</ref"), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text=">")] | |||
--- | |||
name: valid_template_end_open | |||
label: valid tag: a template at the end of the open tag | |||
input: "<ref {{foo}}>test</ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), TemplateOpen(), Text(text="foo"), TemplateClose(), TagCloseOpen(padding=""), Text(text="test"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: valid_template_end_open_space_end_close | |||
label: valid tag: a template at the end of the open tag; whitespace at the end of the close tag | |||
input: "<ref {{foo}}>test</ref\n>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), TemplateOpen(), Text(text="foo"), TemplateClose(), TagCloseOpen(padding=""), Text(text="test"), TagOpenClose(), Text(text="ref\n"), TagCloseClose()] | |||
--- | |||
name: invalid_template_end_open_nospace | |||
label: invalid tag: a template at the end of the open tag, without spacing | |||
input: "<ref{{foo}}>test</ref>" | |||
output: [Text(text="<ref"), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text=">test</ref>")] | |||
--- | |||
name: invalid_template_start_close | |||
label: invalid tag: a template at the beginning of the close tag | |||
input: "<ref>test</{{foo}}ref>" | |||
output: [Text(text="<ref>test</"), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="ref>")] | |||
--- | |||
name: invalid_template_start_open | |||
label: invalid tag: a template at the beginning of the open tag | |||
input: "<{{foo}}ref>test</ref>" | |||
output: [Text(text="<"), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="ref>test</ref>")] | |||
--- | |||
name: unclosed_quote | |||
label: a quoted attribute that is never closed | |||
input: "<span style="foobar>stuff</span>" | |||
output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), Text(text="\"foobar"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()] | |||
--- | |||
name: fake_quote | |||
label: a fake quoted attribute | |||
input: "<span style="foo"bar>stuff</span>" | |||
output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), Text(text="\"foo\"bar"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()] | |||
--- | |||
name: fake_quote_complex | |||
label: a fake quoted attribute, with spaces and templates and links | |||
input: "<span style="foo {{bar}}\n[[baz]]"buzz >stuff</span>" | |||
output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), Text(text="\"foo"), TagAttrStart(pad_first=" ", pad_before_eq="\n", pad_after_eq=""), TemplateOpen(), Text(text="bar"), TemplateClose(), TagAttrStart(pad_first="", pad_before_eq=" ", pad_after_eq=""), WikilinkOpen(), Text(text="baz"), WikilinkClose(), Text(text="\"buzz"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()] | |||
--- | |||
name: incomplete_lbracket | |||
label: incomplete tags: just a left bracket | |||
input: "<" | |||
output: [Text(text="<")] | |||
--- | |||
name: incomplete_lbracket_junk | |||
label: incomplete tags: just a left bracket, surrounded by stuff | |||
input: "foo<bar" | |||
output: [Text(text="foo<bar")] | |||
--- | |||
name: incomplete_unclosed_open | |||
label: incomplete tags: an unclosed open tag | |||
input: "junk <ref" | |||
output: [Text(text="junk <ref")] | |||
--- | |||
name: incomplete_unclosed_open_space | |||
label: incomplete tags: an unclosed open tag, space | |||
input: "junk <ref " | |||
output: [Text(text="junk <ref ")] | |||
--- | |||
name: incomplete_unclosed_open_unnamed_attr | |||
label: incomplete tags: an unclosed open tag, unnamed attribute | |||
input: "junk <ref name" | |||
output: [Text(text="junk <ref name")] | |||
--- | |||
name: incomplete_unclosed_open_attr_equals | |||
label: incomplete tags: an unclosed open tag, attribute, equal sign | |||
input: "junk <ref name=" | |||
output: [Text(text="junk <ref name=")] | |||
--- | |||
name: incomplete_unclosed_open_attr_equals_quoted | |||
label: incomplete tags: an unclosed open tag, attribute, equal sign, quote | |||
input: "junk <ref name="" | |||
output: [Text(text="junk <ref name=\"")] | |||
--- | |||
name: incomplete_unclosed_open_attr | |||
label: incomplete tags: an unclosed open tag, attribute with a key/value | |||
input: "junk <ref name=foo" | |||
output: [Text(text="junk <ref name=foo")] | |||
--- | |||
name: incomplete_unclosed_open_attr_quoted | |||
label: incomplete tags: an unclosed open tag, attribute with a key/value, quoted | |||
input: "junk <ref name="foo"" | |||
output: [Text(text="junk <ref name=\"foo\"")] | |||
--- | |||
name: incomplete_open | |||
label: incomplete tags: an open tag | |||
input: "junk <ref>" | |||
output: [Text(text="junk <ref>")] | |||
--- | |||
name: incomplete_open_unnamed_attr | |||
label: incomplete tags: an open tag, unnamed attribute | |||
input: "junk <ref name>" | |||
output: [Text(text="junk <ref name>")] | |||
--- | |||
name: incomplete_open_attr_equals | |||
label: incomplete tags: an open tag, attribute, equal sign | |||
input: "junk <ref name=>" | |||
output: [Text(text="junk <ref name=>")] | |||
--- | |||
name: incomplete_open_attr | |||
label: incomplete tags: an open tag, attribute with a key/value | |||
input: "junk <ref name=foo>" | |||
output: [Text(text="junk <ref name=foo>")] | |||
--- | |||
name: incomplete_open_attr_quoted | |||
label: incomplete tags: an open tag, attribute with a key/value, quoted | |||
input: "junk <ref name="foo">" | |||
output: [Text(text="junk <ref name=\"foo\">")] | |||
--- | |||
name: incomplete_open_text | |||
label: incomplete tags: an open tag, text | |||
input: "junk <ref>foo" | |||
output: [Text(text="junk <ref>foo")] | |||
--- | |||
name: incomplete_open_attr_text | |||
label: incomplete tags: an open tag, attribute with a key/value, text | |||
input: "junk <ref name=foo>bar" | |||
output: [Text(text="junk <ref name=foo>bar")] | |||
--- | |||
name: incomplete_open_text_lbracket | |||
label: incomplete tags: an open tag, text, left open bracket | |||
input: "junk <ref>bar<" | |||
output: [Text(text="junk <ref>bar<")] | |||
--- | |||
name: incomplete_open_text_lbracket_slash | |||
label: incomplete tags: an open tag, text, left bracket, slash | |||
input: "junk <ref>bar</" | |||
output: [Text(text="junk <ref>bar</")] | |||
--- | |||
name: incomplete_open_text_unclosed_close | |||
label: incomplete tags: an open tag, text, unclosed close | |||
input: "junk <ref>bar</ref" | |||
output: [Text(text="junk <ref>bar</ref")] | |||
--- | |||
name: incomplete_open_text_wrong_close | |||
label: incomplete tags: an open tag, text, wrong close | |||
input: "junk <ref>bar</span>" | |||
output: [Text(text="junk <ref>bar</span>")] | |||
--- | |||
name: incomplete_close | |||
label: incomplete tags: a close tag | |||
input: "junk </ref>" | |||
output: [Text(text="junk </ref>")] | |||
--- | |||
name: incomplete_no_tag_name_open | |||
label: incomplete tags: no tag name within brackets; just an open | |||
input: "junk <>" | |||
output: [Text(text="junk <>")] | |||
--- | |||
name: incomplete_no_tag_name_selfclosing | |||
label: incomplete tags: no tag name within brackets; self-closing | |||
input: "junk < />" | |||
output: [Text(text="junk < />")] | |||
--- | |||
name: incomplete_no_tag_name_open_close | |||
label: incomplete tags: no tag name within brackets; open and close | |||
input: "junk <></>" | |||
output: [Text(text="junk <></>")] | |||
--- | |||
name: backslash_premature_before | |||
label: a backslash before a quote before a space | |||
input: "<foo attribute="this is\\" quoted">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\" quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
name: backslash_premature_after | |||
label: a backslash before a quote after a space | |||
input: "<foo attribute="this is \\"quoted">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is \\\"quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
name: backslash_premature_middle | |||
label: a backslash before a quote in the middle of a word | |||
input: "<foo attribute="this i\\"s quoted">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this i\\\"s quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
name: backslash_adjacent | |||
label: escaped quotes next to unescaped quotes | |||
input: "<foo attribute="\\"this is quoted\\"">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="\\\"this is quoted\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
name: backslash_endquote | |||
label: a backslash before the end quote, causing the attribute to become unquoted | |||
input: "<foo attribute="this_is quoted\\">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), Text(text="\"this_is"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
name: backslash_double | |||
label: two adjacent backslashes, which do *not* affect the quote | |||
input: "<foo attribute="this is\\\\" quoted">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
name: backslash_triple | |||
label: three adjacent backslashes, which do *not* affect the quote | |||
input: "<foo attribute="this is\\\\\\" quoted">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
name: backslash_unaffecting | |||
label: backslashes near quotes, but not immediately adjacent, thus having no effect | |||
input: "<foo attribute="\\quote\\d" also="quote\\d\\">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="\\quote\\d"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="also"), TagAttrEquals(), Text(text="\"quote\\d\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
name: unparsable | |||
label: a tag that should not be put through the normal parser | |||
input: "{{t1}}<nowiki>{{t2}}</nowiki>{{t3}}" | |||
output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="{{t2}}"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), TemplateOpen(), Text(text="t3"), TemplateClose()] | |||
--- | |||
name: unparsable_complex | |||
label: a tag that should not be put through the normal parser; lots of stuff inside | |||
input: "{{t1}}<pre>{{t2}}\n==Heading==\nThis is some text with a [[page|link]].</pre>{{t3}}" | |||
output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(text="pre"), TagCloseOpen(padding=""), Text(text="{{t2}}\n==Heading==\nThis is some text with a [[page|link]]."), TagOpenClose(), Text(text="pre"), TagCloseClose(), TemplateOpen(), Text(text="t3"), TemplateClose()] | |||
--- | |||
name: unparsable_attributed | |||
label: a tag that should not be put through the normal parser; parsed attributes | |||
input: "{{t1}}<nowiki attr=val attr2="{{val2}}">{{t2}}</nowiki>{{t3}}" | |||
output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(text="nowiki"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr"), TagAttrEquals(), Text(text="val"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr2"), TagAttrEquals(), TagAttrQuote(), TemplateOpen(), Text(text="val2"), TemplateClose(), TagCloseOpen(padding=""), Text(text="{{t2}}"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), TemplateOpen(), Text(text="t3"), TemplateClose()] | |||
--- | |||
name: unparsable_incomplete | |||
label: a tag that should not be put through the normal parser; incomplete | |||
input: "{{t1}}<nowiki>{{t2}}{{t3}}" | |||
output: [TemplateOpen(), Text(text="t1"), TemplateClose(), Text(text="<nowiki>"), TemplateOpen(), Text(text="t2"), TemplateClose(), TemplateOpen(), Text(text="t3"), TemplateClose()] | |||
--- | |||
name: single_open_close | |||
label: a tag that supports being single; both an open and a close tag | |||
input: "foo<li>bar{{baz}}</li>" | |||
output: [Text(text="foo"), TagOpenOpen(), Text(text="li"), TagCloseOpen(padding=""), Text(text="bar"), TemplateOpen(), Text(text="baz"), TemplateClose(), TagOpenClose(), Text(text="li"), TagCloseClose()] | |||
--- | |||
name: single_open | |||
label: a tag that supports being single; just an open tag | |||
input: "foo<li>bar{{baz}}" | |||
output: [Text(text="foo"), TagOpenOpen(), Text(text="li"), TagCloseSelfclose(padding="", implicit=True), Text(text="bar"), TemplateOpen(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: single_selfclose | |||
label: a tag that supports being single; a self-closing tag | |||
input: "foo<li/>bar{{baz}}" | |||
output: [Text(text="foo"), TagOpenOpen(), Text(text="li"), TagCloseSelfclose(padding=""), Text(text="bar"), TemplateOpen(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: single_close | |||
label: a tag that supports being single; just a close tag | |||
input: "foo</li>bar{{baz}}" | |||
output: [Text(text="foo</li>bar"), TemplateOpen(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: single_only_open_close | |||
label: a tag that can only be single; both an open and a close tag | |||
input: "foo<br>bar{{baz}}</br>" | |||
output: [Text(text="foo"), TagOpenOpen(), Text(text="br"), TagCloseSelfclose(padding="", implicit=True), Text(text="bar"), TemplateOpen(), Text(text="baz"), TemplateClose(), TagOpenOpen(invalid=True), Text(text="br"), TagCloseSelfclose(padding="", implicit=True)] | |||
--- | |||
name: single_only_open | |||
label: a tag that can only be single; just an open tag | |||
input: "foo<br>bar{{baz}}" | |||
output: [Text(text="foo"), TagOpenOpen(), Text(text="br"), TagCloseSelfclose(padding="", implicit=True), Text(text="bar"), TemplateOpen(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: single_only_selfclose | |||
label: a tag that can only be single; a self-closing tag | |||
input: "foo<br/>bar{{baz}}" | |||
output: [Text(text="foo"), TagOpenOpen(), Text(text="br"), TagCloseSelfclose(padding=""), Text(text="bar"), TemplateOpen(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: single_only_close | |||
label: a tag that can only be single; just a close tag | |||
input: "foo</br>bar{{baz}}" | |||
output: [Text(text="foo"), TagOpenOpen(invalid=True), Text(text="br"), TagCloseSelfclose(padding="", implicit=True), Text(text="bar"), TemplateOpen(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: single_only_double | |||
label: a tag that can only be single; a tag with slashes at the beginning and end | |||
input: "foo</br/>bar{{baz}}" | |||
output: [Text(text="foo"), TagOpenOpen(invalid=True), Text(text="br"), TagCloseSelfclose(padding=""), Text(text="bar"), TemplateOpen(), Text(text="baz"), TemplateClose()] |
@@ -23,3 +23,10 @@ name: unicode2 | |||
label: additional unicode check for non-BMP codepoints | |||
input: "𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰" | |||
output: [Text(text="𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰")] | |||
--- | |||
name: large | |||
label: a lot of text, requiring multiple textbuffer blocks in the C tokenizer | |||
input: "ZWfsZYcZyhGbkDYJiguJuuhsNyHGFkFhnjkbLJyXIygTHqcXdhsDkEOTSIKYlBiohLIkiXxvyebUyCGvvBcYqFdtcftGmaAanKXEIyYSEKlTfEEbdGhdePVwVImOyKiHSzAEuGyEVRIKPZaNjQsYqpqARIQfvAklFtQyTJVGlLwjJIxYkiqmHBmdOvTyNqJRbMvouoqXRyOhYDwowtkcZGSOcyzVxibQdnzhDYbrgbatUrlOMRvFSzmLWHRihtXnddwYadPgFWUOxAzAgddJVDXHerawdkrRuWaEXfuwQSkQUmLEJUmrgXDVlXCpciaisfuOUjBldElygamkkXbewzLucKRnAEBimIIotXeslRRhnqQjrypnLQvvdCsKFWPVTZaHvzJMFEahDHWcCbyXgxFvknWjhVfiLSDuFhGoFxqSvhjnnRZLmCMhmWeOgSoanDEInKTWHnbpKyUlabLppITDFFxyWKAnUYJQIcmYnrvMmzmtYvsbCYbebgAhMFVVFAKUSvlkLFYluDpbpBaNFWyfXTaOdSBrfiHDTWGBTUCXMqVvRCIMrEjWpQaGsABkioGnveQWqBTDdRQlxQiUipwfyqAocMddXqdvTHhEwjEzMkOSWVPjJvDtClhYwpvRztPmRKCSpGIpXQqrYtTLmShFdpKtOxGtGOZYIdyUGPjdmyvhJTQMtgYJWUUZnecRjBfQXsyWQWikyONySLzLEqRFqcJYdRNFcGwWZtfZasfFWcvdsHRXoqKlKYihRAOJdrPBDdxksXFwKceQVncmFXfUfBsNgjKzoObVExSnRnjegeEhqxXzPmFcuiasViAFeaXrAxXhSfSyCILkKYpjxNeKynUmdcGAbwRwRnlAFbOSCafmzXddiNpLCFTHBELvArdXFpKUGpSHRekhrMedMRNkQzmSyFKjVwiWwCvbNWjgxJRzYeRxHiCCRMXktmKBxbxGZvOpvZIJOwvGIxcBLzsMFlDqAMLtScdsJtrbIUAvKfcdChXGnBzIxGxXMgxJhayrziaCswdpjJJJhkaYnGhHXqZwOzHFdhhUIEtfjERdLaSPRTDDMHpQtonNaIgXUYhjdbnnKppfMBxgNSOOXJAPtFjfAKnrRDrumZBpNhxMstqjTGBViRkDqbTdXYUirsedifGYzZpQkvdNhtFTOPgsYXYCwZHLcSLSfwfpQKtWfZuRUUryHJsbVsAOQcIJdSKKlOvCeEjUQNRPHKXuBJUjPuaAJJxcDMqyaufqfVwUmHLdjeYZzSiiGLHOTCInpVAalbXXTMLugLiwFiyPSuSFiyJUKVrWjbZAHaJtZnQmnvorRrxdPKThqXzNgTjszQiCoMczRnwGYJMERUWGXFyrSbAqsHmLwLlnJOJoXNsjVehQjVOpQOQJAZWwFZBlgyVIplzLTlFwumPgBLYrUIAJAcmvHPGfHfWQguCjfTYzxYfbohaLFAPwxFRrNuCdCzLlEbuhyYjCmuDBTJDMCdLpNRVqEALjnPSaBPsKWRCKNGwEMFpiEWbYZRwaMopjoUuBUvMpvyLfsPKDrfQLiFOQIWPtLIMoijUEUYfhykHrSKbTtrvjwIzHdWZDVwLIpNkloCqpzIsErxxKAFuFEjikWNYChqYqVslXMtoSWzNhbMuxYbzLfJIcPGoUeGPkGyPQNhDyrjgdKekzftFrRPTuyLYqCArkDcWHTrjPQHfoThBNnTQyMwLEWxEnBXLtzJmFVLGEPrdbEwlXpgYfnVnWoNXgPQKKyiXifpvrmJATzQOzYwFhliiYxlbnsEPKbHYUfJLrwYPfSUwTIHiEvBFMrEtVmqJobfcwsiiEudTIiAnrtuywgKLOiMYbEIOAOJdOXqroPjWnQQcTNxFvkIEIsuHLyhSqSphuSmlvknzydQEnebOreeZwOouXYKlObAkaWHhOdTFLoMCHOWrVKeXjcniaxtgCziKEqWOZUWHJQpcDJzYnnduDZrmxgjZroBRwoPBUTJMYipsgJwbTSlvMyXXdAmiEWGMiQxhGvHGPLOKeTxNaLnFVbWpiYIVyqN" | |||
output: [Text(text="ZWfsZYcZyhGbkDYJiguJuuhsNyHGFkFhnjkbLJyXIygTHqcXdhsDkEOTSIKYlBiohLIkiXxvyebUyCGvvBcYqFdtcftGmaAanKXEIyYSEKlTfEEbdGhdePVwVImOyKiHSzAEuGyEVRIKPZaNjQsYqpqARIQfvAklFtQyTJVGlLwjJIxYkiqmHBmdOvTyNqJRbMvouoqXRyOhYDwowtkcZGSOcyzVxibQdnzhDYbrgbatUrlOMRvFSzmLWHRihtXnddwYadPgFWUOxAzAgddJVDXHerawdkrRuWaEXfuwQSkQUmLEJUmrgXDVlXCpciaisfuOUjBldElygamkkXbewzLucKRnAEBimIIotXeslRRhnqQjrypnLQvvdCsKFWPVTZaHvzJMFEahDHWcCbyXgxFvknWjhVfiLSDuFhGoFxqSvhjnnRZLmCMhmWeOgSoanDEInKTWHnbpKyUlabLppITDFFxyWKAnUYJQIcmYnrvMmzmtYvsbCYbebgAhMFVVFAKUSvlkLFYluDpbpBaNFWyfXTaOdSBrfiHDTWGBTUCXMqVvRCIMrEjWpQaGsABkioGnveQWqBTDdRQlxQiUipwfyqAocMddXqdvTHhEwjEzMkOSWVPjJvDtClhYwpvRztPmRKCSpGIpXQqrYtTLmShFdpKtOxGtGOZYIdyUGPjdmyvhJTQMtgYJWUUZnecRjBfQXsyWQWikyONySLzLEqRFqcJYdRNFcGwWZtfZasfFWcvdsHRXoqKlKYihRAOJdrPBDdxksXFwKceQVncmFXfUfBsNgjKzoObVExSnRnjegeEhqxXzPmFcuiasViAFeaXrAxXhSfSyCILkKYpjxNeKynUmdcGAbwRwRnlAFbOSCafmzXddiNpLCFTHBELvArdXFpKUGpSHRekhrMedMRNkQzmSyFKjVwiWwCvbNWjgxJRzYeRxHiCCRMXktmKBxbxGZvOpvZIJOwvGIxcBLzsMFlDqAMLtScdsJtrbIUAvKfcdChXGnBzIxGxXMgxJhayrziaCswdpjJJJhkaYnGhHXqZwOzHFdhhUIEtfjERdLaSPRTDDMHpQtonNaIgXUYhjdbnnKppfMBxgNSOOXJAPtFjfAKnrRDrumZBpNhxMstqjTGBViRkDqbTdXYUirsedifGYzZpQkvdNhtFTOPgsYXYCwZHLcSLSfwfpQKtWfZuRUUryHJsbVsAOQcIJdSKKlOvCeEjUQNRPHKXuBJUjPuaAJJxcDMqyaufqfVwUmHLdjeYZzSiiGLHOTCInpVAalbXXTMLugLiwFiyPSuSFiyJUKVrWjbZAHaJtZnQmnvorRrxdPKThqXzNgTjszQiCoMczRnwGYJMERUWGXFyrSbAqsHmLwLlnJOJoXNsjVehQjVOpQOQJAZWwFZBlgyVIplzLTlFwumPgBLYrUIAJAcmvHPGfHfWQguCjfTYzxYfbohaLFAPwxFRrNuCdCzLlEbuhyYjCmuDBTJDMCdLpNRVqEALjnPSaBPsKWRCKNGwEMFpiEWbYZRwaMopjoUuBUvMpvyLfsPKDrfQLiFOQIWPtLIMoijUEUYfhykHrSKbTtrvjwIzHdWZDVwLIpNkloCqpzIsErxxKAFuFEjikWNYChqYqVslXMtoSWzNhbMuxYbzLfJIcPGoUeGPkGyPQNhDyrjgdKekzftFrRPTuyLYqCArkDcWHTrjPQHfoThBNnTQyMwLEWxEnBXLtzJmFVLGEPrdbEwlXpgYfnVnWoNXgPQKKyiXifpvrmJATzQOzYwFhliiYxlbnsEPKbHYUfJLrwYPfSUwTIHiEvBFMrEtVmqJobfcwsiiEudTIiAnrtuywgKLOiMYbEIOAOJdOXqroPjWnQQcTNxFvkIEIsuHLyhSqSphuSmlvknzydQEnebOreeZwOouXYKlObAkaWHhOdTFLoMCHOWrVKeXjcniaxtgCziKEqWOZUWHJQpcDJzYnnduDZrmxgjZroBRwoPBUTJMYipsgJwbTSlvMyXXdAmiEWGMiQxhGvHGPLOKeTxNaLnFVbWpiYIVyqN")] |