@@ -10,6 +10,10 @@ v0.4 (unreleased): | |||
option, RECURSE_OTHERS, which recurses over all children except instances of | |||
'forcetype' (for example, `code.filter_templates(code.RECURSE_OTHERS)` | |||
returns all un-nested templates). | |||
- The parser now understands HTML tag attributes quoted with single quotes. | |||
When setting a tag attribute's value, quotes will be added if necessary. As | |||
part of this, Attribute's 'quoted' attribute has been changed to 'quotes', | |||
and is now either a string or None. | |||
- Calling Template.remove() with a Parameter object that is not part of the | |||
template now raises ValueError instead of doing nothing. | |||
- Parameters with non-integer keys can no longer be created with | |||
@@ -18,6 +18,11 @@ Unreleased | |||
which recurses over all children except instances of *forcetype* (for | |||
example, ``code.filter_templates(code.RECURSE_OTHERS)`` returns all un-nested | |||
templates). | |||
- The parser now understands HTML tag attributes quoted with single quotes. | |||
When setting a tag attribute's value, quotes will be added if necessary. As | |||
part of this, :py:class:`.Attribute`\ 's :py:attr:`~.Attribute.quoted` | |||
attribute has been changed to :py:attr:`~.Attribute.quotes`, and is now | |||
either a string or ``None``. | |||
- Calling :py:meth:`.Template.remove` with a :py:class:`.Parameter` object that | |||
is not part of the template now raises :py:exc:`ValueError` instead of doing | |||
nothing. | |||
@@ -36,12 +36,14 @@ class Attribute(StringMixIn): | |||
whose value is ``"foo"``. | |||
""" | |||
def __init__(self, name, value=None, quoted=True, pad_first=" ", | |||
def __init__(self, name, value=None, quotes='"', pad_first=" ", | |||
pad_before_eq="", pad_after_eq=""): | |||
super(Attribute, self).__init__() | |||
if not quotes and self._value_needs_quotes(value): | |||
raise ValueError("given value {0!r} requires quotes".format(value)) | |||
self._name = name | |||
self._value = value | |||
self._quoted = quoted | |||
self._quotes = quotes | |||
self._pad_first = pad_first | |||
self._pad_before_eq = pad_before_eq | |||
self._pad_after_eq = pad_after_eq | |||
@@ -50,11 +52,18 @@ class Attribute(StringMixIn): | |||
result = self.pad_first + str(self.name) + self.pad_before_eq | |||
if self.value is not None: | |||
result += "=" + self.pad_after_eq | |||
if self.quoted: | |||
return result + '"' + str(self.value) + '"' | |||
if self.quotes: | |||
return result + self.quotes + str(self.value) + self.quotes | |||
return result + str(self.value) | |||
return result | |||
@staticmethod | |||
def _value_needs_quotes(val): | |||
"""Return the preferred quotes for the given value, or None.""" | |||
if val and any(char.isspace() for char in val): | |||
return ('"' in val and "'" in val) or ("'" if '"' in val else '"') | |||
return None | |||
def _set_padding(self, attr, value): | |||
"""Setter for the value of a padding attribute.""" | |||
if not value: | |||
@@ -65,6 +74,14 @@ class Attribute(StringMixIn): | |||
raise ValueError("padding must be entirely whitespace") | |||
setattr(self, attr, value) | |||
@staticmethod | |||
def coerce_quotes(quotes): | |||
"""Coerce a quote type into an acceptable value, or raise an error.""" | |||
orig, quotes = quotes, str(quotes) if quotes else None | |||
if quotes not in [None, '"', "'"]: | |||
raise ValueError("{0!r} is not a valid quote type".format(orig)) | |||
return quotes | |||
@property | |||
def name(self): | |||
"""The name of the attribute as a :py:class:`~.Wikicode` object.""" | |||
@@ -76,9 +93,9 @@ class Attribute(StringMixIn): | |||
return self._value | |||
@property | |||
def quoted(self): | |||
"""Whether the attribute's value is quoted with double quotes.""" | |||
return self._quoted | |||
def quotes(self): | |||
"""How to enclose the attribute value. ``"``, ``'``, or ``None``.""" | |||
return self._quotes | |||
@property | |||
def pad_first(self): | |||
@@ -101,11 +118,21 @@ class Attribute(StringMixIn): | |||
@value.setter | |||
def value(self, newval): | |||
self._value = None if newval is None else parse_anything(newval) | |||
@quoted.setter | |||
def quoted(self, value): | |||
self._quoted = bool(value) | |||
if newval is None: | |||
self._value = None | |||
else: | |||
code = parse_anything(newval) | |||
quotes = self._value_needs_quotes(code) | |||
if quotes in ['"', "'"] or (quotes is True and not self.quotes): | |||
self._quotes = quotes | |||
self._value = code | |||
@quotes.setter | |||
def quotes(self, value): | |||
value = self.coerce_quotes(value) | |||
if not value and self._value_needs_quotes(self.value): | |||
raise ValueError("attribute value requires quotes") | |||
self._quotes = value | |||
@pad_first.setter | |||
def pad_first(self, value): | |||
@@ -236,21 +236,24 @@ class Tag(Node): | |||
return attr | |||
raise ValueError(name) | |||
def add(self, name, value=None, quoted=True, pad_first=" ", | |||
def add(self, name, value=None, quotes='"', pad_first=" ", | |||
pad_before_eq="", pad_after_eq=""): | |||
"""Add an attribute with the given *name* and *value*. | |||
*name* and *value* can be anything parsable by | |||
:py:func:`.utils.parse_anything`; *value* can be omitted if the | |||
attribute is valueless. *quoted* is a bool telling whether to wrap the | |||
*value* in double quotes (this is recommended). *pad_first*, | |||
*pad_before_eq*, and *pad_after_eq* are whitespace used as padding | |||
before the name, before the equal sign (or after the name if no value), | |||
and after the equal sign (ignored if no value), respectively. | |||
attribute is valueless. If *quotes* is not ``None``, it should be a | |||
string (either ``"`` or ``'``) that *value* will be wrapped in (this is | |||
recommended). ``None`` is only legal if *value* contains no spacing. | |||
*pad_first*, *pad_before_eq*, and *pad_after_eq* are whitespace used as | |||
padding before the name, before the equal sign (or after the name if no | |||
value), and after the equal sign (ignored if no value), respectively. | |||
""" | |||
if value is not None: | |||
value = parse_anything(value) | |||
attr = Attribute(parse_anything(name), value, quoted) | |||
quotes = Attribute.coerce_quotes(quotes) | |||
attr = Attribute(parse_anything(name), value, quotes) | |||
attr.pad_first = pad_first | |||
attr.pad_before_eq = pad_before_eq | |||
attr.pad_after_eq = pad_after_eq | |||
@@ -193,7 +193,7 @@ class Builder(object): | |||
def _handle_attribute(self, start): | |||
"""Handle a case where a tag attribute is at the head of the tokens.""" | |||
name, quoted = None, False | |||
name = quotes = None | |||
self._push() | |||
while self._tokens: | |||
token = self._tokens.pop() | |||
@@ -201,7 +201,7 @@ class Builder(object): | |||
name = self._pop() | |||
self._push() | |||
elif isinstance(token, tokens.TagAttrQuote): | |||
quoted = True | |||
quotes = token.char | |||
elif isinstance(token, (tokens.TagAttrStart, tokens.TagCloseOpen, | |||
tokens.TagCloseSelfclose)): | |||
self._tokens.append(token) | |||
@@ -209,7 +209,7 @@ class Builder(object): | |||
value = self._pop() | |||
else: | |||
name, value = self._pop(), None | |||
return Attribute(name, value, quoted, start.pad_first, | |||
return Attribute(name, value, quotes, start.pad_first, | |||
start.pad_before_eq, start.pad_after_eq) | |||
else: | |||
self._write(self._handle_token(token)) | |||
@@ -173,7 +173,7 @@ static TagData* TagData_new(void) | |||
ALLOC_BUFFER(self->pad_first) | |||
ALLOC_BUFFER(self->pad_before_eq) | |||
ALLOC_BUFFER(self->pad_after_eq) | |||
self->reset = 0; | |||
self->quoter = self->reset = 0; | |||
return self; | |||
} | |||
@@ -1566,10 +1566,18 @@ static int Tokenizer_parse_comment(Tokenizer* self) | |||
*/ | |||
static int Tokenizer_push_tag_buffer(Tokenizer* self, TagData* data) | |||
{ | |||
PyObject *tokens, *kwargs, *pad_first, *pad_before_eq, *pad_after_eq; | |||
PyObject *tokens, *kwargs, *tmp, *pad_first, *pad_before_eq, *pad_after_eq; | |||
if (data->context & TAG_QUOTED) { | |||
if (Tokenizer_emit_first(self, TagAttrQuote)) | |||
kwargs = PyDict_New(); | |||
if (!kwargs) | |||
return -1; | |||
tmp = PyUnicode_FromUnicode(&data->quoter, 1); | |||
if (!tmp) | |||
return -1; | |||
PyDict_SetItemString(kwargs, "char", tmp); | |||
Py_DECREF(tmp); | |||
if (Tokenizer_emit_first_kwargs(self, TagAttrQuote, kwargs)) | |||
return -1; | |||
tokens = Tokenizer_pop(self); | |||
if (!tokens) | |||
@@ -1721,16 +1729,17 @@ Tokenizer_handle_tag_data(Tokenizer* self, TagData* data, Py_UNICODE chunk) | |||
Tokenizer_READ_BACKWARDS(self, 2) != '\\'); | |||
if (data->context & TAG_NOTE_QUOTE) { | |||
data->context ^= TAG_NOTE_QUOTE; | |||
if (chunk == '"' && !escaped) { | |||
if ((chunk == '"' || chunk == '\'') && !escaped) { | |||
data->context |= TAG_QUOTED; | |||
data->quoter = chunk; | |||
data->reset = self->head; | |||
if (Tokenizer_push(self, self->topstack->context)) | |||
return -1; | |||
data->reset = self->head; | |||
return 0; | |||
} | |||
} | |||
else if (data->context & TAG_QUOTED) { | |||
if (chunk == '"' && !escaped) { | |||
if (chunk == data->quoter && !escaped) { | |||
data->context |= TAG_NOTE_SPACE; | |||
return 0; | |||
} | |||
@@ -206,6 +206,7 @@ typedef struct { | |||
struct Textbuffer* pad_first; | |||
struct Textbuffer* pad_before_eq; | |||
struct Textbuffer* pad_after_eq; | |||
Py_UNICODE quoter; | |||
Py_ssize_t reset; | |||
} TagData; | |||
@@ -53,6 +53,7 @@ class _TagOpenData(object): | |||
def __init__(self):
    """Initialize the per-tag state used while tokenizing an open tag."""
    self.context = self.CX_NAME
    # Padding text keyed by position; these entries are what get passed as
    # the pad_first / pad_before_eq / pad_after_eq fields of TagAttrStart.
    self.padding_buffer = {"first": "", "before_eq": "", "after_eq": ""}
    # Quote character that opened the current attribute value; stays None
    # until the tokenizer sees an opening quote.
    self.quoter = None
    # Set to the tokenizer's head position when a quoted value begins.
    # NOTE(review): how this is consumed is not visible here -- presumably
    # used to rewind on a failed quoted parse; confirm in the tokenizer.
    self.reset = 0
@@ -66,7 +67,7 @@ class Tokenizer(object): | |||
MAX_DEPTH = 40 | |||
MAX_CYCLES = 100000 | |||
regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE) | |||
tag_splitter = re.compile(r"([\s\"\\]+)") | |||
tag_splitter = re.compile(r"([\s\"\'\\]+)") | |||
def __init__(self): | |||
self._text = None | |||
@@ -612,7 +613,7 @@ class Tokenizer(object): | |||
def _push_tag_buffer(self, data): | |||
"""Write a pending tag attribute from *data* to the stack.""" | |||
if data.context & data.CX_QUOTED: | |||
self._emit_first(tokens.TagAttrQuote()) | |||
self._emit_first(tokens.TagAttrQuote(char=data.quoter)) | |||
self._emit_all(self._pop()) | |||
buf = data.padding_buffer | |||
self._emit_first(tokens.TagAttrStart(pad_first=buf["first"], | |||
@@ -689,13 +690,14 @@ class Tokenizer(object): | |||
escaped = self._read(-1) == "\\" and self._read(-2) != "\\" | |||
if data.context & data.CX_NOTE_QUOTE: | |||
data.context ^= data.CX_NOTE_QUOTE | |||
if chunk == '"' and not escaped: | |||
if chunk in "'\"" and not escaped: | |||
data.context |= data.CX_QUOTED | |||
self._push(self._context) | |||
data.quoter = chunk | |||
data.reset = self._head | |||
self._push(self._context) | |||
continue | |||
elif data.context & data.CX_QUOTED: | |||
if chunk == '"' and not escaped: | |||
if chunk == data.quoter and not escaped: | |||
data.context |= data.CX_NOTE_SPACE | |||
continue | |||
self._handle_tag_text(chunk) | |||
@@ -100,7 +100,7 @@ CommentEnd = make("CommentEnd") # --> | |||
TagOpenOpen = make("TagOpenOpen") # < | |||
TagAttrStart = make("TagAttrStart") | |||
TagAttrEquals = make("TagAttrEquals") # = | |||
TagAttrQuote = make("TagAttrQuote") # " | |||
TagAttrQuote = make("TagAttrQuote") # ", ' | |||
TagCloseOpen = make("TagCloseOpen") # > | |||
TagCloseSelfclose = make("TagCloseSelfclose") # /> | |||
TagOpenClose = make("TagOpenClose") # </ | |||
@@ -98,7 +98,7 @@ class TreeEqualityTestCase(TestCase): | |||
self.assertWikicodeEqual(exp_attr.name, act_attr.name) | |||
if exp_attr.value is not None: | |||
self.assertWikicodeEqual(exp_attr.value, act_attr.value) | |||
self.assertIs(exp_attr.quoted, act_attr.quoted) | |||
self.assertEqual(exp_attr.quotes, act_attr.quotes) | |||
self.assertEqual(exp_attr.pad_first, act_attr.pad_first) | |||
self.assertEqual(exp_attr.pad_before_eq, act_attr.pad_before_eq) | |||
self.assertEqual(exp_attr.pad_after_eq, act_attr.pad_after_eq) | |||
@@ -42,12 +42,14 @@ class TestAttribute(TreeEqualityTestCase): | |||
self.assertEqual(" foo", str(node)) | |||
node2 = Attribute(wraptext("foo"), wraptext("bar")) | |||
self.assertEqual(' foo="bar"', str(node2)) | |||
node3 = Attribute(wraptext("a"), wraptext("b"), True, "", " ", " ") | |||
node3 = Attribute(wraptext("a"), wraptext("b"), '"', "", " ", " ") | |||
self.assertEqual('a = "b"', str(node3)) | |||
node3 = Attribute(wraptext("a"), wraptext("b"), False, "", " ", " ") | |||
self.assertEqual("a = b", str(node3)) | |||
node4 = Attribute(wraptext("a"), wrap([]), False, " ", "", " ") | |||
self.assertEqual(" a= ", str(node4)) | |||
node4 = Attribute(wraptext("a"), wraptext("b"), "'", "", " ", " ") | |||
self.assertEqual("a = 'b'", str(node4)) | |||
node5 = Attribute(wraptext("a"), wraptext("b"), None, "", " ", " ") | |||
self.assertEqual("a = b", str(node5)) | |||
node6 = Attribute(wraptext("a"), wrap([]), None, " ", "", " ") | |||
self.assertEqual(" a= ", str(node6)) | |||
def test_name(self): | |||
"""test getter/setter for the name attribute""" | |||
@@ -66,17 +68,35 @@ class TestAttribute(TreeEqualityTestCase): | |||
self.assertWikicodeEqual(wrap([Template(wraptext("bar"))]), node.value) | |||
node.value = None | |||
self.assertIs(None, node.value) | |||
node2 = Attribute(wraptext("id"), wraptext("foo"), None) | |||
node2.value = "foo bar baz" | |||
self.assertWikicodeEqual(wraptext("foo bar baz"), node2.value) | |||
self.assertEqual('"', node2.quotes) | |||
node2.value = 'foo "bar" baz' | |||
self.assertWikicodeEqual(wraptext('foo "bar" baz'), node2.value) | |||
self.assertEqual("'", node2.quotes) | |||
node2.value = "foo 'bar' baz" | |||
self.assertWikicodeEqual(wraptext("foo 'bar' baz"), node2.value) | |||
self.assertEqual('"', node2.quotes) | |||
node2.value = "fo\"o 'bar' b\"az" | |||
self.assertWikicodeEqual(wraptext("fo\"o 'bar' b\"az"), node2.value) | |||
self.assertEqual('"', node2.quotes) | |||
def test_quoted(self): | |||
"""test getter/setter for the quoted attribute""" | |||
node1 = Attribute(wraptext("id"), wraptext("foo"), False) | |||
def test_quotes(self): | |||
"""test getter/setter for the quotes attribute""" | |||
node1 = Attribute(wraptext("id"), wraptext("foo"), None) | |||
node2 = Attribute(wraptext("id"), wraptext("bar")) | |||
self.assertFalse(node1.quoted) | |||
self.assertTrue(node2.quoted) | |||
node1.quoted = True | |||
node2.quoted = "" | |||
self.assertTrue(node1.quoted) | |||
self.assertFalse(node2.quoted) | |||
node3 = Attribute(wraptext("id"), wraptext("foo bar baz")) | |||
self.assertIs(None, node1.quotes) | |||
self.assertEqual('"', node2.quotes) | |||
node1.quotes = "'" | |||
node2.quotes = None | |||
self.assertEqual("'", node1.quotes) | |||
self.assertIs(None, node2.quotes) | |||
self.assertRaises(ValueError, setattr, node1, "quotes", "foobar") | |||
self.assertRaises(ValueError, setattr, node3, "quotes", None) | |||
self.assertRaises(ValueError, Attribute, wraptext("id"), | |||
wraptext("foo bar baz"), None) | |||
def test_padding(self): | |||
"""test getter/setter for the padding attributes""" | |||
@@ -270,7 +270,7 @@ class TestBuilder(TreeEqualityTestCase): | |||
tokens.TagAttrStart(pad_first=" ", pad_before_eq="", | |||
pad_after_eq=""), | |||
tokens.Text(text="name"), tokens.TagAttrEquals(), | |||
tokens.TagAttrQuote(), tokens.Text(text="abc"), | |||
tokens.TagAttrQuote(char='"'), tokens.Text(text="abc"), | |||
tokens.TagCloseSelfclose(padding=" ")], | |||
wrap([Tag(wraptext("ref"), | |||
attrs=[Attribute(wraptext("name"), wraptext("abc"))], | |||
@@ -298,7 +298,7 @@ class TestBuilder(TreeEqualityTestCase): | |||
wrap([Tag(wraptext("br"), self_closing=True, invalid=True)])), | |||
# <ref name={{abc}} foo="bar {{baz}}" abc={{de}}f ghi=j{{k}}{{l}} | |||
# mno = "{{p}} [[q]] {{r}}">[[Source]]</ref> | |||
# mno = '{{p}} [[q]] {{r}}'>[[Source]]</ref> | |||
([tokens.TagOpenOpen(), tokens.Text(text="ref"), | |||
tokens.TagAttrStart(pad_first=" ", pad_before_eq="", | |||
pad_after_eq=""), | |||
@@ -308,7 +308,7 @@ class TestBuilder(TreeEqualityTestCase): | |||
tokens.TagAttrStart(pad_first=" ", pad_before_eq="", | |||
pad_after_eq=""), | |||
tokens.Text(text="foo"), tokens.TagAttrEquals(), | |||
tokens.TagAttrQuote(), tokens.Text(text="bar "), | |||
tokens.TagAttrQuote(char='"'), tokens.Text(text="bar "), | |||
tokens.TemplateOpen(), tokens.Text(text="baz"), | |||
tokens.TemplateClose(), | |||
tokens.TagAttrStart(pad_first=" ", pad_before_eq="", | |||
@@ -326,7 +326,7 @@ class TestBuilder(TreeEqualityTestCase): | |||
tokens.TagAttrStart(pad_first=" \n ", pad_before_eq=" ", | |||
pad_after_eq=" "), | |||
tokens.Text(text="mno"), tokens.TagAttrEquals(), | |||
tokens.TagAttrQuote(), tokens.TemplateOpen(), | |||
tokens.TagAttrQuote(char="'"), tokens.TemplateOpen(), | |||
tokens.Text(text="p"), tokens.TemplateClose(), | |||
tokens.Text(text=" "), tokens.WikilinkOpen(), | |||
tokens.Text(text="q"), tokens.WikilinkClose(), | |||
@@ -338,17 +338,17 @@ class TestBuilder(TreeEqualityTestCase): | |||
tokens.TagCloseClose()], | |||
wrap([Tag(wraptext("ref"), wrap([Wikilink(wraptext("Source"))]), [ | |||
Attribute(wraptext("name"), | |||
wrap([Template(wraptext("abc"))]), False), | |||
wrap([Template(wraptext("abc"))]), None), | |||
Attribute(wraptext("foo"), wrap([Text("bar "), | |||
Template(wraptext("baz"))]), pad_first=" "), | |||
Attribute(wraptext("abc"), wrap([Template(wraptext("de")), | |||
Text("f")]), False), | |||
Text("f")]), None), | |||
Attribute(wraptext("ghi"), wrap([Text("j"), | |||
Template(wraptext("k")), | |||
Template(wraptext("l"))]), False), | |||
Template(wraptext("l"))]), None), | |||
Attribute(wraptext("mno"), wrap([Template(wraptext("p")), | |||
Text(" "), Wikilink(wraptext("q")), Text(" "), | |||
Template(wraptext("r"))]), True, " \n ", " ", | |||
Template(wraptext("r"))]), "'", " \n ", " ", | |||
" ")])])), | |||
# "''italic text''" | |||
@@ -34,9 +34,9 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | |||
agen = lambda name, value: Attribute(wraptext(name), wraptext(value)) | |||
agennv = lambda name: Attribute(wraptext(name)) | |||
agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), False) | |||
agenp = lambda name, v, a, b, c: Attribute(wraptext(name), v, True, a, b, c) | |||
agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, True, a, b, c) | |||
agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), None) | |||
agenp = lambda name, v, a, b, c: Attribute(wraptext(name), v, '"', a, b, c) | |||
agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, '"', a, b, c) | |||
class TestTag(TreeEqualityTestCase): | |||
"""Test cases for the Tag node.""" | |||
@@ -276,28 +276,33 @@ class TestTag(TreeEqualityTestCase): | |||
"""test Tag.add()""" | |||
node = Tag(wraptext("ref"), wraptext("cite")) | |||
node.add("name", "value") | |||
node.add("name", "value", quoted=False) | |||
node.add("name", "value", quotes=None) | |||
node.add("name", "value", quotes="'") | |||
node.add("name") | |||
node.add(1, False) | |||
node.add("style", "{{foobar}}") | |||
node.add("name", "value", True, "\n", " ", " ") | |||
node.add("name", "value", '"', "\n", " ", " ") | |||
attr1 = ' name="value"' | |||
attr2 = " name=value" | |||
attr3 = " name" | |||
attr4 = ' 1="False"' | |||
attr5 = ' style="{{foobar}}"' | |||
attr6 = '\nname = "value"' | |||
attr3 = " name='value'" | |||
attr4 = " name" | |||
attr5 = ' 1="False"' | |||
attr6 = ' style="{{foobar}}"' | |||
attr7 = '\nname = "value"' | |||
self.assertEqual(attr1, node.attributes[0]) | |||
self.assertEqual(attr2, node.attributes[1]) | |||
self.assertEqual(attr3, node.attributes[2]) | |||
self.assertEqual(attr4, node.attributes[3]) | |||
self.assertEqual(attr5, node.attributes[4]) | |||
self.assertEqual(attr6, node.attributes[5]) | |||
self.assertEqual(attr6, node.get("name")) | |||
self.assertEqual(attr7, node.attributes[6]) | |||
self.assertEqual(attr7, node.get("name")) | |||
self.assertWikicodeEqual(wrap([Template(wraptext("foobar"))]), | |||
node.attributes[4].value) | |||
node.attributes[5].value) | |||
self.assertEqual("".join(("<ref", attr1, attr2, attr3, attr4, attr5, | |||
attr6, ">cite</ref>")), node) | |||
attr6, attr7, ">cite</ref>")), node) | |||
self.assertRaises(ValueError, node.add, "name", "foo", quotes="bar") | |||
self.assertRaises(ValueError, node.add, "name", "a bc d", quotes=None) | |||
def test_remove(self): | |||
"""test Tag.remove()""" | |||
@@ -43,7 +43,7 @@ output: [Text(text="&n"), CommentStart(), Text(text="foo"), CommentEnd(), Text(t | |||
name: rich_tags | |||
label: a HTML tag with tons of other things in it | |||
input: "{{dubious claim}}<ref name={{abc}} foo="bar {{baz}}" abc={{de}}f ghi=j{{k}}{{l}} \n mno = "{{p}} [[q]] {{r}}">[[Source]]</ref>" | |||
output: [TemplateOpen(), Text(text="dubious claim"), TemplateClose(), TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TemplateOpen(), Text(text="abc"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(), Text(text="bar "), TemplateOpen(), Text(text="baz"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="abc"), TagAttrEquals(), TemplateOpen(), Text(text="de"), TemplateClose(), Text(text="f"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="ghi"), TagAttrEquals(), Text(text="j"), TemplateOpen(), Text(text="k"), TemplateClose(), TemplateOpen(), Text(text="l"), TemplateClose(), TagAttrStart(pad_first=" \n ", pad_before_eq=" ", pad_after_eq=" "), Text(text="mno"), TagAttrEquals(), TagAttrQuote(), TemplateOpen(), Text(text="p"), TemplateClose(), Text(text=" "), WikilinkOpen(), Text(text="q"), WikilinkClose(), Text(text=" "), TemplateOpen(), Text(text="r"), TemplateClose(), TagCloseOpen(padding=""), WikilinkOpen(), Text(text="Source"), WikilinkClose(), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
output: [TemplateOpen(), Text(text="dubious claim"), TemplateClose(), TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TemplateOpen(), Text(text="abc"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="bar "), TemplateOpen(), Text(text="baz"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="abc"), TagAttrEquals(), TemplateOpen(), Text(text="de"), TemplateClose(), Text(text="f"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="ghi"), TagAttrEquals(), Text(text="j"), TemplateOpen(), Text(text="k"), TemplateClose(), TemplateOpen(), Text(text="l"), TemplateClose(), TagAttrStart(pad_first=" \n ", pad_before_eq=" ", pad_after_eq=" "), Text(text="mno"), TagAttrEquals(), TagAttrQuote(char="\""), TemplateOpen(), Text(text="p"), TemplateClose(), Text(text=" "), WikilinkOpen(), Text(text="q"), WikilinkClose(), Text(text=" "), TemplateOpen(), Text(text="r"), TemplateClose(), TagCloseOpen(padding=""), WikilinkOpen(), Text(text="Source"), WikilinkClose(), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
@@ -57,7 +57,14 @@ output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before | |||
name: attribute_quoted | |||
label: a tag with a single quoted attribute | |||
input: "<ref name="foo bar"></ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: attribute_single_quoted | |||
label: a tag with a single singly-quoted attribute | |||
input: "<ref name='foo bar'></ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="'"), Text(text="foo bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
@@ -71,7 +78,7 @@ output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before | |||
name: attribute_quoted_hyphen | |||
label: a tag with a single quoted attribute, containing a hyphen | |||
input: "<ref name="foo-bar"></ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo-bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo-bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
@@ -92,21 +99,21 @@ output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before | |||
name: attribute_selfclosing_value_quoted | |||
label: a self-closing tag with a single quoted attribute | |||
input: "<ref name="foo"/>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo"), TagCloseSelfclose(padding="")] | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo"), TagCloseSelfclose(padding="")] | |||
--- | |||
name: nested_tag | |||
label: a tag nested within the attributes of another | |||
input: "<ref name=<span style="color: red;">foo</span>>citation</ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: nested_tag_quoted | |||
label: a tag nested within the attributes of another, quoted | |||
input: "<ref name="<span style="color: red;">foo</span>">citation</ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
@@ -120,7 +127,7 @@ output: [Text(text="<ref name=</ ><//>>citation</ref>")] | |||
name: nested_troll_tag_quoted | |||
label: a bogus tag that appears to be nested within the attributes of another, quoted | |||
input: "<ref name="</ ><//>">citation</ref>" | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="</ ><//>"), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="</ ><//>"), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
@@ -222,6 +229,27 @@ output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_befor | |||
--- | |||
name: quotes_in_quotes | |||
label: singly-quoted text inside a doubly-quoted attribute | |||
input: "<span foo="bar 'baz buzz' biz">stuff</span>" | |||
output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="bar 'baz buzz' biz"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()] | |||
--- | |||
name: quotes_in_quotes_2 | |||
label: doubly-quoted text inside a singly-quoted attribute | |||
input: "<span foo='bar "baz buzz" biz'>stuff</span>" | |||
output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="'"), Text(text="bar \"baz buzz\" biz"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()] | |||
--- | |||
name: quotes_in_quotes_3 | |||
label: doubly-quoted text inside a singly-quoted attribute, with backslashes | |||
input: "<span foo='bar "baz buzz\\" biz'>stuff</span>" | |||
output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="'"), Text(text="bar \"baz buzz\\\" biz"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()] | |||
--- | |||
name: incomplete_lbracket | |||
label: incomplete tags: just a left bracket | |||
input: "<" | |||
@@ -407,28 +435,28 @@ output: [Text(text="junk <></>")] | |||
name: backslash_premature_before | |||
label: a backslash before a quote before a space | |||
input: "<foo attribute="this is\\" quoted">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\" quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is\\\" quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
name: backslash_premature_after | |||
label: a backslash before a quote after a space | |||
input: "<foo attribute="this is \\"quoted">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is \\\"quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is \\\"quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
name: backslash_premature_middle | |||
label: a backslash before a quote in the middle of a word | |||
input: "<foo attribute="this i\\"s quoted">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this i\\\"s quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this i\\\"s quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
name: backslash_adjacent | |||
label: escaped quotes next to unescaped quotes | |||
input: "<foo attribute="\\"this is quoted\\"">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="\\\"this is quoted\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="\\\"this is quoted\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
@@ -442,21 +470,21 @@ output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before | |||
name: backslash_double | |||
label: two adjacent backslashes, which do *not* affect the quote | |||
input: "<foo attribute="this is\\\\" quoted">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
name: backslash_triple | |||
label: three adjacent backslashes, which do *not* affect the quote | |||
input: "<foo attribute="this is\\\\\\" quoted">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is\\\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
name: backslash_unaffecting | |||
label: backslashes near quotes, but not immediately adjacent, thus having no effect | |||
input: "<foo attribute="\\quote\\d" also="quote\\d\\">blah</foo>" | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="\\quote\\d"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="also"), TagAttrEquals(), Text(text="\"quote\\d\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="\\quote\\d"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="also"), TagAttrEquals(), Text(text="\"quote\\d\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||
--- | |||
@@ -477,7 +505,7 @@ output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(t | |||
name: unparsable_attributed | |||
label: a tag that should not be put through the normal parser; parsed attributes | |||
input: "{{t1}}<nowiki attr=val attr2="{{val2}}">{{t2}}</nowiki>{{t3}}" | |||
output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(text="nowiki"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr"), TagAttrEquals(), Text(text="val"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr2"), TagAttrEquals(), TagAttrQuote(), TemplateOpen(), Text(text="val2"), TemplateClose(), TagCloseOpen(padding=""), Text(text="{{t2}}"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), TemplateOpen(), Text(text="t3"), TemplateClose()] | |||
output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(text="nowiki"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr"), TagAttrEquals(), Text(text="val"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr2"), TagAttrEquals(), TagAttrQuote(char="\""), TemplateOpen(), Text(text="val2"), TemplateClose(), TagCloseOpen(padding=""), Text(text="{{t2}}"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), TemplateOpen(), Text(text="t3"), TemplateClose()] | |||
--- | |||
@@ -575,7 +603,7 @@ output: [Text(text="foo"), TagOpenOpen(invalid=True), Text(text="br"), TagCloseS | |||
name: single_only_close_attribute | |||
label: a tag that can only be single; presented as a close tag with an attribute | |||
input: "</br id="break">" | |||
output: [TagOpenOpen(invalid=True), Text(text="br"), TagAttrStart(pad_first=" ", pad_after_eq="", pad_before_eq=""), Text(text="id"), TagAttrEquals(), TagAttrQuote(), Text(text="break"), TagCloseSelfclose(padding="", implicit=True)] | |||
output: [TagOpenOpen(invalid=True), Text(text="br"), TagAttrStart(pad_first=" ", pad_after_eq="", pad_before_eq=""), Text(text="id"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="break"), TagCloseSelfclose(padding="", implicit=True)] | |||
--- | |||