diff --git a/CHANGELOG b/CHANGELOG
index 1200575..f7dcb8a 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -10,6 +10,10 @@ v0.4 (unreleased):
option, RECURSE_OTHERS, which recurses over all children except instances of
'forcetype' (for example, `code.filter_templates(code.RECURSE_OTHERS)`
returns all un-nested templates).
+- The parser now understands HTML tag attributes quoted with single quotes.
+ When setting a tag attribute's value, quotes will be added if necessary. As
+ part of this, Attribute's 'quoted' attribute has been changed to 'quotes',
+ and is now either a string or None.
- Calling Template.remove() with a Parameter object that is not part of the
template now raises ValueError instead of doing nothing.
- Parameters with non-integer keys can no longer be created with
diff --git a/docs/changelog.rst b/docs/changelog.rst
index ba26722..3bc4ce7 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -18,6 +18,11 @@ Unreleased
which recurses over all children except instances of *forcetype* (for
example, ``code.filter_templates(code.RECURSE_OTHERS)`` returns all un-nested
templates).
+- The parser now understands HTML tag attributes quoted with single quotes.
+ When setting a tag attribute's value, quotes will be added if necessary. As
+ part of this, :py:class:`.Attribute`\ 's :py:attr:`~.Attribute.quoted`
+ attribute has been changed to :py:attr:`~.Attribute.quotes`, and is now
+ either a string or ``None``.
- Calling :py:meth:`.Template.remove` with a :py:class:`.Parameter` object that
is not part of the template now raises :py:exc:`ValueError` instead of doing
nothing.
diff --git a/mwparserfromhell/nodes/extras/attribute.py b/mwparserfromhell/nodes/extras/attribute.py
index 4b7c668..6256138 100644
--- a/mwparserfromhell/nodes/extras/attribute.py
+++ b/mwparserfromhell/nodes/extras/attribute.py
@@ -36,12 +36,14 @@ class Attribute(StringMixIn):
whose value is ``"foo"``.
"""
- def __init__(self, name, value=None, quoted=True, pad_first=" ",
+ def __init__(self, name, value=None, quotes='"', pad_first=" ",
pad_before_eq="", pad_after_eq=""):
super(Attribute, self).__init__()
+ if not quotes and self._value_needs_quotes(value):
+ raise ValueError("given value {0!r} requires quotes".format(value))
self._name = name
self._value = value
- self._quoted = quoted
+ self._quotes = quotes
self._pad_first = pad_first
self._pad_before_eq = pad_before_eq
self._pad_after_eq = pad_after_eq
@@ -50,11 +52,18 @@ class Attribute(StringMixIn):
result = self.pad_first + str(self.name) + self.pad_before_eq
if self.value is not None:
result += "=" + self.pad_after_eq
- if self.quoted:
- return result + '"' + str(self.value) + '"'
+ if self.quotes:
+ return result + self.quotes + str(self.value) + self.quotes
return result + str(self.value)
return result
+ @staticmethod
+ def _value_needs_quotes(val):
+ """Return quotes val needs: '"', "'", True if it has both, or None."""
+ if val and any(char.isspace() for char in val):
+ return ('"' in val and "'" in val) or ("'" if '"' in val else '"')
+ return None
+
def _set_padding(self, attr, value):
"""Setter for the value of a padding attribute."""
if not value:
@@ -65,6 +74,14 @@ class Attribute(StringMixIn):
raise ValueError("padding must be entirely whitespace")
setattr(self, attr, value)
+ @staticmethod
+ def coerce_quotes(quotes):
+ """Coerce *quotes* to '"', "'", or None; raise ValueError otherwise."""
+ orig, quotes = quotes, str(quotes) if quotes else None
+ if quotes not in [None, '"', "'"]:
+ raise ValueError("{0!r} is not a valid quote type".format(orig))
+ return quotes
+
@property
def name(self):
"""The name of the attribute as a :py:class:`~.Wikicode` object."""
@@ -76,9 +93,9 @@ class Attribute(StringMixIn):
return self._value
@property
- def quoted(self):
- """Whether the attribute's value is quoted with double quotes."""
- return self._quoted
+ def quotes(self):
+ """How the value is enclosed: ``"``, ``'``, or ``None`` (unquoted)."""
+ return self._quotes
@property
def pad_first(self):
@@ -101,11 +118,21 @@ class Attribute(StringMixIn):
@value.setter
def value(self, newval):
- self._value = None if newval is None else parse_anything(newval)
-
- @quoted.setter
- def quoted(self, value):
- self._quoted = bool(value)
+ if newval is None:
+ self._value = None
+ else:
+ code = parse_anything(newval)
+ quotes = self._value_needs_quotes(code)  # '"', "'", True or None
+ if quotes in ['"', "'"] or (quotes is True and not self.quotes):
+ self._quotes = '"' if quotes is True else quotes  # not a bool
+ self._value = code
+
+ @quotes.setter
+ def quotes(self, value):
+ value = self.coerce_quotes(value)  # normalizes to '"', "'" or None
+ if not value and self._value_needs_quotes(self.value):
+ raise ValueError("attribute value requires quotes")
+ self._quotes = value
@pad_first.setter
def pad_first(self, value):
diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py
index f283d46..1b8efb8 100644
--- a/mwparserfromhell/nodes/tag.py
+++ b/mwparserfromhell/nodes/tag.py
@@ -236,21 +236,24 @@ class Tag(Node):
return attr
raise ValueError(name)
- def add(self, name, value=None, quoted=True, pad_first=" ",
+ def add(self, name, value=None, quotes='"', pad_first=" ",
pad_before_eq="", pad_after_eq=""):
"""Add an attribute with the given *name* and *value*.
*name* and *value* can be anything parsable by
:py:func:`.utils.parse_anything`; *value* can be omitted if the
- attribute is valueless. *quoted* is a bool telling whether to wrap the
- *value* in double quotes (this is recommended). *pad_first*,
- *pad_before_eq*, and *pad_after_eq* are whitespace used as padding
- before the name, before the equal sign (or after the name if no value),
- and after the equal sign (ignored if no value), respectively.
+ attribute is valueless. If *quotes* is not ``None``, it should be a
+ string (either ``"`` or ``'``) that *value* will be wrapped in (this is
+ recommended). ``None`` is only legal if *value* contains no spacing.
+
+ *pad_first*, *pad_before_eq*, and *pad_after_eq* are whitespace used as
+ padding before the name, before the equal sign (or after the name if no
+ value), and after the equal sign (ignored if no value), respectively.
"""
if value is not None:
value = parse_anything(value)
- attr = Attribute(parse_anything(name), value, quoted)
+ quotes = Attribute.coerce_quotes(quotes)
+ attr = Attribute(parse_anything(name), value, quotes)
attr.pad_first = pad_first
attr.pad_before_eq = pad_before_eq
attr.pad_after_eq = pad_after_eq
diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py
index 559bd54..c9a930b 100644
--- a/mwparserfromhell/parser/builder.py
+++ b/mwparserfromhell/parser/builder.py
@@ -193,7 +193,7 @@ class Builder(object):
def _handle_attribute(self, start):
"""Handle a case where a tag attribute is at the head of the tokens."""
- name, quoted = None, False
+ name = quotes = None
self._push()
while self._tokens:
token = self._tokens.pop()
@@ -201,7 +201,7 @@ class Builder(object):
name = self._pop()
self._push()
elif isinstance(token, tokens.TagAttrQuote):
- quoted = True
+ quotes = token.char
elif isinstance(token, (tokens.TagAttrStart, tokens.TagCloseOpen,
tokens.TagCloseSelfclose)):
self._tokens.append(token)
@@ -209,7 +209,7 @@ class Builder(object):
value = self._pop()
else:
name, value = self._pop(), None
- return Attribute(name, value, quoted, start.pad_first,
+ return Attribute(name, value, quotes, start.pad_first,
start.pad_before_eq, start.pad_after_eq)
else:
self._write(self._handle_token(token))
diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c
index 963e7d7..4c6414e 100644
--- a/mwparserfromhell/parser/tokenizer.c
+++ b/mwparserfromhell/parser/tokenizer.c
@@ -173,7 +173,7 @@ static TagData* TagData_new(void)
ALLOC_BUFFER(self->pad_first)
ALLOC_BUFFER(self->pad_before_eq)
ALLOC_BUFFER(self->pad_after_eq)
- self->reset = 0;
+ self->quoter = self->reset = 0;
return self;
}
@@ -1566,10 +1566,18 @@ static int Tokenizer_parse_comment(Tokenizer* self)
*/
static int Tokenizer_push_tag_buffer(Tokenizer* self, TagData* data)
{
- PyObject *tokens, *kwargs, *pad_first, *pad_before_eq, *pad_after_eq;
+ PyObject *tokens, *kwargs, *tmp, *pad_first, *pad_before_eq, *pad_after_eq;
if (data->context & TAG_QUOTED) {
- if (Tokenizer_emit_first(self, TagAttrQuote))
+ kwargs = PyDict_New();
+ if (!kwargs)
+ return -1;
+ tmp = PyUnicode_FromUnicode(&data->quoter, 1);
+ if (!tmp)
+ return -1;
+ PyDict_SetItemString(kwargs, "char", tmp);
+ Py_DECREF(tmp);
+ if (Tokenizer_emit_first_kwargs(self, TagAttrQuote, kwargs))
return -1;
tokens = Tokenizer_pop(self);
if (!tokens)
@@ -1721,16 +1729,17 @@ Tokenizer_handle_tag_data(Tokenizer* self, TagData* data, Py_UNICODE chunk)
Tokenizer_READ_BACKWARDS(self, 2) != '\\');
if (data->context & TAG_NOTE_QUOTE) {
data->context ^= TAG_NOTE_QUOTE;
- if (chunk == '"' && !escaped) {
+ if ((chunk == '"' || chunk == '\'') && !escaped) {
data->context |= TAG_QUOTED;
+ data->quoter = chunk;
+ data->reset = self->head;
if (Tokenizer_push(self, self->topstack->context))
return -1;
- data->reset = self->head;
return 0;
}
}
else if (data->context & TAG_QUOTED) {
- if (chunk == '"' && !escaped) {
+ if (chunk == data->quoter && !escaped) {
data->context |= TAG_NOTE_SPACE;
return 0;
}
diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h
index 4312e2f..dde6464 100644
--- a/mwparserfromhell/parser/tokenizer.h
+++ b/mwparserfromhell/parser/tokenizer.h
@@ -206,6 +206,7 @@ typedef struct {
struct Textbuffer* pad_first;
struct Textbuffer* pad_before_eq;
struct Textbuffer* pad_after_eq;
+ Py_UNICODE quoter;
Py_ssize_t reset;
} TagData;
diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py
index 6430f0f..4422b5c 100644
--- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py
@@ -53,6 +53,7 @@ class _TagOpenData(object):
def __init__(self):
self.context = self.CX_NAME
self.padding_buffer = {"first": "", "before_eq": "", "after_eq": ""}
+ self.quoter = None
self.reset = 0
@@ -66,7 +67,7 @@ class Tokenizer(object):
MAX_DEPTH = 40
MAX_CYCLES = 100000
regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE)
- tag_splitter = re.compile(r"([\s\"\\]+)")
+ tag_splitter = re.compile(r"([\s\"\'\\]+)")
def __init__(self):
self._text = None
@@ -612,7 +613,7 @@ class Tokenizer(object):
def _push_tag_buffer(self, data):
"""Write a pending tag attribute from *data* to the stack."""
if data.context & data.CX_QUOTED:
- self._emit_first(tokens.TagAttrQuote())
+ self._emit_first(tokens.TagAttrQuote(char=data.quoter))
self._emit_all(self._pop())
buf = data.padding_buffer
self._emit_first(tokens.TagAttrStart(pad_first=buf["first"],
@@ -689,13 +690,14 @@ class Tokenizer(object):
escaped = self._read(-1) == "\\" and self._read(-2) != "\\"
if data.context & data.CX_NOTE_QUOTE:
data.context ^= data.CX_NOTE_QUOTE
- if chunk == '"' and not escaped:
+ if chunk in "'\"" and not escaped:
data.context |= data.CX_QUOTED
- self._push(self._context)
+ data.quoter = chunk
data.reset = self._head
+ self._push(self._context)
continue
elif data.context & data.CX_QUOTED:
- if chunk == '"' and not escaped:
+ if chunk == data.quoter and not escaped:
data.context |= data.CX_NOTE_SPACE
continue
self._handle_tag_text(chunk)
diff --git a/mwparserfromhell/parser/tokens.py b/mwparserfromhell/parser/tokens.py
index c7cc3ef..e567731 100644
--- a/mwparserfromhell/parser/tokens.py
+++ b/mwparserfromhell/parser/tokens.py
@@ -100,7 +100,7 @@ CommentEnd = make("CommentEnd") # -->
TagOpenOpen = make("TagOpenOpen") # <
TagAttrStart = make("TagAttrStart")
TagAttrEquals = make("TagAttrEquals") # =
-TagAttrQuote = make("TagAttrQuote") # "
+TagAttrQuote = make("TagAttrQuote") # ", '
TagCloseOpen = make("TagCloseOpen") # >
TagCloseSelfclose = make("TagCloseSelfclose") # />
TagOpenClose = make("TagOpenClose") #
diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py
index 10d491e..bb713c2 100644
--- a/tests/_test_tree_equality.py
+++ b/tests/_test_tree_equality.py
@@ -98,7 +98,7 @@ class TreeEqualityTestCase(TestCase):
self.assertWikicodeEqual(exp_attr.name, act_attr.name)
if exp_attr.value is not None:
self.assertWikicodeEqual(exp_attr.value, act_attr.value)
- self.assertIs(exp_attr.quoted, act_attr.quoted)
+ self.assertEqual(exp_attr.quotes, act_attr.quotes)
self.assertEqual(exp_attr.pad_first, act_attr.pad_first)
self.assertEqual(exp_attr.pad_before_eq, act_attr.pad_before_eq)
self.assertEqual(exp_attr.pad_after_eq, act_attr.pad_after_eq)
diff --git a/tests/test_attribute.py b/tests/test_attribute.py
index 50eed74..15e546d 100644
--- a/tests/test_attribute.py
+++ b/tests/test_attribute.py
@@ -42,12 +42,14 @@ class TestAttribute(TreeEqualityTestCase):
self.assertEqual(" foo", str(node))
node2 = Attribute(wraptext("foo"), wraptext("bar"))
self.assertEqual(' foo="bar"', str(node2))
- node3 = Attribute(wraptext("a"), wraptext("b"), True, "", " ", " ")
+ node3 = Attribute(wraptext("a"), wraptext("b"), '"', "", " ", " ")
self.assertEqual('a = "b"', str(node3))
- node3 = Attribute(wraptext("a"), wraptext("b"), False, "", " ", " ")
- self.assertEqual("a = b", str(node3))
- node4 = Attribute(wraptext("a"), wrap([]), False, " ", "", " ")
- self.assertEqual(" a= ", str(node4))
+ node4 = Attribute(wraptext("a"), wraptext("b"), "'", "", " ", " ")
+ self.assertEqual("a = 'b'", str(node4))
+ node5 = Attribute(wraptext("a"), wraptext("b"), None, "", " ", " ")
+ self.assertEqual("a = b", str(node5))
+ node6 = Attribute(wraptext("a"), wrap([]), None, " ", "", " ")
+ self.assertEqual(" a= ", str(node6))
def test_name(self):
"""test getter/setter for the name attribute"""
@@ -66,17 +68,35 @@ class TestAttribute(TreeEqualityTestCase):
self.assertWikicodeEqual(wrap([Template(wraptext("bar"))]), node.value)
node.value = None
self.assertIs(None, node.value)
+ node2 = Attribute(wraptext("id"), wraptext("foo"), None)
+ node2.value = "foo bar baz"
+ self.assertWikicodeEqual(wraptext("foo bar baz"), node2.value)
+ self.assertEqual('"', node2.quotes)
+ node2.value = 'foo "bar" baz'
+ self.assertWikicodeEqual(wraptext('foo "bar" baz'), node2.value)
+ self.assertEqual("'", node2.quotes)
+ node2.value = "foo 'bar' baz"
+ self.assertWikicodeEqual(wraptext("foo 'bar' baz"), node2.value)
+ self.assertEqual('"', node2.quotes)
+ node2.value = "fo\"o 'bar' b\"az"
+ self.assertWikicodeEqual(wraptext("fo\"o 'bar' b\"az"), node2.value)
+ self.assertEqual('"', node2.quotes)
- def test_quoted(self):
- """test getter/setter for the quoted attribute"""
- node1 = Attribute(wraptext("id"), wraptext("foo"), False)
+ def test_quotes(self):
+ """test getter/setter for the quotes attribute"""
+ node1 = Attribute(wraptext("id"), wraptext("foo"), None)
node2 = Attribute(wraptext("id"), wraptext("bar"))
- self.assertFalse(node1.quoted)
- self.assertTrue(node2.quoted)
- node1.quoted = True
- node2.quoted = ""
- self.assertTrue(node1.quoted)
- self.assertFalse(node2.quoted)
+ node3 = Attribute(wraptext("id"), wraptext("foo bar baz"))
+ self.assertIs(None, node1.quotes)
+ self.assertEqual('"', node2.quotes)
+ node1.quotes = "'"
+ node2.quotes = None
+ self.assertEqual("'", node1.quotes)
+ self.assertIs(None, node2.quotes)
+ self.assertRaises(ValueError, setattr, node1, "quotes", "foobar")
+ self.assertRaises(ValueError, setattr, node3, "quotes", None)
+ self.assertRaises(ValueError, Attribute, wraptext("id"),
+ wraptext("foo bar baz"), None)
def test_padding(self):
"""test getter/setter for the padding attributes"""
diff --git a/tests/test_builder.py b/tests/test_builder.py
index 58e3d1e..8f71ede 100644
--- a/tests/test_builder.py
+++ b/tests/test_builder.py
@@ -270,7 +270,7 @@ class TestBuilder(TreeEqualityTestCase):
tokens.TagAttrStart(pad_first=" ", pad_before_eq="",
pad_after_eq=""),
tokens.Text(text="name"), tokens.TagAttrEquals(),
- tokens.TagAttrQuote(), tokens.Text(text="abc"),
+ tokens.TagAttrQuote(char='"'), tokens.Text(text="abc"),
tokens.TagCloseSelfclose(padding=" ")],
wrap([Tag(wraptext("ref"),
attrs=[Attribute(wraptext("name"), wraptext("abc"))],
@@ -298,7 +298,7 @@ class TestBuilder(TreeEqualityTestCase):
wrap([Tag(wraptext("br"), self_closing=True, invalid=True)])),
# [[[Source]]]
+ # mno = '{{p}} [[q]] {{r}}'>[[Source]]
([tokens.TagOpenOpen(), tokens.Text(text="ref"),
tokens.TagAttrStart(pad_first=" ", pad_before_eq="",
pad_after_eq=""),
@@ -308,7 +308,7 @@ class TestBuilder(TreeEqualityTestCase):
tokens.TagAttrStart(pad_first=" ", pad_before_eq="",
pad_after_eq=""),
tokens.Text(text="foo"), tokens.TagAttrEquals(),
- tokens.TagAttrQuote(), tokens.Text(text="bar "),
+ tokens.TagAttrQuote(char='"'), tokens.Text(text="bar "),
tokens.TemplateOpen(), tokens.Text(text="baz"),
tokens.TemplateClose(),
tokens.TagAttrStart(pad_first=" ", pad_before_eq="",
@@ -326,7 +326,7 @@ class TestBuilder(TreeEqualityTestCase):
tokens.TagAttrStart(pad_first=" \n ", pad_before_eq=" ",
pad_after_eq=" "),
tokens.Text(text="mno"), tokens.TagAttrEquals(),
- tokens.TagAttrQuote(), tokens.TemplateOpen(),
+ tokens.TagAttrQuote(char="'"), tokens.TemplateOpen(),
tokens.Text(text="p"), tokens.TemplateClose(),
tokens.Text(text=" "), tokens.WikilinkOpen(),
tokens.Text(text="q"), tokens.WikilinkClose(),
@@ -338,17 +338,17 @@ class TestBuilder(TreeEqualityTestCase):
tokens.TagCloseClose()],
wrap([Tag(wraptext("ref"), wrap([Wikilink(wraptext("Source"))]), [
Attribute(wraptext("name"),
- wrap([Template(wraptext("abc"))]), False),
+ wrap([Template(wraptext("abc"))]), None),
Attribute(wraptext("foo"), wrap([Text("bar "),
Template(wraptext("baz"))]), pad_first=" "),
Attribute(wraptext("abc"), wrap([Template(wraptext("de")),
- Text("f")]), False),
+ Text("f")]), None),
Attribute(wraptext("ghi"), wrap([Text("j"),
Template(wraptext("k")),
- Template(wraptext("l"))]), False),
+ Template(wraptext("l"))]), None),
Attribute(wraptext("mno"), wrap([Template(wraptext("p")),
Text(" "), Wikilink(wraptext("q")), Text(" "),
- Template(wraptext("r"))]), True, " \n ", " ",
+ Template(wraptext("r"))]), "'", " \n ", " ",
" ")])])),
# "''italic text''"
diff --git a/tests/test_tag.py b/tests/test_tag.py
index 0eae713..7577cce 100644
--- a/tests/test_tag.py
+++ b/tests/test_tag.py
@@ -34,9 +34,9 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext
agen = lambda name, value: Attribute(wraptext(name), wraptext(value))
agennv = lambda name: Attribute(wraptext(name))
-agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), False)
-agenp = lambda name, v, a, b, c: Attribute(wraptext(name), v, True, a, b, c)
-agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, True, a, b, c)
+agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), None)
+agenp = lambda name, v, a, b, c: Attribute(wraptext(name), v, '"', a, b, c)
+agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, '"', a, b, c)
class TestTag(TreeEqualityTestCase):
"""Test cases for the Tag node."""
@@ -276,28 +276,33 @@ class TestTag(TreeEqualityTestCase):
"""test Tag.add()"""
node = Tag(wraptext("ref"), wraptext("cite"))
node.add("name", "value")
- node.add("name", "value", quoted=False)
+ node.add("name", "value", quotes=None)
+ node.add("name", "value", quotes="'")
node.add("name")
node.add(1, False)
node.add("style", "{{foobar}}")
- node.add("name", "value", True, "\n", " ", " ")
+ node.add("name", "value", '"', "\n", " ", " ")
attr1 = ' name="value"'
attr2 = " name=value"
- attr3 = " name"
- attr4 = ' 1="False"'
- attr5 = ' style="{{foobar}}"'
- attr6 = '\nname = "value"'
+ attr3 = " name='value'"
+ attr4 = " name"
+ attr5 = ' 1="False"'
+ attr6 = ' style="{{foobar}}"'
+ attr7 = '\nname = "value"'
self.assertEqual(attr1, node.attributes[0])
self.assertEqual(attr2, node.attributes[1])
self.assertEqual(attr3, node.attributes[2])
self.assertEqual(attr4, node.attributes[3])
self.assertEqual(attr5, node.attributes[4])
self.assertEqual(attr6, node.attributes[5])
- self.assertEqual(attr6, node.get("name"))
+ self.assertEqual(attr7, node.attributes[6])
+ self.assertEqual(attr7, node.get("name"))
self.assertWikicodeEqual(wrap([Template(wraptext("foobar"))]),
- node.attributes[4].value)
+ node.attributes[5].value)
self.assertEqual("".join(("[cite]")), node)
+ attr6, attr7, ">cite")), node)
+ self.assertRaises(ValueError, node.add, "name", "foo", quotes="bar")
+ self.assertRaises(ValueError, node.add, "name", "a bc d", quotes=None)
def test_remove(self):
"""test Tag.remove()"""
diff --git a/tests/tokenizer/integration.mwtest b/tests/tokenizer/integration.mwtest
index 5e1a409..372a367 100644
--- a/tests/tokenizer/integration.mwtest
+++ b/tests/tokenizer/integration.mwtest
@@ -43,7 +43,7 @@ output: [Text(text="&n"), CommentStart(), Text(text="foo"), CommentEnd(), Text(t
name: rich_tags
label: a HTML tag with tons of other things in it
input: "{{dubious claim}}[[[Source]]]"
-output: [TemplateOpen(), Text(text="dubious claim"), TemplateClose(), TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TemplateOpen(), Text(text="abc"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(), Text(text="bar "), TemplateOpen(), Text(text="baz"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="abc"), TagAttrEquals(), TemplateOpen(), Text(text="de"), TemplateClose(), Text(text="f"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="ghi"), TagAttrEquals(), Text(text="j"), TemplateOpen(), Text(text="k"), TemplateClose(), TemplateOpen(), Text(text="l"), TemplateClose(), TagAttrStart(pad_first=" \n ", pad_before_eq=" ", pad_after_eq=" "), Text(text="mno"), TagAttrEquals(), TagAttrQuote(), TemplateOpen(), Text(text="p"), TemplateClose(), Text(text=" "), WikilinkOpen(), Text(text="q"), WikilinkClose(), Text(text=" "), TemplateOpen(), Text(text="r"), TemplateClose(), TagCloseOpen(padding=""), WikilinkOpen(), Text(text="Source"), WikilinkClose(), TagOpenClose(), Text(text="ref"), TagCloseClose()]
+output: [TemplateOpen(), Text(text="dubious claim"), TemplateClose(), TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TemplateOpen(), Text(text="abc"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="bar "), TemplateOpen(), Text(text="baz"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="abc"), TagAttrEquals(), TemplateOpen(), Text(text="de"), TemplateClose(), Text(text="f"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="ghi"), TagAttrEquals(), Text(text="j"), TemplateOpen(), Text(text="k"), TemplateClose(), TemplateOpen(), Text(text="l"), TemplateClose(), TagAttrStart(pad_first=" \n ", pad_before_eq=" ", pad_after_eq=" "), Text(text="mno"), TagAttrEquals(), TagAttrQuote(char="\""), TemplateOpen(), Text(text="p"), TemplateClose(), Text(text=" "), WikilinkOpen(), Text(text="q"), WikilinkClose(), Text(text=" "), TemplateOpen(), Text(text="r"), TemplateClose(), TagCloseOpen(padding=""), WikilinkOpen(), Text(text="Source"), WikilinkClose(), TagOpenClose(), Text(text="ref"), TagCloseClose()]
---
diff --git a/tests/tokenizer/tags.mwtest b/tests/tokenizer/tags.mwtest
index 26e569b..f979329 100644
--- a/tests/tokenizer/tags.mwtest
+++ b/tests/tokenizer/tags.mwtest
@@ -57,7 +57,14 @@ output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before
name: attribute_quoted
label: a tag with a single quoted attribute
input: ""
-output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()]
+output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()]
+
+---
+
+name: attribute_single_quoted
+label: a tag with a single singly-quoted attribute
+input: ""
+output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="'"), Text(text="foo bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()]
---
@@ -71,7 +78,7 @@ output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before
name: attribute_quoted_hyphen
label: a tag with a single quoted attribute, containing a hyphen
input: ""
-output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo-bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()]
+output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo-bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()]
---
@@ -92,21 +99,21 @@ output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before
name: attribute_selfclosing_value_quoted
label: a self-closing tag with a single quoted attribute
input: ""
-output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo"), TagCloseSelfclose(padding="")]
+output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo"), TagCloseSelfclose(padding="")]
---
name: nested_tag
label: a tag nested within the attributes of another
input: "[foo>citation]"
-output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()]
+output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()]
---
name: nested_tag_quoted
label: a tag nested within the attributes of another, quoted
input: "[foo">citation]"
-output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()]
+output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()]
---
@@ -120,7 +127,7 @@ output: [Text(text="[/>>citation]")]
name: nested_troll_tag_quoted
label: a bogus tag that appears to be nested within the attributes of another, quoted
input: "[citation]"
-output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text=" >/>"), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()]
+output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text=" >/>"), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()]
---
@@ -222,6 +229,27 @@ output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_befor
---
+name: quotes_in_quotes
+label: singly-quoted text inside a doubly-quoted attribute
+input: "stuff"
+output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="bar 'baz buzz' biz"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()]
+
+---
+
+name: quotes_in_quotes_2
+label: doubly-quoted text inside a singly-quoted attribute
+input: "stuff"
+output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="'"), Text(text="bar \"baz buzz\" biz"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()]
+
+---
+
+name: quotes_in_quotes_3
+label: doubly-quoted text inside a singly-quoted attribute, with backslashes
+input: "stuff"
+output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="'"), Text(text="bar \"baz buzz\\\" biz"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()]
+
+---
+
name: incomplete_lbracket
label: incomplete tags: just a left bracket
input: "<"
@@ -407,28 +435,28 @@ output: [Text(text="junk <>>")]
name: backslash_premature_before
label: a backslash before a quote before a space
input: "blah"
-output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\" quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]
+output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is\\\" quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]
---
name: backslash_premature_after
label: a backslash before a quote after a space
input: "blah"
-output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is \\\"quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]
+output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is \\\"quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]
---
name: backslash_premature_middle
label: a backslash before a quote in the middle of a word
input: "blah"
-output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this i\\\"s quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]
+output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this i\\\"s quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]
---
name: backslash_adjacent
label: escaped quotes next to unescaped quotes
input: "blah"
-output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="\\\"this is quoted\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]
+output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="\\\"this is quoted\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]
---
@@ -442,21 +470,21 @@ output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before
name: backslash_double
label: two adjacent backslashes, which do *not* affect the quote
input: "blah"
-output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]
+output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]
---
name: backslash_triple
label: three adjacent backslashes, which do *not* affect the quote
input: "blah"
-output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]
+output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is\\\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]
---
name: backslash_unaffecting
label: backslashes near quotes, but not immediately adjacent, thus having no effect
input: "blah"
-output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="\\quote\\d"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="also"), TagAttrEquals(), Text(text="\"quote\\d\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]
+output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="\\quote\\d"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="also"), TagAttrEquals(), Text(text="\"quote\\d\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]
---
@@ -477,7 +505,7 @@ output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(t
name: unparsable_attributed
label: a tag that should not be put through the normal parser; parsed attributes
input: "{{t1}}{{t2}}{{t3}}"
-output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(text="nowiki"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr"), TagAttrEquals(), Text(text="val"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr2"), TagAttrEquals(), TagAttrQuote(), TemplateOpen(), Text(text="val2"), TemplateClose(), TagCloseOpen(padding=""), Text(text="{{t2}}"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), TemplateOpen(), Text(text="t3"), TemplateClose()]
+output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(text="nowiki"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr"), TagAttrEquals(), Text(text="val"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr2"), TagAttrEquals(), TagAttrQuote(char="\""), TemplateOpen(), Text(text="val2"), TemplateClose(), TagCloseOpen(padding=""), Text(text="{{t2}}"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), TemplateOpen(), Text(text="t3"), TemplateClose()]
---
@@ -575,7 +603,7 @@ output: [Text(text="foo"), TagOpenOpen(invalid=True), Text(text="br"), TagCloseS
name: single_only_close_attribute
label: a tag that can only be single; presented as a close tag with an attribute
input: ""
-output: [TagOpenOpen(invalid=True), Text(text="br"), TagAttrStart(pad_first=" ", pad_after_eq="", pad_before_eq=""), Text(text="id"), TagAttrEquals(), TagAttrQuote(), Text(text="break"), TagCloseSelfclose(padding="", implicit=True)]
+output: [TagOpenOpen(invalid=True), Text(text="br"), TagAttrStart(pad_first=" ", pad_after_eq="", pad_before_eq=""), Text(text="id"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="break"), TagCloseSelfclose(padding="", implicit=True)]
---