diff --git a/CHANGELOG b/CHANGELOG index 1200575..f7dcb8a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -10,6 +10,10 @@ v0.4 (unreleased): option, RECURSE_OTHERS, which recurses over all children except instances of 'forcetype' (for example, `code.filter_templates(code.RECURSE_OTHERS)` returns all un-nested templates). +- The parser now understands HTML tag attributes quoted with single quotes. + When setting a tag attribute's value, quotes will be added if necessary. As + part of this, Attribute's 'quoted' attribute has been changed to 'quotes', + and is now either a string or None. - Calling Template.remove() with a Parameter object that is not part of the template now raises ValueError instead of doing nothing. - Parameters with non-integer keys can no longer be created with diff --git a/docs/changelog.rst b/docs/changelog.rst index ba26722..3bc4ce7 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -18,6 +18,11 @@ Unreleased which recurses over all children except instances of *forcetype* (for example, ``code.filter_templates(code.RECURSE_OTHERS)`` returns all un-nested templates). +- The parser now understands HTML tag attributes quoted with single quotes. + When setting a tag attribute's value, quotes will be added if necessary. As + part of this, :py:class:`.Attribute`\ 's :py:attr:`~.Attribute.quoted` + attribute has been changed to :py:attr:`~.Attribute.quotes`, and is now + either a string or ``None``. - Calling :py:meth:`.Template.remove` with a :py:class:`.Parameter` object that is not part of the template now raises :py:exc:`ValueError` instead of doing nothing. diff --git a/mwparserfromhell/nodes/extras/attribute.py b/mwparserfromhell/nodes/extras/attribute.py index 4b7c668..6256138 100644 --- a/mwparserfromhell/nodes/extras/attribute.py +++ b/mwparserfromhell/nodes/extras/attribute.py @@ -36,12 +36,14 @@ class Attribute(StringMixIn): whose value is ``"foo"``. """ - def __init__(self, name, value=None, quoted=True, pad_first=" ", + def __init__(self, name, value=None, quotes='"', pad_first=" ", pad_before_eq="", pad_after_eq=""): super(Attribute, self).__init__() + if not quotes and self._value_needs_quotes(value): + raise ValueError("given value {0!r} requires quotes".format(value)) self._name = name self._value = value - self._quoted = quoted + self._quotes = quotes self._pad_first = pad_first self._pad_before_eq = pad_before_eq self._pad_after_eq = pad_after_eq @@ -50,11 +52,18 @@ class Attribute(StringMixIn): result = self.pad_first + str(self.name) + self.pad_before_eq if self.value is not None: result += "=" + self.pad_after_eq - if self.quoted: - return result + '"' + str(self.value) + '"' + if self.quotes: + return result + self.quotes + str(self.value) + self.quotes return result + str(self.value) return result + @staticmethod + def _value_needs_quotes(val): + """Return the preferred quotes for the given value, or None.""" + if val and any(char.isspace() for char in val): + return ('"' in val and "'" in val) or ("'" if '"' in val else '"') + return None + def _set_padding(self, attr, value): """Setter for the value of a padding attribute.""" if not value: @@ -65,6 +74,14 @@ class Attribute(StringMixIn): raise ValueError("padding must be entirely whitespace") setattr(self, attr, value) + @staticmethod + def coerce_quotes(quotes): + """Coerce a quote type into an acceptable value, or raise an error.""" + orig, quotes = quotes, str(quotes) if quotes else None + if quotes not in [None, '"', "'"]: + raise ValueError("{0!r} is not a valid quote type".format(orig)) + return quotes + @property def name(self): """The name of the attribute as a :py:class:`~.Wikicode` object.""" @@ -76,9 +93,9 @@ class Attribute(StringMixIn): return self._value @property - def quoted(self): - """Whether the attribute's value is quoted with double quotes.""" - return self._quoted + def quotes(self): + """How to enclose the attribute value. ``"``, ``'``, or ``None``.""" + return self._quotes @property def pad_first(self): @@ -101,11 +118,21 @@ class Attribute(StringMixIn): @value.setter def value(self, newval): - self._value = None if newval is None else parse_anything(newval) - - @quoted.setter - def quoted(self, value): - self._quoted = bool(value) + if newval is None: + self._value = None + else: + code = parse_anything(newval) + quotes = self._value_needs_quotes(code) + if quotes in ['"', "'"] or (quotes is True and not self.quotes): + self._quotes = quotes + self._value = code + + @quotes.setter + def quotes(self, value): + value = self.coerce_quotes(value) + if not value and self._value_needs_quotes(self.value): + raise ValueError("attribute value requires quotes") + self._quotes = value @pad_first.setter def pad_first(self, value): diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index f283d46..1b8efb8 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -236,21 +236,24 @@ class Tag(Node): return attr raise ValueError(name) - def add(self, name, value=None, quoted=True, pad_first=" ", + def add(self, name, value=None, quotes='"', pad_first=" ", pad_before_eq="", pad_after_eq=""): """Add an attribute with the given *name* and *value*. *name* and *value* can be anything parsable by :py:func:`.utils.parse_anything`; *value* can be omitted if the - attribute is valueless. *quoted* is a bool telling whether to wrap the - *value* in double quotes (this is recommended). *pad_first*, - *pad_before_eq*, and *pad_after_eq* are whitespace used as padding - before the name, before the equal sign (or after the name if no value), - and after the equal sign (ignored if no value), respectively. + attribute is valueless. If *quotes* is not ``None``, it should be a + string (either ``"`` or ``'``) that *value* will be wrapped in (this is + recommended). ``None`` is only legal if *value* contains no spacing. + + *pad_first*, *pad_before_eq*, and *pad_after_eq* are whitespace used as + padding before the name, before the equal sign (or after the name if no + value), and after the equal sign (ignored if no value), respectively. """ if value is not None: value = parse_anything(value) - attr = Attribute(parse_anything(name), value, quoted) + quotes = Attribute.coerce_quotes(quotes) + attr = Attribute(parse_anything(name), value, quotes) attr.pad_first = pad_first attr.pad_before_eq = pad_before_eq attr.pad_after_eq = pad_after_eq diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index 559bd54..c9a930b 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -193,7 +193,7 @@ class Builder(object): def _handle_attribute(self, start): """Handle a case where a tag attribute is at the head of the tokens.""" - name, quoted = None, False + name = quotes = None self._push() while self._tokens: token = self._tokens.pop() @@ -201,7 +201,7 @@ class Builder(object): name = self._pop() self._push() elif isinstance(token, tokens.TagAttrQuote): - quoted = True + quotes = token.char elif isinstance(token, (tokens.TagAttrStart, tokens.TagCloseOpen, tokens.TagCloseSelfclose)): self._tokens.append(token) @@ -209,7 +209,7 @@ class Builder(object): value = self._pop() else: name, value = self._pop(), None - return Attribute(name, value, quoted, start.pad_first, + return Attribute(name, value, quotes, start.pad_first, start.pad_before_eq, start.pad_after_eq) else: self._write(self._handle_token(token)) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 963e7d7..4c6414e 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -173,7 +173,7 @@ static TagData* TagData_new(void) ALLOC_BUFFER(self->pad_first) ALLOC_BUFFER(self->pad_before_eq) ALLOC_BUFFER(self->pad_after_eq) - self->reset = 0; + self->quoter = self->reset = 0; return self; } @@ -1566,10 +1566,18 @@ static int Tokenizer_parse_comment(Tokenizer* self) */ static int Tokenizer_push_tag_buffer(Tokenizer* self, TagData* data) { - PyObject *tokens, *kwargs, *pad_first, *pad_before_eq, *pad_after_eq; + PyObject *tokens, *kwargs, *tmp, *pad_first, *pad_before_eq, *pad_after_eq; if (data->context & TAG_QUOTED) { - if (Tokenizer_emit_first(self, TagAttrQuote)) + kwargs = PyDict_New(); + if (!kwargs) + return -1; + tmp = PyUnicode_FromUnicode(&data->quoter, 1); + if (!tmp) + return -1; + PyDict_SetItemString(kwargs, "char", tmp); + Py_DECREF(tmp); + if (Tokenizer_emit_first_kwargs(self, TagAttrQuote, kwargs)) return -1; tokens = Tokenizer_pop(self); if (!tokens) @@ -1721,16 +1729,17 @@ Tokenizer_handle_tag_data(Tokenizer* self, TagData* data, Py_UNICODE chunk) Tokenizer_READ_BACKWARDS(self, 2) != '\\'); if (data->context & TAG_NOTE_QUOTE) { data->context ^= TAG_NOTE_QUOTE; - if (chunk == '"' && !escaped) { + if ((chunk == '"' || chunk == '\'') && !escaped) { data->context |= TAG_QUOTED; + data->quoter = chunk; + data->reset = self->head; if (Tokenizer_push(self, self->topstack->context)) return -1; - data->reset = self->head; return 0; } } else if (data->context & TAG_QUOTED) { - if (chunk == '"' && !escaped) { + if (chunk == data->quoter && !escaped) { data->context |= TAG_NOTE_SPACE; return 0; } diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index 4312e2f..dde6464 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -206,6 +206,7 @@ typedef struct { struct Textbuffer* pad_first; struct Textbuffer* pad_before_eq; struct Textbuffer* pad_after_eq; + Py_UNICODE quoter; Py_ssize_t reset; } TagData; diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 6430f0f..4422b5c 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -53,6 +53,7 @@ class _TagOpenData(object): def __init__(self): self.context = self.CX_NAME self.padding_buffer = {"first": "", "before_eq": "", "after_eq": ""} + self.quoter = None self.reset = 0 @@ -66,7 +67,7 @@ class Tokenizer(object): MAX_DEPTH = 40 MAX_CYCLES = 100000 regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE) - tag_splitter = re.compile(r"([\s\"\\]+)") + tag_splitter = re.compile(r"([\s\"\'\\]+)") def __init__(self): self._text = None @@ -612,7 +613,7 @@ class Tokenizer(object): def _push_tag_buffer(self, data): """Write a pending tag attribute from *data* to the stack.""" if data.context & data.CX_QUOTED: - self._emit_first(tokens.TagAttrQuote()) + self._emit_first(tokens.TagAttrQuote(char=data.quoter)) self._emit_all(self._pop()) buf = data.padding_buffer self._emit_first(tokens.TagAttrStart(pad_first=buf["first"], @@ -689,13 +690,14 @@ class Tokenizer(object): escaped = self._read(-1) == "\\" and self._read(-2) != "\\" if data.context & data.CX_NOTE_QUOTE: data.context ^= data.CX_NOTE_QUOTE - if chunk == '"' and not escaped: + if chunk in "'\"" and not escaped: data.context |= data.CX_QUOTED - self._push(self._context) + data.quoter = chunk data.reset = self._head + self._push(self._context) continue elif data.context & data.CX_QUOTED: - if chunk == '"' and not escaped: + if chunk == data.quoter and not escaped: data.context |= data.CX_NOTE_SPACE continue self._handle_tag_text(chunk) diff --git a/mwparserfromhell/parser/tokens.py b/mwparserfromhell/parser/tokens.py index c7cc3ef..e567731 100644 --- a/mwparserfromhell/parser/tokens.py +++ b/mwparserfromhell/parser/tokens.py @@ -100,7 +100,7 @@ CommentEnd = make("CommentEnd") # --> TagOpenOpen = make("TagOpenOpen") # < TagAttrStart = make("TagAttrStart") TagAttrEquals = make("TagAttrEquals") # = -TagAttrQuote = make("TagAttrQuote") # " +TagAttrQuote = make("TagAttrQuote") # ", ' TagCloseOpen = make("TagCloseOpen") # > TagCloseSelfclose = make("TagCloseSelfclose") # /> TagOpenClose = make("TagOpenClose") # [[Source]] + # mno = '{{p}} [[q]] {{r}}'>[[Source]] ([tokens.TagOpenOpen(), tokens.Text(text="ref"), tokens.TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), @@ -308,7 +308,7 @@ class TestBuilder(TreeEqualityTestCase): tokens.TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), tokens.Text(text="foo"), tokens.TagAttrEquals(), - tokens.TagAttrQuote(), tokens.Text(text="bar "), + tokens.TagAttrQuote(char='"'), tokens.Text(text="bar "), tokens.TemplateOpen(), tokens.Text(text="baz"), tokens.TemplateClose(), tokens.TagAttrStart(pad_first=" ", pad_before_eq="", @@ -326,7 +326,7 @@ class TestBuilder(TreeEqualityTestCase): tokens.TagAttrStart(pad_first=" \n ", pad_before_eq=" ", pad_after_eq=" "), tokens.Text(text="mno"), tokens.TagAttrEquals(), - tokens.TagAttrQuote(), tokens.TemplateOpen(), + tokens.TagAttrQuote(char="'"), tokens.TemplateOpen(), tokens.Text(text="p"), tokens.TemplateClose(), tokens.Text(text=" "), tokens.WikilinkOpen(), tokens.Text(text="q"), tokens.WikilinkClose(), @@ -338,17 +338,17 @@ class TestBuilder(TreeEqualityTestCase): tokens.TagCloseClose()], wrap([Tag(wraptext("ref"), wrap([Wikilink(wraptext("Source"))]), [ Attribute(wraptext("name"), - wrap([Template(wraptext("abc"))]), False), + wrap([Template(wraptext("abc"))]), None), Attribute(wraptext("foo"), wrap([Text("bar "), Template(wraptext("baz"))]), pad_first=" "), Attribute(wraptext("abc"), wrap([Template(wraptext("de")), - Text("f")]), False), + Text("f")]), None), Attribute(wraptext("ghi"), wrap([Text("j"), Template(wraptext("k")), - Template(wraptext("l"))]), False), + Template(wraptext("l"))]), None), Attribute(wraptext("mno"), wrap([Template(wraptext("p")), Text(" "), Wikilink(wraptext("q")), Text(" "), - Template(wraptext("r"))]), True, " \n ", " ", + Template(wraptext("r"))]), "'", " \n ", " ", " ")])])), # "''italic text''" diff --git a/tests/test_tag.py b/tests/test_tag.py index 0eae713..7577cce 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -34,9 +34,9 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext agen = lambda name, value: Attribute(wraptext(name), wraptext(value)) agennv = lambda name: Attribute(wraptext(name)) -agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), False) -agenp = lambda name, v, a, b, c: Attribute(wraptext(name), v, True, a, b, c) -agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, True, a, b, c) +agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), None) +agenp = lambda name, v, a, b, c: Attribute(wraptext(name), v, '"', a, b, c) +agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, '"', a, b, c) class TestTag(TreeEqualityTestCase): """Test cases for the Tag node.""" @@ -276,28 +276,33 @@ class TestTag(TreeEqualityTestCase): """test Tag.add()""" node = Tag(wraptext("ref"), wraptext("cite")) node.add("name", "value") - node.add("name", "value", quoted=False) + node.add("name", "value", quotes=None) + node.add("name", "value", quotes="'") node.add("name") node.add(1, False) node.add("style", "{{foobar}}") - node.add("name", "value", True, "\n", " ", " ") + node.add("name", "value", '"', "\n", " ", " ") attr1 = ' name="value"' attr2 = " name=value" - attr3 = " name" - attr4 = ' 1="False"' - attr5 = ' style="{{foobar}}"' - attr6 = '\nname = "value"' + attr3 = " name='value'" + attr4 = " name" + attr5 = ' 1="False"' + attr6 = ' style="{{foobar}}"' + attr7 = '\nname = "value"' self.assertEqual(attr1, node.attributes[0]) self.assertEqual(attr2, node.attributes[1]) self.assertEqual(attr3, node.attributes[2]) self.assertEqual(attr4, node.attributes[3]) self.assertEqual(attr5, node.attributes[4]) self.assertEqual(attr6, node.attributes[5]) - self.assertEqual(attr6, node.get("name")) + self.assertEqual(attr7, node.attributes[6]) + self.assertEqual(attr7, node.get("name")) self.assertWikicodeEqual(wrap([Template(wraptext("foobar"))]), - node.attributes[4].value) + node.attributes[5].value) self.assertEqual("".join(("cite")), node) + attr6, attr7, ">cite")), node) + self.assertRaises(ValueError, node.add, "name", "foo", quotes="bar") + self.assertRaises(ValueError, node.add, "name", "a bc d", quotes=None) def test_remove(self): """test Tag.remove()""" diff --git a/tests/tokenizer/integration.mwtest b/tests/tokenizer/integration.mwtest index 5e1a409..372a367 100644 --- a/tests/tokenizer/integration.mwtest +++ b/tests/tokenizer/integration.mwtest @@ -43,7 +43,7 @@ output: [Text(text="&n"), CommentStart(), Text(text="foo"), CommentEnd(), Text(t name: rich_tags label: a HTML tag with tons of other things in it input: "{{dubious claim}}[[Source]]" -output: [TemplateOpen(), Text(text="dubious claim"), TemplateClose(), TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TemplateOpen(), Text(text="abc"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(), Text(text="bar "), TemplateOpen(), Text(text="baz"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="abc"), TagAttrEquals(), TemplateOpen(), Text(text="de"), TemplateClose(), Text(text="f"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="ghi"), TagAttrEquals(), Text(text="j"), TemplateOpen(), Text(text="k"), TemplateClose(), TemplateOpen(), Text(text="l"), TemplateClose(), TagAttrStart(pad_first=" \n ", pad_before_eq=" ", pad_after_eq=" "), Text(text="mno"), TagAttrEquals(), TagAttrQuote(), TemplateOpen(), Text(text="p"), TemplateClose(), Text(text=" "), WikilinkOpen(), Text(text="q"), WikilinkClose(), Text(text=" "), TemplateOpen(), Text(text="r"), TemplateClose(), TagCloseOpen(padding=""), WikilinkOpen(), Text(text="Source"), WikilinkClose(), TagOpenClose(), Text(text="ref"), TagCloseClose()] +output: [TemplateOpen(), Text(text="dubious claim"), TemplateClose(), TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TemplateOpen(), Text(text="abc"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="bar "), TemplateOpen(), Text(text="baz"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="abc"), TagAttrEquals(), TemplateOpen(), Text(text="de"), TemplateClose(), Text(text="f"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="ghi"), TagAttrEquals(), Text(text="j"), TemplateOpen(), Text(text="k"), TemplateClose(), TemplateOpen(), Text(text="l"), TemplateClose(), TagAttrStart(pad_first=" \n ", pad_before_eq=" ", pad_after_eq=" "), Text(text="mno"), TagAttrEquals(), TagAttrQuote(char="\""), TemplateOpen(), Text(text="p"), TemplateClose(), Text(text=" "), WikilinkOpen(), Text(text="q"), WikilinkClose(), Text(text=" "), TemplateOpen(), Text(text="r"), TemplateClose(), TagCloseOpen(padding=""), WikilinkOpen(), Text(text="Source"), WikilinkClose(), TagOpenClose(), Text(text="ref"), TagCloseClose()] --- diff --git a/tests/tokenizer/tags.mwtest b/tests/tokenizer/tags.mwtest index 26e569b..f979329 100644 --- a/tests/tokenizer/tags.mwtest +++ b/tests/tokenizer/tags.mwtest @@ -57,7 +57,14 @@ output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before name: attribute_quoted label: a tag with a single quoted attribute input: "" -output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] +output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] + +--- + +name: attribute_single_quoted +label: a tag with a single singly-quoted attribute +input: "" +output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="'"), Text(text="foo bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] --- @@ -71,7 +78,7 @@ output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before name: attribute_quoted_hyphen label: a tag with a single quoted attribute, containing a hyphen input: "" -output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo-bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] +output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo-bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] --- @@ -92,21 +99,21 @@ output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before name: attribute_selfclosing_value_quoted label: a self-closing tag with a single quoted attribute input: "" -output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo"), TagCloseSelfclose(padding="")] +output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo"), TagCloseSelfclose(padding="")] --- name: nested_tag label: a tag nested within the attributes of another input: "foo>citation" -output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] +output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] --- name: nested_tag_quoted label: a tag nested within the attributes of another, quoted input: "foo">citation" -output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] +output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] --- @@ -120,7 +127,7 @@ output: [Text(text=">citation")] name: nested_troll_tag_quoted label: a bogus tag that appears to be nested within the attributes of another, quoted input: "citation" -output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text=""), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] +output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text=""), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] --- @@ -222,6 +229,27 @@ output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_befor --- +name: quotes_in_quotes +label: singly-quoted text inside a doubly-quoted attribute +input: "stuff" +output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="bar 'baz buzz' biz"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()] + +--- + +name: quotes_in_quotes_2 +label: doubly-quoted text inside a singly-quoted attribute +input: "stuff" +output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="'"), Text(text="bar \"baz buzz\" biz"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()] + +--- + +name: quotes_in_quotes_3 +label: doubly-quoted text inside a singly-quoted attribute, with backslashes +input: "stuff" +output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="'"), Text(text="bar \"baz buzz\\\" biz"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()] + +--- + name: incomplete_lbracket label: incomplete tags: just a left bracket input: "<" @@ -407,28 +435,28 @@ output: [Text(text="junk <>")] name: backslash_premature_before label: a backslash before a quote before a space input: "blah" -output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\" quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] +output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is\\\" quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] --- name: backslash_premature_after label: a backslash before a quote after a space input: "blah" -output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is \\\"quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] +output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is \\\"quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] --- name: backslash_premature_middle label: a backslash before a quote in the middle of a word input: "blah" -output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this i\\\"s quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] +output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this i\\\"s quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] --- name: backslash_adjacent label: escaped quotes next to unescaped quotes input: "blah" -output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="\\\"this is quoted\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] +output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="\\\"this is quoted\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] --- @@ -442,21 +470,21 @@ output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before name: backslash_double label: two adjacent backslashes, which do *not* affect the quote input: "blah" -output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] +output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] --- name: backslash_triple label: three adjacent backslashes, which do *not* affect the quote input: "blah" -output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] +output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is\\\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] --- name: backslash_unaffecting label: backslashes near quotes, but not immediately adjacent, thus having no effect input: "blah" -output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="\\quote\\d"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="also"), TagAttrEquals(), Text(text="\"quote\\d\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] +output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="\\quote\\d"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="also"), TagAttrEquals(), Text(text="\"quote\\d\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] --- @@ -477,7 +505,7 @@ output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(t name: unparsable_attributed label: a tag that should not be put through the normal parser; parsed attributes input: "{{t1}}{{t2}}{{t3}}" -output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(text="nowiki"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr"), TagAttrEquals(), Text(text="val"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr2"), TagAttrEquals(), TagAttrQuote(), TemplateOpen(), Text(text="val2"), TemplateClose(), TagCloseOpen(padding=""), Text(text="{{t2}}"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), TemplateOpen(), Text(text="t3"), TemplateClose()] +output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(text="nowiki"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr"), TagAttrEquals(), Text(text="val"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr2"), TagAttrEquals(), TagAttrQuote(char="\""), TemplateOpen(), Text(text="val2"), TemplateClose(), TagCloseOpen(padding=""), Text(text="{{t2}}"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), TemplateOpen(), Text(text="t3"), TemplateClose()] --- @@ -575,7 +603,7 @@ output: [Text(text="foo"), TagOpenOpen(invalid=True), Text(text="br"), TagCloseS name: single_only_close_attribute label: a tag that can only be single; presented as a close tag with an attribute input: "
" -output: [TagOpenOpen(invalid=True), Text(text="br"), TagAttrStart(pad_first=" ", pad_after_eq="", pad_before_eq=""), Text(text="id"), TagAttrEquals(), TagAttrQuote(), Text(text="break"), TagCloseSelfclose(padding="", implicit=True)] +output: [TagOpenOpen(invalid=True), Text(text="br"), TagAttrStart(pad_first=" ", pad_after_eq="", pad_before_eq=""), Text(text="id"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="break"), TagCloseSelfclose(padding="", implicit=True)] ---