Browse Source

Support attributes quoted with '; add required quotes in value setter.

tags/v0.4
Ben Kurtovic 10 years ago
parent
commit
b997e4cd71
15 changed files with 189 additions and 85 deletions
  1. +4
    -0
      CHANGELOG
  2. +5
    -0
      docs/changelog.rst
  3. +39
    -12
      mwparserfromhell/nodes/extras/attribute.py
  4. +10
    -7
      mwparserfromhell/nodes/tag.py
  5. +3
    -3
      mwparserfromhell/parser/builder.py
  6. +15
    -6
      mwparserfromhell/parser/tokenizer.c
  7. +1
    -0
      mwparserfromhell/parser/tokenizer.h
  8. +7
    -5
      mwparserfromhell/parser/tokenizer.py
  9. +1
    -1
      mwparserfromhell/parser/tokens.py
  10. +1
    -1
      tests/_test_tree_equality.py
  11. +34
    -14
      tests/test_attribute.py
  12. +8
    -8
      tests/test_builder.py
  13. +17
    -12
      tests/test_tag.py
  14. +1
    -1
      tests/tokenizer/integration.mwtest
  15. +43
    -15
      tests/tokenizer/tags.mwtest

+ 4
- 0
CHANGELOG View File

@@ -10,6 +10,10 @@ v0.4 (unreleased):
option, RECURSE_OTHERS, which recurses over all children except instances of option, RECURSE_OTHERS, which recurses over all children except instances of
'forcetype' (for example, `code.filter_templates(code.RECURSE_OTHERS)` 'forcetype' (for example, `code.filter_templates(code.RECURSE_OTHERS)`
returns all un-nested templates). returns all un-nested templates).
- The parser now understands HTML tag attributes quoted with single quotes.
When setting a tag attribute's value, quotes will be added if necessary. As
part of this, Attribute's 'quoted' attribute has been changed to 'quotes',
and is now either a string or None.
- Calling Template.remove() with a Parameter object that is not part of the - Calling Template.remove() with a Parameter object that is not part of the
template now raises ValueError instead of doing nothing. template now raises ValueError instead of doing nothing.
- Parameters with non-integer keys can no longer be created with - Parameters with non-integer keys can no longer be created with


+ 5
- 0
docs/changelog.rst View File

@@ -18,6 +18,11 @@ Unreleased
which recurses over all children except instances of *forcetype* (for which recurses over all children except instances of *forcetype* (for
example, ``code.filter_templates(code.RECURSE_OTHERS)`` returns all un-nested example, ``code.filter_templates(code.RECURSE_OTHERS)`` returns all un-nested
templates). templates).
- The parser now understands HTML tag attributes quoted with single quotes.
When setting a tag attribute's value, quotes will be added if necessary. As
part of this, :py:class:`.Attribute`\ 's :py:attr:`~.Attribute.quoted`
attribute has been changed to :py:attr:`~.Attribute.quotes`, and is now
either a string or ``None``.
- Calling :py:meth:`.Template.remove` with a :py:class:`.Parameter` object that - Calling :py:meth:`.Template.remove` with a :py:class:`.Parameter` object that
is not part of the template now raises :py:exc:`ValueError` instead of doing is not part of the template now raises :py:exc:`ValueError` instead of doing
nothing. nothing.


+ 39
- 12
mwparserfromhell/nodes/extras/attribute.py View File

@@ -36,12 +36,14 @@ class Attribute(StringMixIn):
whose value is ``"foo"``. whose value is ``"foo"``.
""" """


def __init__(self, name, value=None, quoted=True, pad_first=" ",
def __init__(self, name, value=None, quotes='"', pad_first=" ",
pad_before_eq="", pad_after_eq=""): pad_before_eq="", pad_after_eq=""):
super(Attribute, self).__init__() super(Attribute, self).__init__()
if not quotes and self._value_needs_quotes(value):
raise ValueError("given value {0!r} requires quotes".format(value))
self._name = name self._name = name
self._value = value self._value = value
self._quoted = quoted
self._quotes = quotes
self._pad_first = pad_first self._pad_first = pad_first
self._pad_before_eq = pad_before_eq self._pad_before_eq = pad_before_eq
self._pad_after_eq = pad_after_eq self._pad_after_eq = pad_after_eq
@@ -50,11 +52,18 @@ class Attribute(StringMixIn):
result = self.pad_first + str(self.name) + self.pad_before_eq result = self.pad_first + str(self.name) + self.pad_before_eq
if self.value is not None: if self.value is not None:
result += "=" + self.pad_after_eq result += "=" + self.pad_after_eq
if self.quoted:
return result + '"' + str(self.value) + '"'
if self.quotes:
return result + self.quotes + str(self.value) + self.quotes
return result + str(self.value) return result + str(self.value)
return result return result


@staticmethod
def _value_needs_quotes(val):
"""Return the preferred quotes for the given value, or None."""
if val and any(char.isspace() for char in val):
return ('"' in val and "'" in val) or ("'" if '"' in val else '"')
return None

def _set_padding(self, attr, value): def _set_padding(self, attr, value):
"""Setter for the value of a padding attribute.""" """Setter for the value of a padding attribute."""
if not value: if not value:
@@ -65,6 +74,14 @@ class Attribute(StringMixIn):
raise ValueError("padding must be entirely whitespace") raise ValueError("padding must be entirely whitespace")
setattr(self, attr, value) setattr(self, attr, value)


@staticmethod
def coerce_quotes(quotes):
"""Coerce a quote type into an acceptable value, or raise an error."""
orig, quotes = quotes, str(quotes) if quotes else None
if quotes not in [None, '"', "'"]:
raise ValueError("{0!r} is not a valid quote type".format(orig))
return quotes

@property @property
def name(self): def name(self):
"""The name of the attribute as a :py:class:`~.Wikicode` object.""" """The name of the attribute as a :py:class:`~.Wikicode` object."""
@@ -76,9 +93,9 @@ class Attribute(StringMixIn):
return self._value return self._value


@property @property
def quoted(self):
"""Whether the attribute's value is quoted with double quotes."""
return self._quoted
def quotes(self):
"""How to enclose the attribute value. ``"``, ``'``, or ``None``."""
return self._quotes


@property @property
def pad_first(self): def pad_first(self):
@@ -101,11 +118,21 @@ class Attribute(StringMixIn):


@value.setter @value.setter
def value(self, newval): def value(self, newval):
self._value = None if newval is None else parse_anything(newval)

@quoted.setter
def quoted(self, value):
self._quoted = bool(value)
if newval is None:
self._value = None
else:
code = parse_anything(newval)
quotes = self._value_needs_quotes(code)
if quotes in ['"', "'"] or (quotes is True and not self.quotes):
self._quotes = quotes
self._value = code

@quotes.setter
def quotes(self, value):
    """Set the quote style after validating it.

    *value* is first normalized by :py:meth:`coerce_quotes` (which raises
    :py:exc:`ValueError` for an invalid quote type). Removing the quotes is
    refused with :py:exc:`ValueError` when the current attribute value
    requires them.
    """
    new_quotes = self.coerce_quotes(value)
    if not new_quotes:
        # Only check the current value when the quotes are being dropped.
        if self._value_needs_quotes(self.value):
            raise ValueError("attribute value requires quotes")
    self._quotes = new_quotes


@pad_first.setter @pad_first.setter
def pad_first(self, value): def pad_first(self, value):


+ 10
- 7
mwparserfromhell/nodes/tag.py View File

@@ -236,21 +236,24 @@ class Tag(Node):
return attr return attr
raise ValueError(name) raise ValueError(name)


def add(self, name, value=None, quoted=True, pad_first=" ",
def add(self, name, value=None, quotes='"', pad_first=" ",
pad_before_eq="", pad_after_eq=""): pad_before_eq="", pad_after_eq=""):
"""Add an attribute with the given *name* and *value*. """Add an attribute with the given *name* and *value*.


*name* and *value* can be anything parsable by *name* and *value* can be anything parsable by
:py:func:`.utils.parse_anything`; *value* can be omitted if the :py:func:`.utils.parse_anything`; *value* can be omitted if the
attribute is valueless. *quoted* is a bool telling whether to wrap the
*value* in double quotes (this is recommended). *pad_first*,
*pad_before_eq*, and *pad_after_eq* are whitespace used as padding
before the name, before the equal sign (or after the name if no value),
and after the equal sign (ignored if no value), respectively.
attribute is valueless. If *quotes* is not ``None``, it should be a
string (either ``"`` or ``'``) that *value* will be wrapped in (this is
recommended). ``None`` is only legal if *value* contains no spacing.

*pad_first*, *pad_before_eq*, and *pad_after_eq* are whitespace used as
padding before the name, before the equal sign (or after the name if no
value), and after the equal sign (ignored if no value), respectively.
""" """
if value is not None: if value is not None:
value = parse_anything(value) value = parse_anything(value)
attr = Attribute(parse_anything(name), value, quoted)
quotes = Attribute.coerce_quotes(quotes)
attr = Attribute(parse_anything(name), value, quotes)
attr.pad_first = pad_first attr.pad_first = pad_first
attr.pad_before_eq = pad_before_eq attr.pad_before_eq = pad_before_eq
attr.pad_after_eq = pad_after_eq attr.pad_after_eq = pad_after_eq


+ 3
- 3
mwparserfromhell/parser/builder.py View File

@@ -193,7 +193,7 @@ class Builder(object):


def _handle_attribute(self, start): def _handle_attribute(self, start):
"""Handle a case where a tag attribute is at the head of the tokens.""" """Handle a case where a tag attribute is at the head of the tokens."""
name, quoted = None, False
name = quotes = None
self._push() self._push()
while self._tokens: while self._tokens:
token = self._tokens.pop() token = self._tokens.pop()
@@ -201,7 +201,7 @@ class Builder(object):
name = self._pop() name = self._pop()
self._push() self._push()
elif isinstance(token, tokens.TagAttrQuote): elif isinstance(token, tokens.TagAttrQuote):
quoted = True
quotes = token.char
elif isinstance(token, (tokens.TagAttrStart, tokens.TagCloseOpen, elif isinstance(token, (tokens.TagAttrStart, tokens.TagCloseOpen,
tokens.TagCloseSelfclose)): tokens.TagCloseSelfclose)):
self._tokens.append(token) self._tokens.append(token)
@@ -209,7 +209,7 @@ class Builder(object):
value = self._pop() value = self._pop()
else: else:
name, value = self._pop(), None name, value = self._pop(), None
return Attribute(name, value, quoted, start.pad_first,
return Attribute(name, value, quotes, start.pad_first,
start.pad_before_eq, start.pad_after_eq) start.pad_before_eq, start.pad_after_eq)
else: else:
self._write(self._handle_token(token)) self._write(self._handle_token(token))


+ 15
- 6
mwparserfromhell/parser/tokenizer.c View File

@@ -173,7 +173,7 @@ static TagData* TagData_new(void)
ALLOC_BUFFER(self->pad_first) ALLOC_BUFFER(self->pad_first)
ALLOC_BUFFER(self->pad_before_eq) ALLOC_BUFFER(self->pad_before_eq)
ALLOC_BUFFER(self->pad_after_eq) ALLOC_BUFFER(self->pad_after_eq)
self->reset = 0;
self->quoter = self->reset = 0;
return self; return self;
} }


@@ -1566,10 +1566,18 @@ static int Tokenizer_parse_comment(Tokenizer* self)
*/ */
static int Tokenizer_push_tag_buffer(Tokenizer* self, TagData* data) static int Tokenizer_push_tag_buffer(Tokenizer* self, TagData* data)
{ {
PyObject *tokens, *kwargs, *pad_first, *pad_before_eq, *pad_after_eq;
PyObject *tokens, *kwargs, *tmp, *pad_first, *pad_before_eq, *pad_after_eq;


if (data->context & TAG_QUOTED) { if (data->context & TAG_QUOTED) {
if (Tokenizer_emit_first(self, TagAttrQuote))
kwargs = PyDict_New();
if (!kwargs)
return -1;
tmp = PyUnicode_FromUnicode(&data->quoter, 1);
if (!tmp)
return -1;
PyDict_SetItemString(kwargs, "char", tmp);
Py_DECREF(tmp);
if (Tokenizer_emit_first_kwargs(self, TagAttrQuote, kwargs))
return -1; return -1;
tokens = Tokenizer_pop(self); tokens = Tokenizer_pop(self);
if (!tokens) if (!tokens)
@@ -1721,16 +1729,17 @@ Tokenizer_handle_tag_data(Tokenizer* self, TagData* data, Py_UNICODE chunk)
Tokenizer_READ_BACKWARDS(self, 2) != '\\'); Tokenizer_READ_BACKWARDS(self, 2) != '\\');
if (data->context & TAG_NOTE_QUOTE) { if (data->context & TAG_NOTE_QUOTE) {
data->context ^= TAG_NOTE_QUOTE; data->context ^= TAG_NOTE_QUOTE;
if (chunk == '"' && !escaped) {
if ((chunk == '"' || chunk == '\'') && !escaped) {
data->context |= TAG_QUOTED; data->context |= TAG_QUOTED;
data->quoter = chunk;
data->reset = self->head;
if (Tokenizer_push(self, self->topstack->context)) if (Tokenizer_push(self, self->topstack->context))
return -1; return -1;
data->reset = self->head;
return 0; return 0;
} }
} }
else if (data->context & TAG_QUOTED) { else if (data->context & TAG_QUOTED) {
if (chunk == '"' && !escaped) {
if (chunk == data->quoter && !escaped) {
data->context |= TAG_NOTE_SPACE; data->context |= TAG_NOTE_SPACE;
return 0; return 0;
} }


+ 1
- 0
mwparserfromhell/parser/tokenizer.h View File

@@ -206,6 +206,7 @@ typedef struct {
struct Textbuffer* pad_first; struct Textbuffer* pad_first;
struct Textbuffer* pad_before_eq; struct Textbuffer* pad_before_eq;
struct Textbuffer* pad_after_eq; struct Textbuffer* pad_after_eq;
Py_UNICODE quoter;
Py_ssize_t reset; Py_ssize_t reset;
} TagData; } TagData;




+ 7
- 5
mwparserfromhell/parser/tokenizer.py View File

@@ -53,6 +53,7 @@ class _TagOpenData(object):
def __init__(self): def __init__(self):
self.context = self.CX_NAME self.context = self.CX_NAME
self.padding_buffer = {"first": "", "before_eq": "", "after_eq": ""} self.padding_buffer = {"first": "", "before_eq": "", "after_eq": ""}
self.quoter = None
self.reset = 0 self.reset = 0




@@ -66,7 +67,7 @@ class Tokenizer(object):
MAX_DEPTH = 40 MAX_DEPTH = 40
MAX_CYCLES = 100000 MAX_CYCLES = 100000
regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE) regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE)
tag_splitter = re.compile(r"([\s\"\\]+)")
tag_splitter = re.compile(r"([\s\"\'\\]+)")


def __init__(self): def __init__(self):
self._text = None self._text = None
@@ -612,7 +613,7 @@ class Tokenizer(object):
def _push_tag_buffer(self, data): def _push_tag_buffer(self, data):
"""Write a pending tag attribute from *data* to the stack.""" """Write a pending tag attribute from *data* to the stack."""
if data.context & data.CX_QUOTED: if data.context & data.CX_QUOTED:
self._emit_first(tokens.TagAttrQuote())
self._emit_first(tokens.TagAttrQuote(char=data.quoter))
self._emit_all(self._pop()) self._emit_all(self._pop())
buf = data.padding_buffer buf = data.padding_buffer
self._emit_first(tokens.TagAttrStart(pad_first=buf["first"], self._emit_first(tokens.TagAttrStart(pad_first=buf["first"],
@@ -689,13 +690,14 @@ class Tokenizer(object):
escaped = self._read(-1) == "\\" and self._read(-2) != "\\" escaped = self._read(-1) == "\\" and self._read(-2) != "\\"
if data.context & data.CX_NOTE_QUOTE: if data.context & data.CX_NOTE_QUOTE:
data.context ^= data.CX_NOTE_QUOTE data.context ^= data.CX_NOTE_QUOTE
if chunk == '"' and not escaped:
if chunk in "'\"" and not escaped:
data.context |= data.CX_QUOTED data.context |= data.CX_QUOTED
self._push(self._context)
data.quoter = chunk
data.reset = self._head data.reset = self._head
self._push(self._context)
continue continue
elif data.context & data.CX_QUOTED: elif data.context & data.CX_QUOTED:
if chunk == '"' and not escaped:
if chunk == data.quoter and not escaped:
data.context |= data.CX_NOTE_SPACE data.context |= data.CX_NOTE_SPACE
continue continue
self._handle_tag_text(chunk) self._handle_tag_text(chunk)


+ 1
- 1
mwparserfromhell/parser/tokens.py View File

@@ -100,7 +100,7 @@ CommentEnd = make("CommentEnd") # -->
TagOpenOpen = make("TagOpenOpen") # < TagOpenOpen = make("TagOpenOpen") # <
TagAttrStart = make("TagAttrStart") TagAttrStart = make("TagAttrStart")
TagAttrEquals = make("TagAttrEquals") # = TagAttrEquals = make("TagAttrEquals") # =
TagAttrQuote = make("TagAttrQuote") # "
TagAttrQuote = make("TagAttrQuote") # ", '
TagCloseOpen = make("TagCloseOpen") # > TagCloseOpen = make("TagCloseOpen") # >
TagCloseSelfclose = make("TagCloseSelfclose") # /> TagCloseSelfclose = make("TagCloseSelfclose") # />
TagOpenClose = make("TagOpenClose") # </ TagOpenClose = make("TagOpenClose") # </


+ 1
- 1
tests/_test_tree_equality.py View File

@@ -98,7 +98,7 @@ class TreeEqualityTestCase(TestCase):
self.assertWikicodeEqual(exp_attr.name, act_attr.name) self.assertWikicodeEqual(exp_attr.name, act_attr.name)
if exp_attr.value is not None: if exp_attr.value is not None:
self.assertWikicodeEqual(exp_attr.value, act_attr.value) self.assertWikicodeEqual(exp_attr.value, act_attr.value)
self.assertIs(exp_attr.quoted, act_attr.quoted)
self.assertEqual(exp_attr.quotes, act_attr.quotes)
self.assertEqual(exp_attr.pad_first, act_attr.pad_first) self.assertEqual(exp_attr.pad_first, act_attr.pad_first)
self.assertEqual(exp_attr.pad_before_eq, act_attr.pad_before_eq) self.assertEqual(exp_attr.pad_before_eq, act_attr.pad_before_eq)
self.assertEqual(exp_attr.pad_after_eq, act_attr.pad_after_eq) self.assertEqual(exp_attr.pad_after_eq, act_attr.pad_after_eq)


+ 34
- 14
tests/test_attribute.py View File

@@ -42,12 +42,14 @@ class TestAttribute(TreeEqualityTestCase):
self.assertEqual(" foo", str(node)) self.assertEqual(" foo", str(node))
node2 = Attribute(wraptext("foo"), wraptext("bar")) node2 = Attribute(wraptext("foo"), wraptext("bar"))
self.assertEqual(' foo="bar"', str(node2)) self.assertEqual(' foo="bar"', str(node2))
node3 = Attribute(wraptext("a"), wraptext("b"), True, "", " ", " ")
node3 = Attribute(wraptext("a"), wraptext("b"), '"', "", " ", " ")
self.assertEqual('a = "b"', str(node3)) self.assertEqual('a = "b"', str(node3))
node3 = Attribute(wraptext("a"), wraptext("b"), False, "", " ", " ")
self.assertEqual("a = b", str(node3))
node4 = Attribute(wraptext("a"), wrap([]), False, " ", "", " ")
self.assertEqual(" a= ", str(node4))
node4 = Attribute(wraptext("a"), wraptext("b"), "'", "", " ", " ")
self.assertEqual("a = 'b'", str(node4))
node5 = Attribute(wraptext("a"), wraptext("b"), None, "", " ", " ")
self.assertEqual("a = b", str(node5))
node6 = Attribute(wraptext("a"), wrap([]), None, " ", "", " ")
self.assertEqual(" a= ", str(node6))


def test_name(self): def test_name(self):
"""test getter/setter for the name attribute""" """test getter/setter for the name attribute"""
@@ -66,17 +68,35 @@ class TestAttribute(TreeEqualityTestCase):
self.assertWikicodeEqual(wrap([Template(wraptext("bar"))]), node.value) self.assertWikicodeEqual(wrap([Template(wraptext("bar"))]), node.value)
node.value = None node.value = None
self.assertIs(None, node.value) self.assertIs(None, node.value)
node2 = Attribute(wraptext("id"), wraptext("foo"), None)
node2.value = "foo bar baz"
self.assertWikicodeEqual(wraptext("foo bar baz"), node2.value)
self.assertEqual('"', node2.quotes)
node2.value = 'foo "bar" baz'
self.assertWikicodeEqual(wraptext('foo "bar" baz'), node2.value)
self.assertEqual("'", node2.quotes)
node2.value = "foo 'bar' baz"
self.assertWikicodeEqual(wraptext("foo 'bar' baz"), node2.value)
self.assertEqual('"', node2.quotes)
node2.value = "fo\"o 'bar' b\"az"
self.assertWikicodeEqual(wraptext("fo\"o 'bar' b\"az"), node2.value)
self.assertEqual('"', node2.quotes)


def test_quoted(self):
"""test getter/setter for the quoted attribute"""
node1 = Attribute(wraptext("id"), wraptext("foo"), False)
def test_quotes(self):
"""test getter/setter for the quotes attribute"""
node1 = Attribute(wraptext("id"), wraptext("foo"), None)
node2 = Attribute(wraptext("id"), wraptext("bar")) node2 = Attribute(wraptext("id"), wraptext("bar"))
self.assertFalse(node1.quoted)
self.assertTrue(node2.quoted)
node1.quoted = True
node2.quoted = ""
self.assertTrue(node1.quoted)
self.assertFalse(node2.quoted)
node3 = Attribute(wraptext("id"), wraptext("foo bar baz"))
self.assertIs(None, node1.quotes)
self.assertEqual('"', node2.quotes)
node1.quotes = "'"
node2.quotes = None
self.assertEqual("'", node1.quotes)
self.assertIs(None, node2.quotes)
self.assertRaises(ValueError, setattr, node1, "quotes", "foobar")
self.assertRaises(ValueError, setattr, node3, "quotes", None)
self.assertRaises(ValueError, Attribute, wraptext("id"),
wraptext("foo bar baz"), None)


def test_padding(self): def test_padding(self):
"""test getter/setter for the padding attributes""" """test getter/setter for the padding attributes"""


+ 8
- 8
tests/test_builder.py View File

@@ -270,7 +270,7 @@ class TestBuilder(TreeEqualityTestCase):
tokens.TagAttrStart(pad_first=" ", pad_before_eq="", tokens.TagAttrStart(pad_first=" ", pad_before_eq="",
pad_after_eq=""), pad_after_eq=""),
tokens.Text(text="name"), tokens.TagAttrEquals(), tokens.Text(text="name"), tokens.TagAttrEquals(),
tokens.TagAttrQuote(), tokens.Text(text="abc"),
tokens.TagAttrQuote(char='"'), tokens.Text(text="abc"),
tokens.TagCloseSelfclose(padding=" ")], tokens.TagCloseSelfclose(padding=" ")],
wrap([Tag(wraptext("ref"), wrap([Tag(wraptext("ref"),
attrs=[Attribute(wraptext("name"), wraptext("abc"))], attrs=[Attribute(wraptext("name"), wraptext("abc"))],
@@ -298,7 +298,7 @@ class TestBuilder(TreeEqualityTestCase):
wrap([Tag(wraptext("br"), self_closing=True, invalid=True)])), wrap([Tag(wraptext("br"), self_closing=True, invalid=True)])),


# <ref name={{abc}} foo="bar {{baz}}" abc={{de}}f ghi=j{{k}}{{l}} # <ref name={{abc}} foo="bar {{baz}}" abc={{de}}f ghi=j{{k}}{{l}}
# mno = "{{p}} [[q]] {{r}}">[[Source]]</ref>
# mno = '{{p}} [[q]] {{r}}'>[[Source]]</ref>
([tokens.TagOpenOpen(), tokens.Text(text="ref"), ([tokens.TagOpenOpen(), tokens.Text(text="ref"),
tokens.TagAttrStart(pad_first=" ", pad_before_eq="", tokens.TagAttrStart(pad_first=" ", pad_before_eq="",
pad_after_eq=""), pad_after_eq=""),
@@ -308,7 +308,7 @@ class TestBuilder(TreeEqualityTestCase):
tokens.TagAttrStart(pad_first=" ", pad_before_eq="", tokens.TagAttrStart(pad_first=" ", pad_before_eq="",
pad_after_eq=""), pad_after_eq=""),
tokens.Text(text="foo"), tokens.TagAttrEquals(), tokens.Text(text="foo"), tokens.TagAttrEquals(),
tokens.TagAttrQuote(), tokens.Text(text="bar "),
tokens.TagAttrQuote(char='"'), tokens.Text(text="bar "),
tokens.TemplateOpen(), tokens.Text(text="baz"), tokens.TemplateOpen(), tokens.Text(text="baz"),
tokens.TemplateClose(), tokens.TemplateClose(),
tokens.TagAttrStart(pad_first=" ", pad_before_eq="", tokens.TagAttrStart(pad_first=" ", pad_before_eq="",
@@ -326,7 +326,7 @@ class TestBuilder(TreeEqualityTestCase):
tokens.TagAttrStart(pad_first=" \n ", pad_before_eq=" ", tokens.TagAttrStart(pad_first=" \n ", pad_before_eq=" ",
pad_after_eq=" "), pad_after_eq=" "),
tokens.Text(text="mno"), tokens.TagAttrEquals(), tokens.Text(text="mno"), tokens.TagAttrEquals(),
tokens.TagAttrQuote(), tokens.TemplateOpen(),
tokens.TagAttrQuote(char="'"), tokens.TemplateOpen(),
tokens.Text(text="p"), tokens.TemplateClose(), tokens.Text(text="p"), tokens.TemplateClose(),
tokens.Text(text=" "), tokens.WikilinkOpen(), tokens.Text(text=" "), tokens.WikilinkOpen(),
tokens.Text(text="q"), tokens.WikilinkClose(), tokens.Text(text="q"), tokens.WikilinkClose(),
@@ -338,17 +338,17 @@ class TestBuilder(TreeEqualityTestCase):
tokens.TagCloseClose()], tokens.TagCloseClose()],
wrap([Tag(wraptext("ref"), wrap([Wikilink(wraptext("Source"))]), [ wrap([Tag(wraptext("ref"), wrap([Wikilink(wraptext("Source"))]), [
Attribute(wraptext("name"), Attribute(wraptext("name"),
wrap([Template(wraptext("abc"))]), False),
wrap([Template(wraptext("abc"))]), None),
Attribute(wraptext("foo"), wrap([Text("bar "), Attribute(wraptext("foo"), wrap([Text("bar "),
Template(wraptext("baz"))]), pad_first=" "), Template(wraptext("baz"))]), pad_first=" "),
Attribute(wraptext("abc"), wrap([Template(wraptext("de")), Attribute(wraptext("abc"), wrap([Template(wraptext("de")),
Text("f")]), False),
Text("f")]), None),
Attribute(wraptext("ghi"), wrap([Text("j"), Attribute(wraptext("ghi"), wrap([Text("j"),
Template(wraptext("k")), Template(wraptext("k")),
Template(wraptext("l"))]), False),
Template(wraptext("l"))]), None),
Attribute(wraptext("mno"), wrap([Template(wraptext("p")), Attribute(wraptext("mno"), wrap([Template(wraptext("p")),
Text(" "), Wikilink(wraptext("q")), Text(" "), Text(" "), Wikilink(wraptext("q")), Text(" "),
Template(wraptext("r"))]), True, " \n ", " ",
Template(wraptext("r"))]), "'", " \n ", " ",
" ")])])), " ")])])),


# "''italic text''" # "''italic text''"


+ 17
- 12
tests/test_tag.py View File

@@ -34,9 +34,9 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext


agen = lambda name, value: Attribute(wraptext(name), wraptext(value)) agen = lambda name, value: Attribute(wraptext(name), wraptext(value))
agennv = lambda name: Attribute(wraptext(name)) agennv = lambda name: Attribute(wraptext(name))
agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), False)
agenp = lambda name, v, a, b, c: Attribute(wraptext(name), v, True, a, b, c)
agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, True, a, b, c)
agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), None)
agenp = lambda name, v, a, b, c: Attribute(wraptext(name), v, '"', a, b, c)
agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, '"', a, b, c)


class TestTag(TreeEqualityTestCase): class TestTag(TreeEqualityTestCase):
"""Test cases for the Tag node.""" """Test cases for the Tag node."""
@@ -276,28 +276,33 @@ class TestTag(TreeEqualityTestCase):
"""test Tag.add()""" """test Tag.add()"""
node = Tag(wraptext("ref"), wraptext("cite")) node = Tag(wraptext("ref"), wraptext("cite"))
node.add("name", "value") node.add("name", "value")
node.add("name", "value", quoted=False)
node.add("name", "value", quotes=None)
node.add("name", "value", quotes="'")
node.add("name") node.add("name")
node.add(1, False) node.add(1, False)
node.add("style", "{{foobar}}") node.add("style", "{{foobar}}")
node.add("name", "value", True, "\n", " ", " ")
node.add("name", "value", '"', "\n", " ", " ")
attr1 = ' name="value"' attr1 = ' name="value"'
attr2 = " name=value" attr2 = " name=value"
attr3 = " name"
attr4 = ' 1="False"'
attr5 = ' style="{{foobar}}"'
attr6 = '\nname = "value"'
attr3 = " name='value'"
attr4 = " name"
attr5 = ' 1="False"'
attr6 = ' style="{{foobar}}"'
attr7 = '\nname = "value"'
self.assertEqual(attr1, node.attributes[0]) self.assertEqual(attr1, node.attributes[0])
self.assertEqual(attr2, node.attributes[1]) self.assertEqual(attr2, node.attributes[1])
self.assertEqual(attr3, node.attributes[2]) self.assertEqual(attr3, node.attributes[2])
self.assertEqual(attr4, node.attributes[3]) self.assertEqual(attr4, node.attributes[3])
self.assertEqual(attr5, node.attributes[4]) self.assertEqual(attr5, node.attributes[4])
self.assertEqual(attr6, node.attributes[5]) self.assertEqual(attr6, node.attributes[5])
self.assertEqual(attr6, node.get("name"))
self.assertEqual(attr7, node.attributes[6])
self.assertEqual(attr7, node.get("name"))
self.assertWikicodeEqual(wrap([Template(wraptext("foobar"))]), self.assertWikicodeEqual(wrap([Template(wraptext("foobar"))]),
node.attributes[4].value)
node.attributes[5].value)
self.assertEqual("".join(("<ref", attr1, attr2, attr3, attr4, attr5, self.assertEqual("".join(("<ref", attr1, attr2, attr3, attr4, attr5,
attr6, ">cite</ref>")), node)
attr6, attr7, ">cite</ref>")), node)
self.assertRaises(ValueError, node.add, "name", "foo", quotes="bar")
self.assertRaises(ValueError, node.add, "name", "a bc d", quotes=None)


def test_remove(self): def test_remove(self):
"""test Tag.remove()""" """test Tag.remove()"""


+ 1
- 1
tests/tokenizer/integration.mwtest View File

@@ -43,7 +43,7 @@ output: [Text(text="&n"), CommentStart(), Text(text="foo"), CommentEnd(), Text(t
name: rich_tags name: rich_tags
label: a HTML tag with tons of other things in it label: a HTML tag with tons of other things in it
input: "{{dubious claim}}<ref name={{abc}} foo="bar {{baz}}" abc={{de}}f ghi=j{{k}}{{l}} \n mno = "{{p}} [[q]] {{r}}">[[Source]]</ref>" input: "{{dubious claim}}<ref name={{abc}} foo="bar {{baz}}" abc={{de}}f ghi=j{{k}}{{l}} \n mno = "{{p}} [[q]] {{r}}">[[Source]]</ref>"
output: [TemplateOpen(), Text(text="dubious claim"), TemplateClose(), TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TemplateOpen(), Text(text="abc"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(), Text(text="bar "), TemplateOpen(), Text(text="baz"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="abc"), TagAttrEquals(), TemplateOpen(), Text(text="de"), TemplateClose(), Text(text="f"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="ghi"), TagAttrEquals(), Text(text="j"), TemplateOpen(), Text(text="k"), TemplateClose(), TemplateOpen(), Text(text="l"), TemplateClose(), TagAttrStart(pad_first=" \n ", pad_before_eq=" ", pad_after_eq=" "), Text(text="mno"), TagAttrEquals(), TagAttrQuote(), TemplateOpen(), Text(text="p"), TemplateClose(), Text(text=" "), WikilinkOpen(), Text(text="q"), WikilinkClose(), Text(text=" "), TemplateOpen(), Text(text="r"), TemplateClose(), TagCloseOpen(padding=""), WikilinkOpen(), Text(text="Source"), WikilinkClose(), TagOpenClose(), Text(text="ref"), TagCloseClose()]
output: [TemplateOpen(), Text(text="dubious claim"), TemplateClose(), TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TemplateOpen(), Text(text="abc"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="bar "), TemplateOpen(), Text(text="baz"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="abc"), TagAttrEquals(), TemplateOpen(), Text(text="de"), TemplateClose(), Text(text="f"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="ghi"), TagAttrEquals(), Text(text="j"), TemplateOpen(), Text(text="k"), TemplateClose(), TemplateOpen(), Text(text="l"), TemplateClose(), TagAttrStart(pad_first=" \n ", pad_before_eq=" ", pad_after_eq=" "), Text(text="mno"), TagAttrEquals(), TagAttrQuote(char="\""), TemplateOpen(), Text(text="p"), TemplateClose(), Text(text=" "), WikilinkOpen(), Text(text="q"), WikilinkClose(), Text(text=" "), TemplateOpen(), Text(text="r"), TemplateClose(), TagCloseOpen(padding=""), WikilinkOpen(), Text(text="Source"), WikilinkClose(), TagOpenClose(), Text(text="ref"), TagCloseClose()]


--- ---




+ 43
- 15
tests/tokenizer/tags.mwtest View File

@@ -57,7 +57,14 @@ output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before
name: attribute_quoted name: attribute_quoted
label: a tag with a single quoted attribute label: a tag with a single quoted attribute
input: "<ref name="foo bar"></ref>" input: "<ref name="foo bar"></ref>"
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()]
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()]

---

name: attribute_single_quoted
label: a tag with a single singly-quoted attribute
input: "<ref name='foo bar'></ref>"
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="'"), Text(text="foo bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()]


--- ---


@@ -71,7 +78,7 @@ output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before
name: attribute_quoted_hyphen name: attribute_quoted_hyphen
label: a tag with a single quoted attribute, containing a hyphen label: a tag with a single quoted attribute, containing a hyphen
input: "<ref name="foo-bar"></ref>" input: "<ref name="foo-bar"></ref>"
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo-bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()]
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo-bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()]


--- ---


@@ -92,21 +99,21 @@ output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before
name: attribute_selfclosing_value_quoted name: attribute_selfclosing_value_quoted
label: a self-closing tag with a single quoted attribute label: a self-closing tag with a single quoted attribute
input: "<ref name="foo"/>" input: "<ref name="foo"/>"
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo"), TagCloseSelfclose(padding="")]
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo"), TagCloseSelfclose(padding="")]


--- ---


name: nested_tag name: nested_tag
label: a tag nested within the attributes of another label: a tag nested within the attributes of another
input: "<ref name=<span style="color: red;">foo</span>>citation</ref>" input: "<ref name=<span style="color: red;">foo</span>>citation</ref>"
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()]
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()]


--- ---


name: nested_tag_quoted name: nested_tag_quoted
label: a tag nested within the attributes of another, quoted label: a tag nested within the attributes of another, quoted
input: "<ref name="<span style="color: red;">foo</span>">citation</ref>" input: "<ref name="<span style="color: red;">foo</span>">citation</ref>"
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()]
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()]


--- ---


@@ -120,7 +127,7 @@ output: [Text(text="<ref name=</ ><//>>citation</ref>")]
name: nested_troll_tag_quoted name: nested_troll_tag_quoted
label: a bogus tag that appears to be nested within the attributes of another, quoted label: a bogus tag that appears to be nested within the attributes of another, quoted
input: "<ref name="</ ><//>">citation</ref>" input: "<ref name="</ ><//>">citation</ref>"
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="</ ><//>"), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()]
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="</ ><//>"), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()]


--- ---


@@ -222,6 +229,27 @@ output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_befor


--- ---


name: quotes_in_quotes
label: singly-quoted text inside a doubly-quoted attribute
input: "<span foo="bar 'baz buzz' biz">stuff</span>"
output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="bar 'baz buzz' biz"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()]

---

name: quotes_in_quotes_2
label: doubly-quoted text inside a singly-quoted attribute
input: "<span foo='bar "baz buzz" biz'>stuff</span>"
output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="'"), Text(text="bar \"baz buzz\" biz"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()]

---

name: quotes_in_quotes_3
label: doubly-quoted text inside a singly-quoted attribute, with backslashes
input: "<span foo='bar "baz buzz\\" biz'>stuff</span>"
output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="'"), Text(text="bar \"baz buzz\\\" biz"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()]

---

name: incomplete_lbracket name: incomplete_lbracket
label: incomplete tags: just a left bracket label: incomplete tags: just a left bracket
input: "<" input: "<"
@@ -407,28 +435,28 @@ output: [Text(text="junk <></>")]
name: backslash_premature_before name: backslash_premature_before
label: a backslash before a quote before a space label: a backslash before a quote before a space
input: "<foo attribute="this is\\" quoted">blah</foo>" input: "<foo attribute="this is\\" quoted">blah</foo>"
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\" quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is\\\" quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]


--- ---


name: backslash_premature_after name: backslash_premature_after
label: a backslash before a quote after a space label: a backslash before a quote after a space
input: "<foo attribute="this is \\"quoted">blah</foo>" input: "<foo attribute="this is \\"quoted">blah</foo>"
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is \\\"quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is \\\"quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]


--- ---


name: backslash_premature_middle name: backslash_premature_middle
label: a backslash before a quote in the middle of a word label: a backslash before a quote in the middle of a word
input: "<foo attribute="this i\\"s quoted">blah</foo>" input: "<foo attribute="this i\\"s quoted">blah</foo>"
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this i\\\"s quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this i\\\"s quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]


--- ---


name: backslash_adjacent name: backslash_adjacent
label: escaped quotes next to unescaped quotes label: escaped quotes next to unescaped quotes
input: "<foo attribute="\\"this is quoted\\"">blah</foo>" input: "<foo attribute="\\"this is quoted\\"">blah</foo>"
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="\\\"this is quoted\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="\\\"this is quoted\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]


--- ---


@@ -442,21 +470,21 @@ output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before
name: backslash_double name: backslash_double
label: two adjacent backslashes, which do *not* affect the quote label: two adjacent backslashes, which do *not* affect the quote
input: "<foo attribute="this is\\\\" quoted">blah</foo>" input: "<foo attribute="this is\\\\" quoted">blah</foo>"
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]


--- ---


name: backslash_triple name: backslash_triple
label: three adjacent backslashes, which do *not* affect the quote label: three adjacent backslashes, which do *not* affect the quote
input: "<foo attribute="this is\\\\\\" quoted">blah</foo>" input: "<foo attribute="this is\\\\\\" quoted">blah</foo>"
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is\\\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]


--- ---


name: backslash_unaffecting name: backslash_unaffecting
label: backslashes near quotes, but not immediately adjacent, thus having no effect label: backslashes near quotes, but not immediately adjacent, thus having no effect
input: "<foo attribute="\\quote\\d" also="quote\\d\\">blah</foo>" input: "<foo attribute="\\quote\\d" also="quote\\d\\">blah</foo>"
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="\\quote\\d"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="also"), TagAttrEquals(), Text(text="\"quote\\d\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="\\quote\\d"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="also"), TagAttrEquals(), Text(text="\"quote\\d\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()]


--- ---


@@ -477,7 +505,7 @@ output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(t
name: unparsable_attributed name: unparsable_attributed
label: a tag that should not be put through the normal parser; parsed attributes label: a tag that should not be put through the normal parser; parsed attributes
input: "{{t1}}<nowiki attr=val attr2="{{val2}}">{{t2}}</nowiki>{{t3}}" input: "{{t1}}<nowiki attr=val attr2="{{val2}}">{{t2}}</nowiki>{{t3}}"
output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(text="nowiki"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr"), TagAttrEquals(), Text(text="val"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr2"), TagAttrEquals(), TagAttrQuote(), TemplateOpen(), Text(text="val2"), TemplateClose(), TagCloseOpen(padding=""), Text(text="{{t2}}"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), TemplateOpen(), Text(text="t3"), TemplateClose()]
output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(text="nowiki"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr"), TagAttrEquals(), Text(text="val"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr2"), TagAttrEquals(), TagAttrQuote(char="\""), TemplateOpen(), Text(text="val2"), TemplateClose(), TagCloseOpen(padding=""), Text(text="{{t2}}"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), TemplateOpen(), Text(text="t3"), TemplateClose()]


--- ---


@@ -575,7 +603,7 @@ output: [Text(text="foo"), TagOpenOpen(invalid=True), Text(text="br"), TagCloseS
name: single_only_close_attribute name: single_only_close_attribute
label: a tag that can only be single; presented as a close tag with an attribute label: a tag that can only be single; presented as a close tag with an attribute
input: "</br id="break">" input: "</br id="break">"
output: [TagOpenOpen(invalid=True), Text(text="br"), TagAttrStart(pad_first=" ", pad_after_eq="", pad_before_eq=""), Text(text="id"), TagAttrEquals(), TagAttrQuote(), Text(text="break"), TagCloseSelfclose(padding="", implicit=True)]
output: [TagOpenOpen(invalid=True), Text(text="br"), TagAttrStart(pad_first=" ", pad_after_eq="", pad_before_eq=""), Text(text="id"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="break"), TagCloseSelfclose(padding="", implicit=True)]


--- ---




Loading…
Cancel
Save