Browse Source

Added closing_wiki_markup support to Tag node

Added support for allowing different wiki syntax for replacing the opening
and closing tags. Added for table support.
tags/v0.4
David Winegar 9 years ago
parent
commit
d356a570b3
4 changed files with 80 additions and 23 deletions
  1. +32
    -2
      mwparserfromhell/nodes/tag.py
  2. +3
    -1
      mwparserfromhell/parser/builder.py
  3. +18
    -0
      tests/test_tag.py
  4. +27
    -20
      tests/tokenizer/tables.mwtest

+ 32
- 2
mwparserfromhell/nodes/tag.py View File

@@ -35,7 +35,7 @@ class Tag(Node):

def __init__(self, tag, contents=None, attrs=None, wiki_markup=None,
self_closing=False, invalid=False, implicit=False, padding="",
closing_tag=None):
closing_tag=None, closing_wiki_markup=None):
super(Tag, self).__init__()
self._tag = tag
if contents is None and not self_closing:
@@ -44,6 +44,13 @@ class Tag(Node):
self._contents = contents
self._attrs = attrs if attrs else []
self._wiki_markup = wiki_markup
if wiki_markup and not self_closing:
if closing_wiki_markup:
self._closing_wiki_markup = closing_wiki_markup
else:
self._closing_wiki_markup = wiki_markup
else:
self._closing_wiki_markup = None
self._self_closing = self_closing
self._invalid = invalid
self._implicit = implicit
@@ -55,10 +62,11 @@ class Tag(Node):

def __unicode__(self):
if self.wiki_markup:
attrs = "".join([str(attr) for attr in self.attributes]) if self.attributes else ""
if self.self_closing:
return self.wiki_markup
else:
return self.wiki_markup + str(self.contents) + self.wiki_markup
return self.wiki_markup + attrs + str(self.contents) + self.closing_wiki_markup

result = ("</" if self.invalid else "<") + str(self.tag)
if self.attributes:
@@ -135,6 +143,19 @@ class Tag(Node):
return self._wiki_markup

@property
def closing_wiki_markup(self):
"""The wikified version of the closing tag to show instead of HTML.

If set to a value, this will be displayed instead of the close tag
brackets. If tag is :attr:`self_closing` is ``True``, this is set to
``None`` and not displayed. If :attr:`wiki_markup` is set and this has
not been set, this is set to the value of :attr:`wiki_markup`. If this
has been set and :attr:`wiki_markup` is set to a ``False`` value, this
is set to ``None``.
"""
return self._closing_wiki_markup

@property
def self_closing(self):
"""Whether the tag is self-closing with no content (like ``<br/>``)."""
return self._self_closing
@@ -185,10 +206,19 @@ class Tag(Node):
@wiki_markup.setter
def wiki_markup(self, value):
self._wiki_markup = str(value) if value else None
if not value or not self.closing_wiki_markup:
self.closing_wiki_markup = str(value) if value else None


@closing_wiki_markup.setter
def closing_wiki_markup(self, value):
self._closing_wiki_markup = str(value) if value and not self.self_closing else None

@self_closing.setter
def self_closing(self, value):
self._self_closing = bool(value)
if not bool(value):
self.closing_wiki_markup = None

@invalid.setter
def invalid(self, value):


+ 3
- 1
mwparserfromhell/parser/builder.py View File

@@ -248,6 +248,7 @@ class Builder(object):
close_tokens = (tokens.TagCloseSelfclose, tokens.TagCloseClose)
implicit, attrs, contents, closing_tag = False, [], None, None
wiki_markup, invalid = token.wiki_markup, token.invalid or False
closing_wiki_markup = None
self._push()
while self._tokens:
token = self._tokens.pop()
@@ -258,6 +259,7 @@ class Builder(object):
tag = self._pop()
self._push()
elif isinstance(token, tokens.TagOpenClose):
closing_wiki_markup = token.wiki_markup
contents = self._pop()
self._push()
elif isinstance(token, close_tokens):
@@ -270,7 +272,7 @@ class Builder(object):
self_closing = False
closing_tag = self._pop()
return Tag(tag, contents, attrs, wiki_markup, self_closing,
invalid, implicit, padding, closing_tag)
invalid, implicit, padding, closing_tag, closing_wiki_markup)
else:
self._write(self._handle_token(token))
raise ParserError("_handle_tag() missed a close token")


+ 18
- 0
tests/test_tag.py View File

@@ -171,6 +171,24 @@ class TestTag(TreeEqualityTestCase):
self.assertFalse(node.wiki_markup)
self.assertEqual("<i>italic text</i>", node)

def test_closing_wiki_markup(self):
"""test getter/setter behavior for closing_wiki_markup attribute"""
node = Tag(wraptext("table"), wraptext("\n"))
self.assertIs(None, node.closing_wiki_markup)
node.wiki_markup = "{|"
self.assertEqual("{|", node.closing_wiki_markup)
node.closing_wiki_markup = "|}"
self.assertEqual("|}", node.closing_wiki_markup)
self.assertEqual("{|\n|}", node)
node.wiki_markup = False
self.assertFalse(node.closing_wiki_markup)
node.self_closing = True
node.wiki_markup = "{|"
self.assertIs(None, node.closing_wiki_markup)
node.wiki_markup = False
node.self_closing = False
self.assertEqual("<table>\n</table>", node)

def test_self_closing(self):
"""test getter/setter for the self_closing attribute"""
node = Tag(wraptext("ref"), wraptext("foobar"))


+ 27
- 20
tests/tokenizer/tables.mwtest View File

@@ -1,14 +1,14 @@
name: empty_table
label: Parsing an empty table.
input: "{|\n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n"), TagOpenClose(), Text(text="table"), TagCloseClose()]
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: inline_table
label: Correctly handle tables with close on the same line.
input: "{||}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), TagOpenClose(), Text(text="table"), TagCloseClose()]
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

@@ -29,7 +29,7 @@ output: [Text(text="{| | ")]
name: leading_whitespace_table
label: Handle leading whitespace for a table.
input: "foo \n \t {|\n|}"
output: [Text(text="foo \n \t "), TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n"), TagOpenClose(), Text(text="table"), TagCloseClose()]
output: [Text(text="foo \n \t "), TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

@@ -43,112 +43,119 @@ output: [Text(text="foo \n foo \t {|\n|}")]
name: table_row_simple
label: Simple table row.
input: "{|\n |- \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseSelfclose(), Text(text=" \n"), TagOpenClose(), Text(text="table"), TagCloseClose()]
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseSelfclose(), Text(text=" \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_cell_simple
label: Simple table cell.
input: "{|\n | foo \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(), Text(text="table"), TagCloseClose()]
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_cell_inline
label: Multiple inline table cells.
input: "{|\n | foo || bar || test \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(), Text(text=" bar "),TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()]
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(), Text(text=" bar "),TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_header_simple
label: Simple header cell.
input: "{|\n ! foo \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(), Text(text="table"), TagCloseClose()]
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_header_inline
label: Multiple inline header cells.
input: "{|\n ! foo || bar !! test \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagCloseSelfclose(), Text(text=" bar "),TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()]
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagCloseSelfclose(), Text(text=" bar "),TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: nowiki_inside_table
label: Nowiki handles pipe characters in tables.
input: "{|\n | foo <nowiki>| |- {| |} || ! !!</nowiki> bar \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="| |- {| |} || ! !!"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), Text(text=" bar \n"), TagOpenClose(), Text(text="table"), TagCloseClose()]
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="| |- {| |} || ! !!"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), Text(text=" bar \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_text_outside_cell
label: Parse text inside table but outside of a cell.
input: "{|\n bar \n | foo \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n bar \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(), Text(text="table"), TagCloseClose()]
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n bar \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: no_table_cell_with_leading_characters
label: Fail to create a table cell when there are leading non-whitespace characters.
input: "{|\n bar | foo \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n bar | foo \n"), TagOpenClose(), Text(text="table"), TagCloseClose()]
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n bar | foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: no_table_row_with_leading_characters
label: Fail to create a table row when there are leading non-whitespace characters.
input: "{|\n bar |- foo \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n bar |- foo \n"), TagOpenClose(), Text(text="table"), TagCloseClose()]
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n bar |- foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: template_inside_table_cell
label: Template within table cell.
input: "{|\n |{{foo\n|bar=baz}} \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), Text(text=" \n"), TagOpenClose(), Text(text="table"), TagCloseClose()]
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), Text(text=" \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_cell_attributes
label: Parse table cell style attributes.
input: "{| \n | name="foo bar"| test \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()]
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_cell_attributes_quote_with_pipe
label: Pipe inside an attribute quote should still be used as a style separator.
input: "{| \n | name="foo|bar"| test \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo"), TagCloseSelfclose(), Text(text="bar\"| test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()]
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo"), TagCloseSelfclose(), Text(text="bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_cell_attributes_name_with_pipe
label: Pipe inside an attribute name should still be used as a style separator.
input: "{| \n | name|="foo bar"| test \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseSelfclose(), Text(text="=\"foo bar\"| test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()]
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseSelfclose(), Text(text="=\"foo bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_cell_attributes_pipe_after_equals
label: Pipe inside an attribute should still be used as a style separator after an equals.
input: "{| \n | name=|"foo|bar"| test \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagCloseSelfclose(), Text(text="\"foo|bar\"| test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()]
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagCloseSelfclose(), Text(text="\"foo|bar\"| test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_cell_attributes_templates
label: Pipe inside attributes shouldn't be style separator.
input: "{| \n | {{comment|template=baz}} | test \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_after_eq="", pad_first=" ", pad_before_eq=" "), TemplateOpen(), Text(text="comment"), TemplateParamSeparator(), Text(text="template"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), TagCloseSelfclose(), Text(text=" test \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_row_attributes
label: Parse table row style attributes.
input: "{| \n |- name="foo bar"\n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(), Text(text="\n"), TagOpenClose(), Text(text="table"), TagCloseClose()]
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(), Text(text="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

---

name: table_row_attributes_crazy_whitespace
label: Parse table row style attributes with different whitespace.
input: "{| \t \n |- \t name="foo bar"\n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \t \n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" \t ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(), Text(text="\n"), TagOpenClose(), Text(text="table"), TagCloseClose()]
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \t \n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" \t ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(), Text(text="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]


---
@@ -156,4 +163,4 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text
name: table_attributes
label: Parse table style attributes.
input: "{| name="foo bar"\n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"),TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(), Text(text="\n"), TagOpenClose(), Text(text="table"), TagCloseClose()]
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"),TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(), Text(text="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()]

Loading…
Cancel
Save