Browse Source

Add keep_template_params to Wikicode.strip_code (#175)

tags/v0.5
Ben Kurtovic 7 years ago
parent
commit
68ded2f890
22 changed files with 82 additions and 64 deletions
  1. +2
    -0
      CHANGELOG
  2. +2
    -0
      docs/changelog.rst
  3. +1
    -1
      mwparserfromhell/nodes/__init__.py
  4. +2
    -2
      mwparserfromhell/nodes/argument.py
  5. +3
    -3
      mwparserfromhell/nodes/external_link.py
  6. +2
    -2
      mwparserfromhell/nodes/heading.py
  7. +2
    -2
      mwparserfromhell/nodes/html_entity.py
  8. +2
    -2
      mwparserfromhell/nodes/tag.py
  9. +6
    -0
      mwparserfromhell/nodes/template.py
  10. +1
    -1
      mwparserfromhell/nodes/text.py
  11. +3
    -3
      mwparserfromhell/nodes/wikilink.py
  12. +15
    -5
      mwparserfromhell/wikicode.py
  13. +3
    -5
      tests/test_argument.py
  14. +1
    -3
      tests/test_comment.py
  15. +5
    -6
      tests/test_external_link.py
  16. +1
    -3
      tests/test_heading.py
  17. +7
    -7
      tests/test_html_entity.py
  18. +4
    -5
      tests/test_tag.py
  19. +13
    -6
      tests/test_template.py
  20. +1
    -3
      tests/test_text.py
  21. +4
    -1
      tests/test_wikicode.py
  22. +2
    -4
      tests/test_wikilink.py

+ 2
- 0
CHANGELOG View File

@@ -2,6 +2,8 @@ v0.5 (unreleased):

- Made Template.remove(keep_field=True) behave more reasonably when the
parameter is already empty.
- Added the keep_template_params argument to Wikicode.strip_code(). If True,
then template parameters will be preserved in the output.
- Wikicode objects can now be pickled properly (fixed infinite recursion error
on incompletely-constructed StringMixIn subclasses).
- Fixed Wikicode.matches()'s behavior on iterables besides lists and tuples.


+ 2
- 0
docs/changelog.rst View File

@@ -9,6 +9,8 @@ Unreleased

- Made :meth:`Template.remove(keep_field=True) <.Template.remove>` behave more
reasonably when the parameter is already empty.
- Added the *keep_template_params* argument to :meth:`.Wikicode.strip_code`.
If *True*, then template parameters will be preserved in the output.
- :class:`.Wikicode` objects can now be pickled properly (fixed infinite
recursion error on incompletely-constructed :class:`.StringMixIn`
subclasses).


+ 1
- 1
mwparserfromhell/nodes/__init__.py View File

@@ -58,7 +58,7 @@ class Node(StringMixIn):
return
yield # pragma: no cover (this is a generator that yields nothing)

def __strip__(self, normalize, collapse):
def __strip__(self, **kwargs):
return None

def __showtree__(self, write, get, mark):


+ 2
- 2
mwparserfromhell/nodes/argument.py View File

@@ -47,9 +47,9 @@ class Argument(Node):
if self.default is not None:
yield self.default

def __strip__(self, normalize, collapse):
def __strip__(self, **kwargs):
if self.default is not None:
return self.default.strip_code(normalize, collapse)
return self.default.strip_code(**kwargs)
return None

def __showtree__(self, write, get, mark):


+ 3
- 3
mwparserfromhell/nodes/external_link.py View File

@@ -49,12 +49,12 @@ class ExternalLink(Node):
if self.title is not None:
yield self.title

def __strip__(self, normalize, collapse):
def __strip__(self, **kwargs):
if self.brackets:
if self.title:
return self.title.strip_code(normalize, collapse)
return self.title.strip_code(**kwargs)
return None
return self.url.strip_code(normalize, collapse)
return self.url.strip_code(**kwargs)

def __showtree__(self, write, get, mark):
if self.brackets:


+ 2
- 2
mwparserfromhell/nodes/heading.py View File

@@ -42,8 +42,8 @@ class Heading(Node):
def __children__(self):
yield self.title

def __strip__(self, normalize, collapse):
return self.title.strip_code(normalize, collapse)
def __strip__(self, **kwargs):
return self.title.strip_code(**kwargs)

def __showtree__(self, write, get, mark):
write("=" * self.level)


+ 2
- 2
mwparserfromhell/nodes/html_entity.py View File

@@ -58,8 +58,8 @@ class HTMLEntity(Node):
return "&#{0}{1};".format(self.hex_char, self.value)
return "&#{0};".format(self.value)

def __strip__(self, normalize, collapse):
if normalize:
def __strip__(self, **kwargs):
if kwargs.get("normalize"):
return self.normalize()
return self



+ 2
- 2
mwparserfromhell/nodes/tag.py View File

@@ -98,9 +98,9 @@ class Tag(Node):
if not self.self_closing and not self.wiki_markup and self.closing_tag:
yield self.closing_tag

def __strip__(self, normalize, collapse):
def __strip__(self, **kwargs):
if self.contents and is_visible(self.tag):
return self.contents.strip_code(normalize, collapse)
return self.contents.strip_code(**kwargs)
return None

def __showtree__(self, write, get, mark):


+ 6
- 0
mwparserfromhell/nodes/template.py View File

@@ -58,6 +58,12 @@ class Template(Node):
yield param.name
yield param.value

def __strip__(self, **kwargs):
if kwargs.get("keep_template_params"):
parts = [param.value.strip_code(**kwargs) for param in self.params]
return " ".join(part for part in parts if part)
return None

def __showtree__(self, write, get, mark):
write("{{")
get(self.name)


+ 1
- 1
mwparserfromhell/nodes/text.py View File

@@ -37,7 +37,7 @@ class Text(Node):
def __unicode__(self):
return self.value

def __strip__(self, normalize, collapse):
def __strip__(self, **kwargs):
return self

def __showtree__(self, write, get, mark):


+ 3
- 3
mwparserfromhell/nodes/wikilink.py View File

@@ -46,10 +46,10 @@ class Wikilink(Node):
if self.text is not None:
yield self.text

def __strip__(self, normalize, collapse):
def __strip__(self, **kwargs):
if self.text is not None:
return self.text.strip_code(normalize, collapse)
return self.title.strip_code(normalize, collapse)
return self.text.strip_code(**kwargs)
return self.title.strip_code(**kwargs)

def __showtree__(self, write, get, mark):
write("[[")


+ 15
- 5
mwparserfromhell/wikicode.py View File

@@ -531,23 +531,33 @@ class Wikicode(StringMixIn):
# Ensure that earlier sections are earlier in the returned list:
return [section for i, section in sorted(sections)]

def strip_code(self, normalize=True, collapse=True):
def strip_code(self, normalize=True, collapse=True,
keep_template_params=False):
"""Return a rendered string without unprintable code such as templates.

The way a node is stripped is handled by the
:meth:`~.Node.__strip__` method of :class:`.Node` objects, which
generally return a subset of their nodes or ``None``. For example,
templates and non-visible tags are removed completely (visible tags are
reduced to their contents), links are stripped to just
their display part, headings are stripped to just their title. If
*normalize* is ``True``, various things may be done to strip code
their display part, headings are stripped to just their title.

If *normalize* is ``True``, various things may be done to strip code
further, such as converting HTML entities like ``&Sigma;``, ``&#931;``,
and ``&#x3a3;`` to ``Σ``. If *collapse* is ``True``, we will try to
remove excess whitespace as well (three or more newlines are converted
to two, for example).
to two, for example). If *keep_template_params* is ``True``, then the
stripped values of each template's parameters will be preserved in the
output, joined by single spaces with empty values omitted (normally,
templates are removed completely).
"""
kwargs = {
"normalize": normalize,
"collapse": collapse,
"keep_template_params": keep_template_params
}

nodes = []
for node in self.nodes:
stripped = node.__strip__(normalize, collapse)
stripped = node.__strip__(**kwargs)
if stripped:
nodes.append(str(stripped))



+ 3
- 5
tests/test_argument.py View File

@@ -56,12 +56,10 @@ class TestArgument(TreeEqualityTestCase):

def test_strip(self):
"""test Argument.__strip__()"""
node = Argument(wraptext("foobar"))
node1 = Argument(wraptext("foobar"))
node2 = Argument(wraptext("foo"), wraptext("bar"))
for a in (True, False):
for b in (True, False):
self.assertIs(None, node.__strip__(a, b))
self.assertEqual("bar", node2.__strip__(a, b))
self.assertIs(None, node1.__strip__())
self.assertEqual("bar", node2.__strip__())

def test_showtree(self):
"""test Argument.__showtree__()"""


+ 1
- 3
tests/test_comment.py View File

@@ -49,9 +49,7 @@ class TestComment(TreeEqualityTestCase):
def test_strip(self):
"""test Comment.__strip__()"""
node = Comment("foobar")
for a in (True, False):
for b in (True, False):
self.assertIs(None, node.__strip__(a, b))
self.assertIs(None, node.__strip__())

def test_showtree(self):
"""test Comment.__showtree__()"""


+ 5
- 6
tests/test_external_link.py View File

@@ -66,12 +66,11 @@ class TestExternalLink(TreeEqualityTestCase):
node2 = ExternalLink(wraptext("http://example.com"))
node3 = ExternalLink(wraptext("http://example.com"), wrap([]))
node4 = ExternalLink(wraptext("http://example.com"), wraptext("Link"))
for a in (True, False):
for b in (True, False):
self.assertEqual("http://example.com", node1.__strip__(a, b))
self.assertEqual(None, node2.__strip__(a, b))
self.assertEqual(None, node3.__strip__(a, b))
self.assertEqual("Link", node4.__strip__(a, b))

self.assertEqual("http://example.com", node1.__strip__())
self.assertEqual(None, node2.__strip__())
self.assertEqual(None, node3.__strip__())
self.assertEqual("Link", node4.__strip__())

def test_showtree(self):
"""test ExternalLink.__showtree__()"""


+ 1
- 3
tests/test_heading.py View File

@@ -52,9 +52,7 @@ class TestHeading(TreeEqualityTestCase):
def test_strip(self):
"""test Heading.__strip__()"""
node = Heading(wraptext("foobar"), 3)
for a in (True, False):
for b in (True, False):
self.assertEqual("foobar", node.__strip__(a, b))
self.assertEqual("foobar", node.__strip__())

def test_showtree(self):
"""test Heading.__showtree__()"""


+ 7
- 7
tests/test_html_entity.py View File

@@ -57,13 +57,13 @@ class TestHTMLEntity(TreeEqualityTestCase):
node1 = HTMLEntity("nbsp", named=True, hexadecimal=False)
node2 = HTMLEntity("107", named=False, hexadecimal=False)
node3 = HTMLEntity("e9", named=False, hexadecimal=True)
for a in (True, False):
self.assertEqual("\xa0", node1.__strip__(True, a))
self.assertEqual("&nbsp;", node1.__strip__(False, a))
self.assertEqual("k", node2.__strip__(True, a))
self.assertEqual("&#107;", node2.__strip__(False, a))
self.assertEqual("é", node3.__strip__(True, a))
self.assertEqual("&#xe9;", node3.__strip__(False, a))
self.assertEqual("\xa0", node1.__strip__(normalize=True))
self.assertEqual("&nbsp;", node1.__strip__(normalize=False))
self.assertEqual("k", node2.__strip__(normalize=True))
self.assertEqual("&#107;", node2.__strip__(normalize=False))
self.assertEqual("é", node3.__strip__(normalize=True))
self.assertEqual("&#xe9;", node3.__strip__(normalize=False))

def test_showtree(self):
"""test HTMLEntity.__showtree__()"""


+ 4
- 5
tests/test_tag.py View File

@@ -103,11 +103,10 @@ class TestTag(TreeEqualityTestCase):
node1 = Tag(wraptext("i"), wraptext("foobar"))
node2 = Tag(wraptext("math"), wraptext("foobar"))
node3 = Tag(wraptext("br"), self_closing=True)
for a in (True, False):
for b in (True, False):
self.assertEqual("foobar", node1.__strip__(a, b))
self.assertEqual(None, node2.__strip__(a, b))
self.assertEqual(None, node3.__strip__(a, b))

self.assertEqual("foobar", node1.__strip__())
self.assertEqual(None, node2.__strip__())
self.assertEqual(None, node3.__strip__())

def test_showtree(self):
"""test Tag.__showtree__()"""


+ 13
- 6
tests/test_template.py View File

@@ -67,12 +67,19 @@ class TestTemplate(TreeEqualityTestCase):
def test_strip(self):
"""test Template.__strip__()"""
node1 = Template(wraptext("foobar"))
node2 = Template(wraptext("foo"),
[pgenh("1", "bar"), pgens("abc", "def")])
for a in (True, False):
for b in (True, False):
self.assertEqual(None, node1.__strip__(a, b))
self.assertEqual(None, node2.__strip__(a, b))
node2 = Template(wraptext("foo"), [
pgenh("1", "bar"), pgens("foo", ""), pgens("abc", "def")])
node3 = Template(wraptext("foo"), [
pgenh("1", "foo"),
Parameter(wraptext("2"), wrap([Template(wraptext("hello"))]),
showkey=False),
pgenh("3", "bar")])

self.assertEqual(None, node1.__strip__(keep_template_params=False))
self.assertEqual(None, node2.__strip__(keep_template_params=False))
self.assertEqual("", node1.__strip__(keep_template_params=True))
self.assertEqual("bar def", node2.__strip__(keep_template_params=True))
self.assertEqual("foo bar", node3.__strip__(keep_template_params=True))

def test_showtree(self):
"""test Template.__showtree__()"""


+ 1
- 3
tests/test_text.py View File

@@ -49,9 +49,7 @@ class TestText(unittest.TestCase):
def test_strip(self):
"""test Text.__strip__()"""
node = Text("foobar")
for a in (True, False):
for b in (True, False):
self.assertIs(node, node.__strip__(a, b))
self.assertIs(node, node.__strip__())

def test_showtree(self):
"""test Text.__showtree__()"""


+ 4
- 1
tests/test_wikicode.py View File

@@ -433,7 +433,7 @@ class TestWikicode(TreeEqualityTestCase):
"""test Wikicode.strip_code()"""
# Since individual nodes have test cases for their __strip__ methods,
# we're only going to do an integration test:
code = parse("Foo [[bar]]\n\n{{baz}}\n\n[[a|b]] &Sigma;")
code = parse("Foo [[bar]]\n\n{{baz|hello}}\n\n[[a|b]] &Sigma;")
self.assertEqual("Foo bar\n\nb Σ",
code.strip_code(normalize=True, collapse=True))
self.assertEqual("Foo bar\n\n\n\nb Σ",
@@ -442,6 +442,9 @@ class TestWikicode(TreeEqualityTestCase):
code.strip_code(normalize=False, collapse=True))
self.assertEqual("Foo bar\n\n\n\nb &Sigma;",
code.strip_code(normalize=False, collapse=False))
self.assertEqual("Foo bar\n\nhello\n\nb Σ",
code.strip_code(normalize=True, collapse=True,
keep_template_params=True))

def test_get_tree(self):
"""test Wikicode.get_tree()"""


+ 2
- 4
tests/test_wikilink.py View File

@@ -58,10 +58,8 @@ class TestWikilink(TreeEqualityTestCase):
"""test Wikilink.__strip__()"""
node = Wikilink(wraptext("foobar"))
node2 = Wikilink(wraptext("foo"), wraptext("bar"))
for a in (True, False):
for b in (True, False):
self.assertEqual("foobar", node.__strip__(a, b))
self.assertEqual("bar", node2.__strip__(a, b))
self.assertEqual("foobar", node.__strip__())
self.assertEqual("bar", node2.__strip__())

def test_showtree(self):
"""test Wikilink.__showtree__()"""


Loading…
Cancel
Save