From 68ded2f890c7965cc560471602f5cdad5ca435bc Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 18 Mar 2017 23:43:30 -0400 Subject: [PATCH] Add keep_template_params to Wikicode.strip_code (#175) --- CHANGELOG | 2 ++ docs/changelog.rst | 2 ++ mwparserfromhell/nodes/__init__.py | 2 +- mwparserfromhell/nodes/argument.py | 4 ++-- mwparserfromhell/nodes/external_link.py | 6 +++--- mwparserfromhell/nodes/heading.py | 4 ++-- mwparserfromhell/nodes/html_entity.py | 4 ++-- mwparserfromhell/nodes/tag.py | 4 ++-- mwparserfromhell/nodes/template.py | 6 ++++++ mwparserfromhell/nodes/text.py | 2 +- mwparserfromhell/nodes/wikilink.py | 6 +++--- mwparserfromhell/wikicode.py | 20 +++++++++++++++----- tests/test_argument.py | 8 +++----- tests/test_comment.py | 4 +--- tests/test_external_link.py | 11 +++++------ tests/test_heading.py | 4 +--- tests/test_html_entity.py | 14 +++++++------- tests/test_tag.py | 9 ++++----- tests/test_template.py | 19 +++++++++++++------ tests/test_text.py | 4 +--- tests/test_wikicode.py | 5 ++++- tests/test_wikilink.py | 6 ++---- 22 files changed, 82 insertions(+), 64 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 4480035..3832524 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,8 @@ v0.5 (unreleased): - Made Template.remove(keep_field=True) behave more reasonably when the parameter is already empty. +- Added the keep_template_params argument to Wikicode.strip_code(). If True, + then template parameters will be preserved in the output. - Wikicode objects can now be pickled properly (fixed infinite recursion error on incompletely-constructed StringMixIn subclasses). - Fixed Wikicode.matches()'s behavior on iterables besides lists and tuples. diff --git a/docs/changelog.rst b/docs/changelog.rst index 669b448..2c6be16 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -9,6 +9,8 @@ Unreleased - Made :meth:`Template.remove(keep_field=True) <.Template.remove>` behave more reasonably when the parameter is already empty. 
+- Added the *keep_template_params* argument to :meth:`.Wikicode.strip_code`. + If *True*, then template parameters will be preserved in the output. - :class:`.Wikicode` objects can now be pickled properly (fixed infinite recursion error on incompletely-constructed :class:`.StringMixIn` subclasses). diff --git a/mwparserfromhell/nodes/__init__.py b/mwparserfromhell/nodes/__init__.py index 91678c8..17ad3c3 100644 --- a/mwparserfromhell/nodes/__init__.py +++ b/mwparserfromhell/nodes/__init__.py @@ -58,7 +58,7 @@ class Node(StringMixIn): return yield # pragma: no cover (this is a generator that yields nothing) - def __strip__(self, normalize, collapse): + def __strip__(self, **kwargs): return None def __showtree__(self, write, get, mark): diff --git a/mwparserfromhell/nodes/argument.py b/mwparserfromhell/nodes/argument.py index 9146704..4259a35 100644 --- a/mwparserfromhell/nodes/argument.py +++ b/mwparserfromhell/nodes/argument.py @@ -47,9 +47,9 @@ class Argument(Node): if self.default is not None: yield self.default - def __strip__(self, normalize, collapse): + def __strip__(self, **kwargs): if self.default is not None: - return self.default.strip_code(normalize, collapse) + return self.default.strip_code(**kwargs) return None def __showtree__(self, write, get, mark): diff --git a/mwparserfromhell/nodes/external_link.py b/mwparserfromhell/nodes/external_link.py index 8493a25..f2659ab 100644 --- a/mwparserfromhell/nodes/external_link.py +++ b/mwparserfromhell/nodes/external_link.py @@ -49,12 +49,12 @@ class ExternalLink(Node): if self.title is not None: yield self.title - def __strip__(self, normalize, collapse): + def __strip__(self, **kwargs): if self.brackets: if self.title: - return self.title.strip_code(normalize, collapse) + return self.title.strip_code(**kwargs) return None - return self.url.strip_code(normalize, collapse) + return self.url.strip_code(**kwargs) def __showtree__(self, write, get, mark): if self.brackets: diff --git 
a/mwparserfromhell/nodes/heading.py b/mwparserfromhell/nodes/heading.py index 7bba702..79f3364 100644 --- a/mwparserfromhell/nodes/heading.py +++ b/mwparserfromhell/nodes/heading.py @@ -42,8 +42,8 @@ class Heading(Node): def __children__(self): yield self.title - def __strip__(self, normalize, collapse): - return self.title.strip_code(normalize, collapse) + def __strip__(self, **kwargs): + return self.title.strip_code(**kwargs) def __showtree__(self, write, get, mark): write("=" * self.level) diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index 8b7f270..d5e9d73 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -58,8 +58,8 @@ class HTMLEntity(Node): return "&#{0}{1};".format(self.hex_char, self.value) return "&#{0};".format(self.value) - def __strip__(self, normalize, collapse): - if normalize: + def __strip__(self, **kwargs): + if kwargs.get("normalize"): return self.normalize() return self diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index d393e2c..f0611a6 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -98,9 +98,9 @@ class Tag(Node): if not self.self_closing and not self.wiki_markup and self.closing_tag: yield self.closing_tag - def __strip__(self, normalize, collapse): + def __strip__(self, **kwargs): if self.contents and is_visible(self.tag): - return self.contents.strip_code(normalize, collapse) + return self.contents.strip_code(**kwargs) return None def __showtree__(self, write, get, mark): diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index ccc63fd..9c89fbd 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -58,6 +58,12 @@ class Template(Node): yield param.name yield param.value + def __strip__(self, **kwargs): + if kwargs.get("keep_template_params"): + parts = [param.value.strip_code(**kwargs) for param in self.params] + 
return " ".join(part for part in parts if part) + return None + def __showtree__(self, write, get, mark): write("{{") get(self.name) diff --git a/mwparserfromhell/nodes/text.py b/mwparserfromhell/nodes/text.py index 08ac205..a49930f 100644 --- a/mwparserfromhell/nodes/text.py +++ b/mwparserfromhell/nodes/text.py @@ -37,7 +37,7 @@ class Text(Node): def __unicode__(self): return self.value - def __strip__(self, normalize, collapse): + def __strip__(self, **kwargs): return self def __showtree__(self, write, get, mark): diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index f71b5f6..8f4bf7d 100644 --- a/mwparserfromhell/nodes/wikilink.py +++ b/mwparserfromhell/nodes/wikilink.py @@ -46,10 +46,10 @@ class Wikilink(Node): if self.text is not None: yield self.text - def __strip__(self, normalize, collapse): + def __strip__(self, **kwargs): if self.text is not None: - return self.text.strip_code(normalize, collapse) - return self.title.strip_code(normalize, collapse) + return self.text.strip_code(**kwargs) + return self.title.strip_code(**kwargs) def __showtree__(self, write, get, mark): write("[[") diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 447f6ff..73aea41 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -531,23 +531,33 @@ class Wikicode(StringMixIn): # Ensure that earlier sections are earlier in the returned list: return [section for i, section in sorted(sections)] - def strip_code(self, normalize=True, collapse=True): + def strip_code(self, normalize=True, collapse=True, + keep_template_params=False): """Return a rendered string without unprintable code such as templates. The way a node is stripped is handled by the :meth:`~.Node.__strip__` method of :class:`.Node` objects, which generally return a subset of their nodes or ``None``. 
For example, templates and tags are removed completely, links are stripped to just - their display part, headings are stripped to just their title. If - *normalize* is ``True``, various things may be done to strip code + their display part, headings are stripped to just their title. + + If *normalize* is ``True``, various things may be done to strip code further, such as converting HTML entities like ``&Sigma;``, ``&#931;``, and ``&#x3a3;`` to ``Σ``. If *collapse* is ``True``, we will try to remove excess whitespace as well (three or more newlines are converted - to two, for example). + to two, for example). If *keep_template_params* is ``True``, then + template parameters will be preserved in the output (normally, they are + removed completely). """ + kwargs = { + "normalize": normalize, + "collapse": collapse, + "keep_template_params": keep_template_params + } + nodes = [] for node in self.nodes: - stripped = node.__strip__(normalize, collapse) + stripped = node.__strip__(**kwargs) if stripped: nodes.append(str(stripped)) diff --git a/tests/test_argument.py b/tests/test_argument.py index de12eab..6209b2f 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -56,12 +56,10 @@ class TestArgument(TreeEqualityTestCase): def test_strip(self): """test Argument.__strip__()""" - node = Argument(wraptext("foobar")) + node1 = Argument(wraptext("foobar")) node2 = Argument(wraptext("foo"), wraptext("bar")) - for a in (True, False): - for b in (True, False): - self.assertIs(None, node.__strip__(a, b)) - self.assertEqual("bar", node2.__strip__(a, b)) + self.assertIs(None, node1.__strip__()) + self.assertEqual("bar", node2.__strip__()) def test_showtree(self): """test Argument.__showtree__()""" diff --git a/tests/test_comment.py b/tests/test_comment.py index 97a6503..27129c9 100644 --- a/tests/test_comment.py +++ b/tests/test_comment.py @@ -49,9 +49,7 @@ class TestComment(TreeEqualityTestCase): def test_strip(self): """test Comment.__strip__()""" node = Comment("foobar") - for a in 
(True, False): - for b in (True, False): - self.assertIs(None, node.__strip__(a, b)) + self.assertIs(None, node.__strip__()) def test_showtree(self): """test Comment.__showtree__()""" diff --git a/tests/test_external_link.py b/tests/test_external_link.py index 3432ae1..8cb3158 100644 --- a/tests/test_external_link.py +++ b/tests/test_external_link.py @@ -66,12 +66,11 @@ class TestExternalLink(TreeEqualityTestCase): node2 = ExternalLink(wraptext("http://example.com")) node3 = ExternalLink(wraptext("http://example.com"), wrap([])) node4 = ExternalLink(wraptext("http://example.com"), wraptext("Link")) - for a in (True, False): - for b in (True, False): - self.assertEqual("http://example.com", node1.__strip__(a, b)) - self.assertEqual(None, node2.__strip__(a, b)) - self.assertEqual(None, node3.__strip__(a, b)) - self.assertEqual("Link", node4.__strip__(a, b)) + + self.assertEqual("http://example.com", node1.__strip__()) + self.assertEqual(None, node2.__strip__()) + self.assertEqual(None, node3.__strip__()) + self.assertEqual("Link", node4.__strip__()) def test_showtree(self): """test ExternalLink.__showtree__()""" diff --git a/tests/test_heading.py b/tests/test_heading.py index cb7ac8b..5e6776a 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -52,9 +52,7 @@ class TestHeading(TreeEqualityTestCase): def test_strip(self): """test Heading.__strip__()""" node = Heading(wraptext("foobar"), 3) - for a in (True, False): - for b in (True, False): - self.assertEqual("foobar", node.__strip__(a, b)) + self.assertEqual("foobar", node.__strip__()) def test_showtree(self): """test Heading.__showtree__()""" diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index 4aa176f..4db1c13 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -57,13 +57,13 @@ class TestHTMLEntity(TreeEqualityTestCase): node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) node2 = HTMLEntity("107", named=False, hexadecimal=False) node3 = HTMLEntity("e9", 
named=False, hexadecimal=True) - for a in (True, False): - self.assertEqual("\xa0", node1.__strip__(True, a)) - self.assertEqual("&nbsp;", node1.__strip__(False, a)) - self.assertEqual("k", node2.__strip__(True, a)) - self.assertEqual("&#107;", node2.__strip__(False, a)) - self.assertEqual("é", node3.__strip__(True, a)) - self.assertEqual("&#xe9;", node3.__strip__(False, a)) + + self.assertEqual("\xa0", node1.__strip__(normalize=True)) + self.assertEqual("&nbsp;", node1.__strip__(normalize=False)) + self.assertEqual("k", node2.__strip__(normalize=True)) + self.assertEqual("&#107;", node2.__strip__(normalize=False)) + self.assertEqual("é", node3.__strip__(normalize=True)) + self.assertEqual("&#xe9;", node3.__strip__(normalize=False)) def test_showtree(self): """test HTMLEntity.__showtree__()""" diff --git a/tests/test_tag.py b/tests/test_tag.py index 0ac75a9..2e6d8a3 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -103,11 +103,10 @@ class TestTag(TreeEqualityTestCase): node1 = Tag(wraptext("i"), wraptext("foobar")) node2 = Tag(wraptext("math"), wraptext("foobar")) node3 = Tag(wraptext("br"), self_closing=True) - for a in (True, False): - for b in (True, False): - self.assertEqual("foobar", node1.__strip__(a, b)) - self.assertEqual(None, node2.__strip__(a, b)) - self.assertEqual(None, node3.__strip__(a, b)) + + self.assertEqual("foobar", node1.__strip__()) + self.assertEqual(None, node2.__strip__()) + self.assertEqual(None, node3.__strip__()) def test_showtree(self): """test Tag.__showtree__()""" diff --git a/tests/test_template.py b/tests/test_template.py index a97d6de..76a45cf 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -67,12 +67,19 @@ class TestTemplate(TreeEqualityTestCase): def test_strip(self): """test Template.__strip__()""" node1 = Template(wraptext("foobar")) - node2 = Template(wraptext("foo"), - [pgenh("1", "bar"), pgens("abc", "def")]) - for a in (True, False): - for b in (True, False): - self.assertEqual(None, node1.__strip__(a, b)) - 
self.assertEqual(None, node2.__strip__(a, b)) + node2 = Template(wraptext("foo"), [ + pgenh("1", "bar"), pgens("foo", ""), pgens("abc", "def")]) + node3 = Template(wraptext("foo"), [ + pgenh("1", "foo"), + Parameter(wraptext("2"), wrap([Template(wraptext("hello"))]), + showkey=False), + pgenh("3", "bar")]) + + self.assertEqual(None, node1.__strip__(keep_template_params=False)) + self.assertEqual(None, node2.__strip__(keep_template_params=False)) + self.assertEqual("", node1.__strip__(keep_template_params=True)) + self.assertEqual("bar def", node2.__strip__(keep_template_params=True)) + self.assertEqual("foo bar", node3.__strip__(keep_template_params=True)) def test_showtree(self): """test Template.__showtree__()""" diff --git a/tests/test_text.py b/tests/test_text.py index d890323..aaf8db2 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -49,9 +49,7 @@ class TestText(unittest.TestCase): def test_strip(self): """test Text.__strip__()""" node = Text("foobar") - for a in (True, False): - for b in (True, False): - self.assertIs(node, node.__strip__(a, b)) + self.assertIs(node, node.__strip__()) def test_showtree(self): """test Text.__showtree__()""" diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index d0c11fd..5457920 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -433,7 +433,7 @@ class TestWikicode(TreeEqualityTestCase): """test Wikicode.strip_code()""" # Since individual nodes have test cases for their __strip__ methods, # we're only going to do an integration test: - code = parse("Foo [[bar]]\n\n{{baz}}\n\n[[a|b]] &Sigma;") + code = parse("Foo [[bar]]\n\n{{baz|hello}}\n\n[[a|b]] &Sigma;") self.assertEqual("Foo bar\n\nb Σ", code.strip_code(normalize=True, collapse=True)) self.assertEqual("Foo bar\n\n\n\nb Σ", @@ -442,6 +442,9 @@ class TestWikicode(TreeEqualityTestCase): code.strip_code(normalize=False, collapse=True)) self.assertEqual("Foo bar\n\n\n\nb &Sigma;", code.strip_code(normalize=False, collapse=False)) + self.assertEqual("Foo 
bar\n\nhello\n\nb Σ", + code.strip_code(normalize=True, collapse=True, + keep_template_params=True)) def test_get_tree(self): """test Wikicode.get_tree()""" diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index 965d8d5..80116ca 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -58,10 +58,8 @@ class TestWikilink(TreeEqualityTestCase): """test Wikilink.__strip__()""" node = Wikilink(wraptext("foobar")) node2 = Wikilink(wraptext("foo"), wraptext("bar")) - for a in (True, False): - for b in (True, False): - self.assertEqual("foobar", node.__strip__(a, b)) - self.assertEqual("bar", node2.__strip__(a, b)) + self.assertEqual("foobar", node.__strip__()) + self.assertEqual("bar", node2.__strip__()) def test_showtree(self): """test Wikilink.__showtree__()"""