diff --git a/CHANGELOG b/CHANGELOG index 99eff38..230236b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,8 @@ +v0.4 (unreleased): + +- The 'matches' argument of Wikicode's filter methods now accepts a function + (taking one argument, a Node, and returning a bool) in addition to a regex. + v0.3.2 (released September 1, 2013): - Added support for Python 3.2 (along with current support for 3.3 and 2.7). diff --git a/docs/changelog.rst b/docs/changelog.rst index e72baef..ed8372d 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,16 @@ Changelog ========= +v0.4 +---- + +Unreleased +(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.3.2...develop>`__): + +- The *matches* argument of :py:class:`Wikicode's <.Wikicode>` + :py:meth:`.filter` methods now accepts a function (taking one argument, a + :py:class:`.Node`, and returning a bool) in addition to a regex. + v0.3.2 ------ diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index 6569d96..3c011d0 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -31,7 +31,7 @@ from __future__ import unicode_literals __author__ = "Ben Kurtovic" __copyright__ = "Copyright (C) 2012, 2013 Ben Kurtovic" __license__ = "MIT License" -__version__ = "0.3.2" +__version__ = "0.4.dev" __email__ = "ben.kurtovic@verizon.net" from . import (compat, definitions, nodes, parser, smart_list, string_mixin, diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 08fd469..be751ed 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -378,17 +378,21 @@ class Wikicode(StringMixIn): """Iterate over nodes in our list matching certain conditions. If *recursive* is ``True``, we will iterate over our children and all - descendants of our children, otherwise just our immediate children. If - *matches* is given, we will only yield the nodes that match the given - regular expression (with :py:func:`re.search`). 
The default flags used - are :py:const:`re.IGNORECASE`, :py:const:`re.DOTALL`, and - :py:const:`re.UNICODE`, but custom flags can be specified by passing - *flags*. If *forcetype* is given, only nodes that are instances of this - type are yielded. + of their descendants, otherwise just our immediate children. If + *forcetype* is given, only nodes that are instances of this type are + yielded. *matches* can be used to further restrict the nodes, either as + a function (taking a single :py:class:`.Node` and returning a boolean) + or a regular expression (matched against the node's string + representation with :py:func:`re.search`). If *matches* is a regex, the + flags passed to :py:func:`re.search` are :py:const:`re.IGNORECASE`, + :py:const:`re.DOTALL`, and :py:const:`re.UNICODE`, but custom flags can + be specified by passing *flags*. """ + if matches and not callable(matches): + pat, matches = matches, lambda obj: re.search(pat, str(obj), flags) for node in (self._get_all_nodes(self) if recursive else self.nodes): if not forcetype or isinstance(node, forcetype): - if not matches or re.search(matches, str(node), flags): + if not matches or matches(node): yield node def filter(self, recursive=True, matches=None, flags=FLAGS, diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 14d801c..31fa82f 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -256,7 +256,7 @@ class TestWikicode(TreeEqualityTestCase): def genlist(gen): self.assertIsInstance(gen, GeneratorType) return list(gen) - ifilter = lambda code: (lambda **kw: genlist(code.ifilter(**kw))) + ifilter = lambda code: (lambda *a, **k: genlist(code.ifilter(*a, **k))) code = parse("a{{b}}c[[d]]{{{e}}}{{f}}[[g]]") for func in (code.filter, ifilter(code)): @@ -292,21 +292,27 @@ class TestWikicode(TreeEqualityTestCase): "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"], func(recursive=True, forcetype=Template)) - code3 = parse("{{foobar}}{{FOO}}{{baz}}{{bz}}") + code3 = 
parse("{{foobar}}{{FOO}}{{baz}}{{bz}}{{barfoo}}") for func in (code3.filter, ifilter(code3)): - self.assertEqual(["{{foobar}}", "{{FOO}}"], func(recursive=False, matches=r"foo")) + self.assertEqual(["{{foobar}}", "{{barfoo}}"], + func(False, matches=lambda node: "foo" in node)) + self.assertEqual(["{{foobar}}", "{{FOO}}", "{{barfoo}}"], + func(False, matches=r"foo")) self.assertEqual(["{{foobar}}", "{{FOO}}"], - func(recursive=False, matches=r"^{{foo.*?}}")) + func(matches=r"^{{foo.*?}}")) self.assertEqual(["{{foobar}}"], - func(recursive=False, matches=r"^{{foo.*?}}", flags=re.UNICODE)) - self.assertEqual(["{{baz}}", "{{bz}}"], func(recursive=False, matches=r"^{{b.*?z")) - self.assertEqual(["{{baz}}"], func(recursive=False, matches=r"^{{b.+?z}}")) + func(matches=r"^{{foo.*?}}", flags=re.UNICODE)) + self.assertEqual(["{{baz}}", "{{bz}}"], func(matches=r"^{{b.*?z")) + self.assertEqual(["{{baz}}"], func(matches=r"^{{b.+?z}}")) self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"], code2.filter_templates(recursive=False)) self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}", "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"], code2.filter_templates(recursive=True)) + + self.assertEqual(["{{foobar}}"], code3.filter_templates( + matches=lambda node: node.name.matches("Foobar"))) self.assertEqual(["{{baz}}", "{{bz}}"], code3.filter_templates(matches=r"^{{b.*?z")) self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z"))