diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 365eab7..f258921 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -335,34 +335,36 @@ class Wikicode(StringMixIn): """ return list(self.ifilter(recursive, matches, flags, forcetype)) - def get_sections(self, flat=True, matches=None, levels=None, flags=FLAGS, - include_headings=True): + def get_sections(self, levels=None, matches=None, flags=FLAGS, + include_lead=True, include_headings=True): """Return a list of sections within the page. Sections are returned as :py:class:`~.Wikicode` objects with a shared node list (implemented using :py:class:`~.SmartList`) so that changes to sections are reflected in the parent Wikicode object. - With *flat* as ``True``, each returned section contains all of its - subsections within the :py:class:`~.Wikicode`; otherwise, the returned - sections contain only the section up to the next heading, regardless of - its size. If *matches* is given, it should be a regex to be matched - against the titles of section headings; only sections whose headings - match the regex will be included. If *levels* is given, it should be a - iterable of integers; only sections whose heading levels are within it - will be returned. If *include_headings* is ``True``, the section's - beginning :py:class:`~.Heading` object will be included in returned - :py:class:`~.Wikicode` objects; otherwise, this is skipped. + Each section contains all of its subsections. If *levels* is given, it + should be an iterable of integers; only sections whose heading levels + are within it will be returned. If *matches* is given, it should be a + regex to be matched against the titles of section headings; only + sections whose headings match the regex will be included. *flags* can + be used to override the default regex flags (see :py:meth:`ifilter`) if + *matches* is used. 
+ + If *include_lead* is ``True``, the first, lead section (without a + heading) will be included in the list. If *include_headings* is + ``True``, the section's beginning :py:class:`~.Heading` object will be + included; otherwise, this is skipped. """ if matches: matches = r"^(=+?)\s*" + matches + r"\s*\1$" - headings = self.filter(recursive=True, matches=matches, flags=flags, - forcetype=Heading) + headings = self.filter_headings(recursive=True, matches=matches, + flags=flags) if levels: headings = [head for head in headings if head.level in levels] sections = [] - buffers = [(maxsize, 0)] + buffers = [(maxsize, 0)] if include_lead else [] i = 0 while i < len(self.nodes): if self.nodes[i] in headings: diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 69600c4..4aa07f1 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -26,11 +26,11 @@ from types import GeneratorType import unittest from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, - Tag, Template, Text, Wikilink) + Node, Tag, Template, Text, Wikilink) from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode from mwparserfromhell import parse -from mwparserfromhell.compat import str +from mwparserfromhell.compat import py3k, str from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext @@ -276,7 +276,37 @@ class TestWikicode(TreeEqualityTestCase): def test_get_sections(self): """test Wikicode.get_sections()""" - pass + page1 = "" + page2 = "==Heading==" + page3 = "===Heading===\nFoo bar baz\n====Gnidaeh====\n" + page4 = """ +This is a lead. +== Section I == +Section I body. {{and a|template}} +=== Section I.A === +Section I.A [[body]]. +=== Section I.B === +==== Section I.B.1 ==== +Section I.B.1 body. + +•Some content. + +== Section II == +Section II body. + +== Section III == +=== Section III.A === +Text. +===== Section III.A.1.a ===== +More text. +==== Section III.A.2 ==== +Even more text. 
+======= section III.A.2.a.i.1 ======= +An invalid section!""" + + self.assertEqual([], parse(page1).get_sections()) + self.assertEqual(["==Heading=="], parse(page2).get_sections()) + self.assertEqual(["===Heading===\nFoo bar baz\n", "====Gnidaeh====\n"], parse(page3).get_sections()) def test_strip_code(self): """test Wikicode.strip_code()""" @@ -284,7 +314,13 @@ class TestWikicode(TreeEqualityTestCase): def test_get_tree(self): """test Wikicode.get_tree()""" - pass + # Since individual nodes have test cases for their __showtree__ + # methods, and the docstring covers all possibilities, this doesn't + # need to test anything other than it: + code = parse("Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}") + expected = "Lorem ipsum \n{{\n\t foo\n\t| 1\n\t= bar\n\t| 2\n\t= " + \ + "{{\n\t\t\tbaz\n\t }}\n\t| spam\n\t= eggs\n}}" + self.assertEqual(expected.expandtabs(4), code.get_tree()) if __name__ == "__main__": unittest.main(verbosity=2)