diff --git a/CHANGELOG b/CHANGELOG index fc6cd3c..5e19dc5 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -5,6 +5,7 @@ v0.4 (unreleased): is a breaking change if you rely on the default behavior. - The 'matches' argument of Wikicode's filter methods now accepts a function (taking one argument, a Node, and returning a bool) in addition to a regex. +- Wikicode.get_sections() now returns sections in the correct order. - Wikicode.matches() now accepts a tuple or list of strings/Wikicode objects instead of just a single string or Wikicode. - C code cleanup and speed improvements. diff --git a/docs/changelog.rst b/docs/changelog.rst index c7c5c56..e44d17b 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -14,6 +14,7 @@ Unreleased - The *matches* argument of :py:class:`Wikicode's <.Wikicode>` :py:meth:`.filter` methods now accepts a function (taking one argument, a :py:class:`.Node`, and returning a bool) in addition to a regex. +- :py:meth:`.Wikicode.get_sections` now returns sections in the correct order. - :py:meth:`.Wikicode.matches` now accepts a tuple or list of strings/:py:class:`.Wikicode` objects instead of just a single string or :py:class:`.Wikicode`. diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 3b94394..19ad4f4 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -434,34 +434,31 @@ class Wikicode(StringMixIn): """ if matches: matches = r"^(=+?)\s*" + matches + r"\s*\1$" - headings = self.filter_headings() - filtered = self.filter_headings(matches=matches, flags=flags) + headings = self.filter_headings(recursive=False, matches=matches, + flags=flags) if levels: - filtered = [head for head in filtered if head.level in levels] + headings = [head for head in headings if head.level in levels] - if matches or include_lead is False or (not include_lead and levels): - buffers = [] - else: - buffers = [(maxsize, 0)] sections = [] - i = 0 - while i < len(self.nodes): - if self.nodes[i] in headings: - this = self.nodes[i].level - for (level, start) in buffers: - if this <= level: - sections.append(Wikicode(self.nodes[start:i])) - buffers = [buf for buf in buffers if buf[0] < this] - if self.nodes[i] in filtered: - if not include_headings: - i += 1 - if i >= len(self.nodes): - break - buffers.append((this, i)) - i += 1 - for (level, start) in buffers: - if start != i: - sections.append(Wikicode(self.nodes[start:i])) + if include_lead or not (include_lead is not None or matches or levels): + iterator = self.ifilter_headings(recursive=False) + try: + first = self.index(next(iterator)) + sections.append(Wikicode(self.nodes[:first])) + except StopIteration: # No headings in page + sections.append(Wikicode(self.nodes[:])) + + for heading in headings: + start = self.index(heading) + i = start + 1 + if not include_headings: + start += 1 + while i < len(self.nodes): + node = self.nodes[i] + if isinstance(node, Heading) and node.level <= heading.level: + break + i += 1 + sections.append(Wikicode(self.nodes[start:i])) return sections def strip_code(self, normalize=True, collapse=True): diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 5824c15..3e12cac 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -351,12 +351,12 @@ class TestWikicode(TreeEqualityTestCase): p4_III = "== Section III ==\n" + p4_IIIA page4 = parse(p4_lead + p4_I + p4_II + p4_III) - self.assertEqual([], page1.get_sections()) + self.assertEqual([""], page1.get_sections()) self.assertEqual(["", "==Heading=="], page2.get_sections()) self.assertEqual(["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n", "====Gnidaeh====\n"], page3.get_sections()) - self.assertEqual([p4_lead, p4_IA, p4_I, p4_IB, p4_IB1, p4_II, - p4_IIIA1a, p4_III, p4_IIIA, p4_IIIA2, p4_IIIA2ai1], + self.assertEqual([p4_lead, p4_I, p4_IA, p4_IB, p4_IB1, p4_II, + p4_III, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1], page4.get_sections()) self.assertEqual(["====Gnidaeh====\n"], page3.get_sections(levels=[4])) @@ -370,16 +370,16 @@ class TestWikicode(TreeEqualityTestCase): page3.get_sections(include_lead=False)) self.assertEqual([p4_IB1, p4_IIIA2], page4.get_sections(levels=[4])) - self.assertEqual([""], page2.get_sections(include_headings=False)) + self.assertEqual(["", ""], page2.get_sections(include_headings=False)) self.assertEqual(["\nSection I.B.1 body.\n\n•Some content.\n\n", "\nEven more text.\n" + p4_IIIA2ai1], page4.get_sections(levels=[4], include_headings=False)) self.assertEqual([], page4.get_sections(matches=r"body")) - self.assertEqual([p4_IA, p4_I, p4_IB, p4_IB1], + self.assertEqual([p4_I, p4_IA, p4_IB, p4_IB1], page4.get_sections(matches=r"Section\sI[.\s].*?")) - self.assertEqual([p4_IA, p4_IIIA1a, p4_IIIA, p4_IIIA2, p4_IIIA2ai1], + self.assertEqual([p4_IA, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1], page4.get_sections(matches=r".*?a.*?")) self.assertEqual([p4_IIIA1a, p4_IIIA2ai1], page4.get_sections(matches=r".*?a.*?", flags=re.U)) @@ -387,6 +387,11 @@ class TestWikicode(TreeEqualityTestCase): page4.get_sections(matches=r".*?a.*?", flags=re.U, include_headings=False)) + sections = page2.get_sections(include_headings=False) + sections[0].append("Lead!\n") + sections[1].append("\nFirst section!") + self.assertEqual("Lead!\n==Heading==\nFirst section!", page2) + page5 = parse("X\n== Foo ==\nBar\n== Baz ==\nBuzz") section = page5.get_sections(matches="Foo")[0] section.replace("\nBar\n", "\nBarf ")