From 4ae10eccf510cd2494dcddc8330567972286da42 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Sat, 18 May 2013 18:29:44 -0400
Subject: [PATCH] Fix bugs in get_sections(); finish test_get_sections()

---
 mwparserfromhell/wikicode.py | 31 +++++++++------
 tests/test_wikicode.py       | 90 +++++++++++++++++++++++++++++---------------
 2 files changed, 79 insertions(+), 42 deletions(-)

diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py
index 1d5de5d..581707d 100644
--- a/mwparserfromhell/wikicode.py
+++ b/mwparserfromhell/wikicode.py
@@ -336,7 +336,7 @@ class Wikicode(StringMixIn):
         return list(self.ifilter(recursive, matches, flags, forcetype))
 
     def get_sections(self, levels=None, matches=None, flags=FLAGS,
-                     include_lead=True, include_headings=True):
+                     include_lead=None, include_headings=True):
         """Return a list of sections within the page.
 
         Sections are returned as :py:class:`~.Wikicode` objects with a shared
@@ -352,30 +352,39 @@ class Wikicode(StringMixIn):
         *matches* is used.
 
         If *include_lead* is ``True``, the first, lead section (without a
-        heading) will be included in the list. If *include_headings* is
-        ``True``, the section's beginning :py:class:`~.Heading` object will be
-        included; otherwise, this is skipped.
+        heading) will be included in the list; ``False`` will not include it;
+        the default will include it only if no specific *levels* were given. If
+        *include_headings* is ``True``, the section's beginning
+        :py:class:`~.Heading` object will be included; otherwise, this is
+        skipped.
         """
         if matches:
             matches = r"^(=+?)\s*" + matches + r"\s*\1$"
-        headings = self.filter_headings(recursive=True, matches=matches,
+        headings = self.filter_headings(recursive=True)
+        filtered = self.filter_headings(recursive=True, matches=matches,
                                         flags=flags)
         if levels:
-            headings = [head for head in headings if head.level in levels]
+            filtered = [head for head in filtered if head.level in levels]
 
+        if matches or include_lead is False or (not include_lead and levels):
+            buffers = []
+        else:
+            buffers = [(maxsize, 0)]
         sections = []
-        buffers = [(maxsize, 0)] if include_lead else []
         i = 0
         while i < len(self.nodes):
             if self.nodes[i] in headings:
                 this = self.nodes[i].level
                 for (level, start) in buffers:
                     if this <= level:
-                        buffers.remove((level, start))
                         sections.append(Wikicode(self.nodes[start:i]))
-                buffers.append((this, i))
-                if not include_headings:
-                    i += 1
+                buffers = [buf for buf in buffers if buf[0] < this]
+                if self.nodes[i] in filtered:
+                    if not include_headings:
+                        i += 1
+                        if i >= len(self.nodes):
+                            break
+                    buffers.append((this, i))
             i += 1
         for (level, start) in buffers:
             if start != i:
diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py
index 1eacb11..a6ad950 100644
--- a/tests/test_wikicode.py
+++ b/tests/test_wikicode.py
@@ -276,37 +276,65 @@ class TestWikicode(TreeEqualityTestCase):
 
     def test_get_sections(self):
         """test Wikicode.get_sections()"""
-        page1 = ""
-        page2 = "==Heading=="
-        page3 = "===Heading===\nFoo bar baz\n====Gnidaeh====\n"
-        page4 = """
-This is a lead.
-== Section I ==
-Section I body. {{and a|template}}
-=== Section I.A ===
-Section I.A [[body]].
-=== Section I.B ===
-==== Section I.B.1 ====
-Section I.B.1 body.
-
-•Some content.
-
-== Section II ==
-Section II body.
-
-== Section III ==
-=== Section III.A ===
-Text.
-===== Section III.A.1.a =====
-More text.
-==== Section III.A.2 ====
-Even more text.
-======= section III.A.2.a.i.1 =======
-An invalid section!"""
-
-        self.assertEqual([], parse(page1).get_sections())
-        self.assertEqual(["", "==Heading=="], parse(page2).get_sections())
-        self.assertEqual(["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n", "====Gnidaeh====\n"], parse(page3).get_sections())
+        page1 = parse("")
+        page2 = parse("==Heading==")
+        page3 = parse("===Heading===\nFoo bar baz\n====Gnidaeh====\n")
+
+        p4_lead = "This is a lead.\n"
+        p4_IA = "=== Section I.A ===\nSection I.A [[body]].\n"
+        p4_IB1 = "==== Section I.B.1 ====\nSection I.B.1 body.\n\n•Some content.\n\n"
+        p4_IB = "=== Section I.B ===\n" + p4_IB1
+        p4_I = "== Section I ==\nSection I body. {{and a|template}}\n" + p4_IA + p4_IB
+        p4_II = "== Section II ==\nSection II body.\n\n"
+        p4_IIIA1a = "===== Section III.A.1.a =====\nMore text.\n"
+        p4_IIIA2ai1 = "======= Section III.A.2.a.i.1 =======\nAn invalid section!"
+        p4_IIIA2 = "==== Section III.A.2 ====\nEven more text.\n" + p4_IIIA2ai1
+        p4_IIIA = "=== Section III.A ===\nText.\n" + p4_IIIA1a + p4_IIIA2
+        p4_III = "== Section III ==\n" + p4_IIIA
+        page4 = parse(p4_lead + p4_I + p4_II + p4_III)
+
+        self.assertEqual([], page1.get_sections())
+        self.assertEqual(["", "==Heading=="], page2.get_sections())
+        self.assertEqual(["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n",
+                          "====Gnidaeh====\n"], page3.get_sections())
+        self.assertEqual([p4_lead, p4_IA, p4_I, p4_IB, p4_IB1, p4_II,
+                          p4_IIIA1a, p4_III, p4_IIIA, p4_IIIA2, p4_IIIA2ai1],
+                         page4.get_sections())
+
+        self.assertEqual(["====Gnidaeh====\n"], page3.get_sections(levels=[4]))
+        self.assertEqual(["===Heading===\nFoo bar baz\n====Gnidaeh====\n"],
+                         page3.get_sections(levels=(2, 3)))
+        self.assertEqual([], page3.get_sections(levels=[0]))
+        self.assertEqual(["", "====Gnidaeh====\n"],
+                         page3.get_sections(levels=[4], include_lead=True))
+        self.assertEqual(["===Heading===\nFoo bar baz\n====Gnidaeh====\n",
+                          "====Gnidaeh====\n"],
+                         page3.get_sections(include_lead=False))
+
+        self.assertEqual([p4_IB1, p4_IIIA2], page4.get_sections(levels=[4]))
+        self.assertEqual([""], page2.get_sections(include_headings=False))
+        self.assertEqual(["\nSection I.B.1 body.\n\n•Some content.\n\n",
+                          "\nEven more text.\n" + p4_IIIA2ai1],
+                         page4.get_sections(levels=[4],
+                                            include_headings=False))
+
+        self.assertEqual([], page4.get_sections(matches=r"body"))
+        self.assertEqual([p4_IA, p4_I, p4_IB, p4_IB1],
+                         page4.get_sections(matches=r"Section\sI[.\s].*?"))
+        self.assertEqual([p4_IA, p4_IIIA1a, p4_IIIA, p4_IIIA2, p4_IIIA2ai1],
+                         page4.get_sections(matches=r".*?a.*?"))
+        self.assertEqual([p4_IIIA1a, p4_IIIA2ai1],
+                         page4.get_sections(matches=r".*?a.*?", flags=re.U))
+        self.assertEqual(["\nMore text.\n", "\nAn invalid section!"],
+                         page4.get_sections(matches=r".*?a.*?", flags=re.U,
+                                            include_headings=False))
+
+        page5 = parse("X\n== Foo ==\nBar\n== Baz ==\nBuzz")
+        section = page5.get_sections(matches="Foo")[0]
+        section.replace("\nBar\n", "\nBarf ")
+        section.append("{{Haha}}\n")
+        self.assertEqual("== Foo ==\nBarf {{Haha}}\n", section)
+        self.assertEqual("X\n== Foo ==\nBarf {{Haha}}\n== Baz ==\nBuzz", page5)
 
     def test_strip_code(self):
         """test Wikicode.strip_code()"""