From 4ae10eccf510cd2494dcddc8330567972286da42 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Sat, 18 May 2013 18:29:44 -0400
Subject: [PATCH] Fix bugs in get_sections(); finish test_get_sections()

---
 mwparserfromhell/wikicode.py | 31 +++++++++------
 tests/test_wikicode.py       | 90 +++++++++++++++++++++++++++++---------------
 2 files changed, 79 insertions(+), 42 deletions(-)

diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py
index 1d5de5d..581707d 100644
--- a/mwparserfromhell/wikicode.py
+++ b/mwparserfromhell/wikicode.py
@@ -336,7 +336,7 @@ class Wikicode(StringMixIn):
         return list(self.ifilter(recursive, matches, flags, forcetype))
 
     def get_sections(self, levels=None, matches=None, flags=FLAGS,
-                     include_lead=True, include_headings=True):
+                     include_lead=None, include_headings=True):
         """Return a list of sections within the page.
 
         Sections are returned as :py:class:`~.Wikicode` objects with a shared
@@ -352,30 +352,39 @@ class Wikicode(StringMixIn):
         *matches* is used.
 
         If *include_lead* is ``True``, the first, lead section (without a
-        heading) will be included in the list. If *include_headings* is
-        ``True``, the section's beginning :py:class:`~.Heading` object will be
-        included; otherwise, this is skipped.
+        heading) will be included in the list; ``False`` will not include it;
+        the default includes it only if no specific *levels* were given. The
+        lead is never included when *matches* is given. If *include_headings*
+        is ``True``, the section's beginning :py:class:`~.Heading` object will
+        be included; otherwise, this is skipped.
         """
         if matches:
             matches = r"^(=+?)\s*" + matches + r"\s*\1$"
-        headings = self.filter_headings(recursive=True, matches=matches,
+        headings = self.filter_headings(recursive=True)
+        filtered = self.filter_headings(recursive=True, matches=matches,
                                         flags=flags)
         if levels:
-            headings = [head for head in headings if head.level in levels]
+            filtered = [head for head in filtered if head.level in levels]
 
+        if matches or include_lead is False or (not include_lead and levels):
+            buffers = []
+        else:
+            buffers = [(maxsize, 0)]
         sections = []
-        buffers = [(maxsize, 0)] if include_lead else []
         i = 0
         while i < len(self.nodes):
             if self.nodes[i] in headings:
                 this = self.nodes[i].level
                 for (level, start) in buffers:
                     if this <= level:
-                        buffers.remove((level, start))
                         sections.append(Wikicode(self.nodes[start:i]))
-                buffers.append((this, i))
-                if not include_headings:
-                    i += 1
+                buffers = [buf for buf in buffers if buf[0] < this]
+                if self.nodes[i] in filtered:
+                    if not include_headings:
+                        i += 1
+                        if i >= len(self.nodes):
+                            break
+                    buffers.append((this, i))
             i += 1
         for (level, start) in buffers:
             if start != i:
diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py
index 1eacb11..a6ad950 100644
--- a/tests/test_wikicode.py
+++ b/tests/test_wikicode.py
@@ -276,37 +276,65 @@ class TestWikicode(TreeEqualityTestCase):
 
     def test_get_sections(self):
         """test Wikicode.get_sections()"""
-        page1 = ""
-        page2 = "==Heading=="
-        page3 = "===Heading===\nFoo bar baz\n====Gnidaeh====\n"
-        page4 = """
-This is a lead.
-== Section I ==
-Section I body. {{and a|template}}
-=== Section I.A ===
-Section I.A [[body]].
-=== Section I.B ===
-==== Section I.B.1 ====
-Section I.B.1 body.
-
-&bull;Some content.
-
-== Section II ==
-Section II body.
-
-== Section III ==
-=== Section III.A ===
-Text.
-===== Section III.A.1.a =====
-More text.
-==== Section III.A.2 ====
-Even more text.
-======= section III.A.2.a.i.1 =======
-An invalid section!"""
-
-        self.assertEqual([], parse(page1).get_sections())
-        self.assertEqual(["", "==Heading=="], parse(page2).get_sections())
-        self.assertEqual(["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n", "====Gnidaeh====\n"], parse(page3).get_sections())
+        page1 = parse("")
+        page2 = parse("==Heading==")
+        page3 = parse("===Heading===\nFoo bar baz\n====Gnidaeh====\n")
+
+        p4_lead = "This is a lead.\n"
+        p4_IA = "=== Section I.A ===\nSection I.A [[body]].\n"
+        p4_IB1 = "==== Section I.B.1 ====\nSection I.B.1 body.\n\n&bull;Some content.\n\n"
+        p4_IB = "=== Section I.B ===\n" + p4_IB1
+        p4_I = "== Section I ==\nSection I body. {{and a|template}}\n" + p4_IA + p4_IB
+        p4_II = "== Section II ==\nSection II body.\n\n"
+        p4_IIIA1a = "===== Section III.A.1.a =====\nMore text.\n"
+        p4_IIIA2ai1 = "======= Section III.A.2.a.i.1 =======\nAn invalid section!"
+        p4_IIIA2 = "==== Section III.A.2 ====\nEven more text.\n" + p4_IIIA2ai1
+        p4_IIIA = "=== Section III.A ===\nText.\n" + p4_IIIA1a + p4_IIIA2
+        p4_III = "== Section III ==\n" + p4_IIIA
+        page4 = parse(p4_lead + p4_I + p4_II + p4_III)
+
+        self.assertEqual([], page1.get_sections())
+        self.assertEqual(["", "==Heading=="], page2.get_sections())
+        self.assertEqual(["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n",
+                          "====Gnidaeh====\n"], page3.get_sections())
+        self.assertEqual([p4_lead, p4_IA, p4_I, p4_IB, p4_IB1, p4_II,
+                          p4_IIIA1a, p4_III, p4_IIIA, p4_IIIA2, p4_IIIA2ai1],
+                         page4.get_sections())
+
+        self.assertEqual(["====Gnidaeh====\n"], page3.get_sections(levels=[4]))
+        self.assertEqual(["===Heading===\nFoo bar baz\n====Gnidaeh====\n"],
+                         page3.get_sections(levels=(2, 3)))
+        self.assertEqual([], page3.get_sections(levels=[0]))
+        self.assertEqual(["", "====Gnidaeh====\n"],
+                         page3.get_sections(levels=[4], include_lead=True))
+        self.assertEqual(["===Heading===\nFoo bar baz\n====Gnidaeh====\n",
+                          "====Gnidaeh====\n"],
+                         page3.get_sections(include_lead=False))
+
+        self.assertEqual([p4_IB1, p4_IIIA2], page4.get_sections(levels=[4]))
+        self.assertEqual([""], page2.get_sections(include_headings=False))
+        self.assertEqual(["\nSection I.B.1 body.\n\n&bull;Some content.\n\n",
+                          "\nEven more text.\n" + p4_IIIA2ai1],
+                         page4.get_sections(levels=[4],
+                                            include_headings=False))
+
+        self.assertEqual([], page4.get_sections(matches=r"body"))
+        self.assertEqual([p4_IA, p4_I, p4_IB, p4_IB1],
+                         page4.get_sections(matches=r"Section\sI[.\s].*?"))
+        self.assertEqual([p4_IA, p4_IIIA1a, p4_IIIA, p4_IIIA2, p4_IIIA2ai1],
+                         page4.get_sections(matches=r".*?a.*?"))
+        self.assertEqual([p4_IIIA1a, p4_IIIA2ai1],
+                         page4.get_sections(matches=r".*?a.*?", flags=re.U))
+        self.assertEqual(["\nMore text.\n", "\nAn invalid section!"],
+                         page4.get_sections(matches=r".*?a.*?", flags=re.U,
+                                            include_headings=False))
+
+        page5 = parse("X\n== Foo ==\nBar\n== Baz ==\nBuzz")
+        section = page5.get_sections(matches="Foo")[0]
+        section.replace("\nBar\n", "\nBarf ")
+        section.append("{{Haha}}\n")
+        self.assertEqual("== Foo ==\nBarf {{Haha}}\n", section)
+        self.assertEqual("X\n== Foo ==\nBarf {{Haha}}\n== Baz ==\nBuzz", page5)
 
     def test_strip_code(self):
         """test Wikicode.strip_code()"""