Browse Source

Fix bugs in get_sections(); finish test_get_sections()

tags/v0.2
Ben Kurtovic 11 years ago
parent
commit
4ae10eccf5
2 changed files with 79 additions and 42 deletions
  1. +20
    -11
      mwparserfromhell/wikicode.py
  2. +59
    -31
      tests/test_wikicode.py

+ 20
- 11
mwparserfromhell/wikicode.py View File

@@ -336,7 +336,7 @@ class Wikicode(StringMixIn):
return list(self.ifilter(recursive, matches, flags, forcetype))

def get_sections(self, levels=None, matches=None, flags=FLAGS,
include_lead=True, include_headings=True):
include_lead=None, include_headings=True):
"""Return a list of sections within the page.

Sections are returned as :py:class:`~.Wikicode` objects with a shared
@@ -352,30 +352,39 @@ class Wikicode(StringMixIn):
*matches* is used.

If *include_lead* is ``True``, the first, lead section (without a
heading) will be included in the list. If *include_headings* is
``True``, the section's beginning :py:class:`~.Heading` object will be
included; otherwise, this is skipped.
heading) will be included in the list; ``False`` will not include it;
the default will include it only if no specific *levels* were given. If
*include_headings* is ``True``, the section's beginning
:py:class:`~.Heading` object will be included; otherwise, this is
skipped.
"""
if matches:
matches = r"^(=+?)\s*" + matches + r"\s*\1$"
headings = self.filter_headings(recursive=True, matches=matches,
headings = self.filter_headings(recursive=True)
filtered = self.filter_headings(recursive=True, matches=matches,
flags=flags)
if levels:
headings = [head for head in headings if head.level in levels]
filtered = [head for head in filtered if head.level in levels]

if matches or include_lead is False or (not include_lead and levels):
buffers = []
else:
buffers = [(maxsize, 0)]
sections = []
buffers = [(maxsize, 0)] if include_lead else []
i = 0
while i < len(self.nodes):
if self.nodes[i] in headings:
this = self.nodes[i].level
for (level, start) in buffers:
if this <= level:
buffers.remove((level, start))
sections.append(Wikicode(self.nodes[start:i]))
buffers.append((this, i))
if not include_headings:
i += 1
buffers = [buf for buf in buffers if buf[0] < this]
if self.nodes[i] in filtered:
if not include_headings:
i += 1
if i >= len(self.nodes):
break
buffers.append((this, i))
i += 1
for (level, start) in buffers:
if start != i:


+ 59
- 31
tests/test_wikicode.py View File

@@ -276,37 +276,65 @@ class TestWikicode(TreeEqualityTestCase):

def test_get_sections(self):
"""test Wikicode.get_sections()"""
page1 = ""
page2 = "==Heading=="
page3 = "===Heading===\nFoo bar baz\n====Gnidaeh====\n"
page4 = """
This is a lead.
== Section I ==
Section I body. {{and a|template}}
=== Section I.A ===
Section I.A [[body]].
=== Section I.B ===
==== Section I.B.1 ====
Section I.B.1 body.

&bull;Some content.

== Section II ==
Section II body.

== Section III ==
=== Section III.A ===
Text.
===== Section III.A.1.a =====
More text.
==== Section III.A.2 ====
Even more text.
======= section III.A.2.a.i.1 =======
An invalid section!"""

self.assertEqual([], parse(page1).get_sections())
self.assertEqual(["", "==Heading=="], parse(page2).get_sections())
self.assertEqual(["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n", "====Gnidaeh====\n"], parse(page3).get_sections())
page1 = parse("")
page2 = parse("==Heading==")
page3 = parse("===Heading===\nFoo bar baz\n====Gnidaeh====\n")

p4_lead = "This is a lead.\n"
p4_IA = "=== Section I.A ===\nSection I.A [[body]].\n"
p4_IB1 = "==== Section I.B.1 ====\nSection I.B.1 body.\n\n&bull;Some content.\n\n"
p4_IB = "=== Section I.B ===\n" + p4_IB1
p4_I = "== Section I ==\nSection I body. {{and a|template}}\n" + p4_IA + p4_IB
p4_II = "== Section II ==\nSection II body.\n\n"
p4_IIIA1a = "===== Section III.A.1.a =====\nMore text.\n"
p4_IIIA2ai1 = "======= Section III.A.2.a.i.1 =======\nAn invalid section!"
p4_IIIA2 = "==== Section III.A.2 ====\nEven more text.\n" + p4_IIIA2ai1
p4_IIIA = "=== Section III.A ===\nText.\n" + p4_IIIA1a + p4_IIIA2
p4_III = "== Section III ==\n" + p4_IIIA
page4 = parse(p4_lead + p4_I + p4_II + p4_III)

self.assertEqual([], page1.get_sections())
self.assertEqual(["", "==Heading=="], page2.get_sections())
self.assertEqual(["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n",
"====Gnidaeh====\n"], page3.get_sections())
self.assertEqual([p4_lead, p4_IA, p4_I, p4_IB, p4_IB1, p4_II,
p4_IIIA1a, p4_III, p4_IIIA, p4_IIIA2, p4_IIIA2ai1],
page4.get_sections())

self.assertEqual(["====Gnidaeh====\n"], page3.get_sections(levels=[4]))
self.assertEqual(["===Heading===\nFoo bar baz\n====Gnidaeh====\n"],
page3.get_sections(levels=(2, 3)))
self.assertEqual([], page3.get_sections(levels=[0]))
self.assertEqual(["", "====Gnidaeh====\n"],
page3.get_sections(levels=[4], include_lead=True))
self.assertEqual(["===Heading===\nFoo bar baz\n====Gnidaeh====\n",
"====Gnidaeh====\n"],
page3.get_sections(include_lead=False))

self.assertEqual([p4_IB1, p4_IIIA2], page4.get_sections(levels=[4]))
self.assertEqual([""], page2.get_sections(include_headings=False))
self.assertEqual(["\nSection I.B.1 body.\n\n&bull;Some content.\n\n",
"\nEven more text.\n" + p4_IIIA2ai1],
page4.get_sections(levels=[4],
include_headings=False))

self.assertEqual([], page4.get_sections(matches=r"body"))
self.assertEqual([p4_IA, p4_I, p4_IB, p4_IB1],
page4.get_sections(matches=r"Section\sI[.\s].*?"))
self.assertEqual([p4_IA, p4_IIIA1a, p4_IIIA, p4_IIIA2, p4_IIIA2ai1],
page4.get_sections(matches=r".*?a.*?"))
self.assertEqual([p4_IIIA1a, p4_IIIA2ai1],
page4.get_sections(matches=r".*?a.*?", flags=re.U))
self.assertEqual(["\nMore text.\n", "\nAn invalid section!"],
page4.get_sections(matches=r".*?a.*?", flags=re.U,
include_headings=False))

page5 = parse("X\n== Foo ==\nBar\n== Baz ==\nBuzz")
section = page5.get_sections(matches="Foo")[0]
section.replace("\nBar\n", "\nBarf ")
section.append("{{Haha}}\n")
self.assertEqual("== Foo ==\nBarf {{Haha}}\n", section)
self.assertEqual("X\n== Foo ==\nBarf {{Haha}}\n== Baz ==\nBuzz", page5)

def test_strip_code(self):
"""test Wikicode.strip_code()"""


Loading…
Cancel
Save