Browse Source

Wikicode.get_sections() now returns sections in the correct order.

Reported by Σ.
tags/v0.3.3
Ben Kurtovic 11 years ago
parent
commit
79bf42df1c
4 changed files with 35 additions and 31 deletions
  1. CHANGELOG (+1, -0)
  2. docs/changelog.rst (+1, -0)
  3. mwparserfromhell/wikicode.py (+22, -25)
  4. tests/test_wikicode.py (+11, -6)

+ 1
- 0
CHANGELOG View File

@@ -5,6 +5,7 @@ v0.4 (unreleased):
is a breaking change if you rely on the default behavior. is a breaking change if you rely on the default behavior.
- The 'matches' argument of Wikicode's filter methods now accepts a function - The 'matches' argument of Wikicode's filter methods now accepts a function
(taking one argument, a Node, and returning a bool) in addition to a regex. (taking one argument, a Node, and returning a bool) in addition to a regex.
- Wikicode.get_sections() now returns sections in the correct order.
- Wikicode.matches() now accepts a tuple or list of strings/Wikicode objects - Wikicode.matches() now accepts a tuple or list of strings/Wikicode objects
instead of just a single string or Wikicode. instead of just a single string or Wikicode.
- C code cleanup and speed improvements. - C code cleanup and speed improvements.


+ 1
- 0
docs/changelog.rst View File

@@ -14,6 +14,7 @@ Unreleased
- The *matches* argument of :py:class:`Wikicode's <.Wikicode>` - The *matches* argument of :py:class:`Wikicode's <.Wikicode>`
:py:meth:`.filter` methods now accepts a function (taking one argument, a :py:meth:`.filter` methods now accepts a function (taking one argument, a
:py:class:`.Node`, and returning a bool) in addition to a regex. :py:class:`.Node`, and returning a bool) in addition to a regex.
- :py:meth:`.Wikicode.get_sections` now returns sections in the correct order.
- :py:meth:`.Wikicode.matches` now accepts a tuple or list of - :py:meth:`.Wikicode.matches` now accepts a tuple or list of
strings/:py:class:`.Wikicode` objects instead of just a single string or strings/:py:class:`.Wikicode` objects instead of just a single string or
:py:class:`.Wikicode`. :py:class:`.Wikicode`.


+ 22
- 25
mwparserfromhell/wikicode.py View File

@@ -434,34 +434,31 @@ class Wikicode(StringMixIn):
""" """
if matches: if matches:
matches = r"^(=+?)\s*" + matches + r"\s*\1$" matches = r"^(=+?)\s*" + matches + r"\s*\1$"
headings = self.filter_headings()
filtered = self.filter_headings(matches=matches, flags=flags)
headings = self.filter_headings(recursive=False, matches=matches,
flags=flags)
if levels: if levels:
filtered = [head for head in filtered if head.level in levels]
headings = [head for head in headings if head.level in levels]


if matches or include_lead is False or (not include_lead and levels):
buffers = []
else:
buffers = [(maxsize, 0)]
sections = [] sections = []
i = 0
while i < len(self.nodes):
if self.nodes[i] in headings:
this = self.nodes[i].level
for (level, start) in buffers:
if this <= level:
sections.append(Wikicode(self.nodes[start:i]))
buffers = [buf for buf in buffers if buf[0] < this]
if self.nodes[i] in filtered:
if not include_headings:
i += 1
if i >= len(self.nodes):
break
buffers.append((this, i))
i += 1
for (level, start) in buffers:
if start != i:
sections.append(Wikicode(self.nodes[start:i]))
if include_lead or not (include_lead is not None or matches or levels):
iterator = self.ifilter_headings(recursive=False)
try:
first = self.index(next(iterator))
sections.append(Wikicode(self.nodes[:first]))
except StopIteration: # No headings in page
sections.append(Wikicode(self.nodes[:]))

for heading in headings:
start = self.index(heading)
i = start + 1
if not include_headings:
start += 1
while i < len(self.nodes):
node = self.nodes[i]
if isinstance(node, Heading) and node.level <= heading.level:
break
i += 1
sections.append(Wikicode(self.nodes[start:i]))
return sections return sections


def strip_code(self, normalize=True, collapse=True): def strip_code(self, normalize=True, collapse=True):


+ 11
- 6
tests/test_wikicode.py View File

@@ -351,12 +351,12 @@ class TestWikicode(TreeEqualityTestCase):
p4_III = "== Section III ==\n" + p4_IIIA p4_III = "== Section III ==\n" + p4_IIIA
page4 = parse(p4_lead + p4_I + p4_II + p4_III) page4 = parse(p4_lead + p4_I + p4_II + p4_III)


self.assertEqual([], page1.get_sections())
self.assertEqual([""], page1.get_sections())
self.assertEqual(["", "==Heading=="], page2.get_sections()) self.assertEqual(["", "==Heading=="], page2.get_sections())
self.assertEqual(["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n", self.assertEqual(["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n",
"====Gnidaeh====\n"], page3.get_sections()) "====Gnidaeh====\n"], page3.get_sections())
self.assertEqual([p4_lead, p4_IA, p4_I, p4_IB, p4_IB1, p4_II,
p4_IIIA1a, p4_III, p4_IIIA, p4_IIIA2, p4_IIIA2ai1],
self.assertEqual([p4_lead, p4_I, p4_IA, p4_IB, p4_IB1, p4_II,
p4_III, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1],
page4.get_sections()) page4.get_sections())


self.assertEqual(["====Gnidaeh====\n"], page3.get_sections(levels=[4])) self.assertEqual(["====Gnidaeh====\n"], page3.get_sections(levels=[4]))
@@ -370,16 +370,16 @@ class TestWikicode(TreeEqualityTestCase):
page3.get_sections(include_lead=False)) page3.get_sections(include_lead=False))


self.assertEqual([p4_IB1, p4_IIIA2], page4.get_sections(levels=[4])) self.assertEqual([p4_IB1, p4_IIIA2], page4.get_sections(levels=[4]))
self.assertEqual([""], page2.get_sections(include_headings=False))
self.assertEqual(["", ""], page2.get_sections(include_headings=False))
self.assertEqual(["\nSection I.B.1 body.\n\n&bull;Some content.\n\n", self.assertEqual(["\nSection I.B.1 body.\n\n&bull;Some content.\n\n",
"\nEven more text.\n" + p4_IIIA2ai1], "\nEven more text.\n" + p4_IIIA2ai1],
page4.get_sections(levels=[4], page4.get_sections(levels=[4],
include_headings=False)) include_headings=False))


self.assertEqual([], page4.get_sections(matches=r"body")) self.assertEqual([], page4.get_sections(matches=r"body"))
self.assertEqual([p4_IA, p4_I, p4_IB, p4_IB1],
self.assertEqual([p4_I, p4_IA, p4_IB, p4_IB1],
page4.get_sections(matches=r"Section\sI[.\s].*?")) page4.get_sections(matches=r"Section\sI[.\s].*?"))
self.assertEqual([p4_IA, p4_IIIA1a, p4_IIIA, p4_IIIA2, p4_IIIA2ai1],
self.assertEqual([p4_IA, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1],
page4.get_sections(matches=r".*?a.*?")) page4.get_sections(matches=r".*?a.*?"))
self.assertEqual([p4_IIIA1a, p4_IIIA2ai1], self.assertEqual([p4_IIIA1a, p4_IIIA2ai1],
page4.get_sections(matches=r".*?a.*?", flags=re.U)) page4.get_sections(matches=r".*?a.*?", flags=re.U))
@@ -387,6 +387,11 @@ class TestWikicode(TreeEqualityTestCase):
page4.get_sections(matches=r".*?a.*?", flags=re.U, page4.get_sections(matches=r".*?a.*?", flags=re.U,
include_headings=False)) include_headings=False))


sections = page2.get_sections(include_headings=False)
sections[0].append("Lead!\n")
sections[1].append("\nFirst section!")
self.assertEqual("Lead!\n==Heading==\nFirst section!", page2)

page5 = parse("X\n== Foo ==\nBar\n== Baz ==\nBuzz") page5 = parse("X\n== Foo ==\nBar\n== Baz ==\nBuzz")
section = page5.get_sections(matches="Foo")[0] section = page5.get_sections(matches="Foo")[0]
section.replace("\nBar\n", "\nBarf ") section.replace("\nBar\n", "\nBarf ")


Loading…
Cancel
Save