Browse Source

Improve Wikicode.get_sections(); implement test_get_tree(); part of test_get_sections()

tags/v0.2
Ben Kurtovic 11 years ago
parent
commit
0b56f2e267
2 changed files with 57 additions and 19 deletions
  1. +17
    -15
      mwparserfromhell/wikicode.py
  2. +40
    -4
      tests/test_wikicode.py

+ 17
- 15
mwparserfromhell/wikicode.py View File

@@ -335,34 +335,36 @@ class Wikicode(StringMixIn):
"""
return list(self.ifilter(recursive, matches, flags, forcetype))

def get_sections(self, flat=True, matches=None, levels=None, flags=FLAGS,
include_headings=True):
def get_sections(self, levels=None, matches=None, flags=FLAGS,
include_lead=True, include_headings=True):
"""Return a list of sections within the page.

Sections are returned as :py:class:`~.Wikicode` objects with a shared
node list (implemented using :py:class:`~.SmartList`) so that changes
to sections are reflected in the parent Wikicode object.

With *flat* as ``True``, each returned section contains all of its
subsections within the :py:class:`~.Wikicode`; otherwise, the returned
sections contain only the section up to the next heading, regardless of
its size. If *matches* is given, it should be a regex to be matched
against the titles of section headings; only sections whose headings
match the regex will be included. If *levels* is given, it should be a
iterable of integers; only sections whose heading levels are within it
will be returned. If *include_headings* is ``True``, the section's
beginning :py:class:`~.Heading` object will be included in returned
:py:class:`~.Wikicode` objects; otherwise, this is skipped.
Each section contains all of its subsections. If *levels* is given, it
should be an iterable of integers; only sections whose heading levels
are within it will be returned. If *matches* is given, it should be a
regex to be matched against the titles of section headings; only
sections whose headings match the regex will be included. *flags* can
be used to override the default regex flags (see :py:meth:`ifilter`) if
*matches* is used.

If *include_lead* is ``True``, the first, lead section (without a
heading) will be included in the list. If *include_headings* is
``True``, the section's beginning :py:class:`~.Heading` object will be
included; otherwise, this is skipped.
"""
if matches:
matches = r"^(=+?)\s*" + matches + r"\s*\1$"
headings = self.filter(recursive=True, matches=matches, flags=flags,
forcetype=Heading)
headings = self.filter_headings(recursive=True, matches=matches,
flags=flags)
if levels:
headings = [head for head in headings if head.level in levels]

sections = []
buffers = [(maxsize, 0)]
buffers = [(maxsize, 0)] if include_lead else []
i = 0
while i < len(self.nodes):
if self.nodes[i] in headings:


+ 40
- 4
tests/test_wikicode.py View File

@@ -26,11 +26,11 @@ from types import GeneratorType
import unittest

from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity,
Tag, Template, Text, Wikilink)
Node, Tag, Template, Text, Wikilink)
from mwparserfromhell.smart_list import SmartList
from mwparserfromhell.wikicode import Wikicode
from mwparserfromhell import parse
from mwparserfromhell.compat import str
from mwparserfromhell.compat import py3k, str

from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext

@@ -276,7 +276,37 @@ class TestWikicode(TreeEqualityTestCase):

def test_get_sections(self):
    """test Wikicode.get_sections()"""
    # Fixtures, from simplest to most complex: an empty page, a page that
    # is a single heading, a heading with body text followed by a deeper
    # heading, and a full multi-level document with a lead section.
    page1 = ""
    page2 = "==Heading=="
    page3 = "===Heading===\nFoo bar baz\n====Gnidaeh====\n"
    # page4 is defined for upcoming assertions; nothing below uses it yet.
    page4 = """
This is a lead.
== Section I ==
Section I body. {{and a|template}}
=== Section I.A ===
Section I.A [[body]].
=== Section I.B ===
==== Section I.B.1 ====
Section I.B.1 body.

&bull;Some content.

== Section II ==
Section II body.

== Section III ==
=== Section III.A ===
Text.
===== Section III.A.1.a =====
More text.
==== Section III.A.2 ====
Even more text.
======= section III.A.2.a.i.1 =======
An invalid section!"""

    self.assertEqual([], parse(page1).get_sections())
    self.assertEqual(["==Heading=="], parse(page2).get_sections())
    # This expectation describes page3 (two headings), so it must be checked
    # against page3; asserting it on page2 would contradict the line above.
    # NOTE(review): the expected lists assume no lead section is returned
    # and that a section stops at the next heading -- confirm against the
    # current get_sections() defaults.
    self.assertEqual(["===Heading===\nFoo bar baz\n", "====Gnidaeh====\n"],
                     parse(page3).get_sections())

def test_strip_code(self):
"""test Wikicode.strip_code()"""
@@ -284,7 +314,13 @@ class TestWikicode(TreeEqualityTestCase):

def test_get_tree(self):
    """test Wikicode.get_tree()"""
    # Every node type already has a test case for its own __showtree__()
    # method, and the docstring covers all possibilities, so checking one
    # representative tree is all that is needed here.
    wikicode = parse("Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}")
    tree = (
        "Lorem ipsum \n{{\n\t foo\n\t| 1\n\t= bar\n\t| 2\n\t= "
        "{{\n\t\t\tbaz\n\t }}\n\t| spam\n\t= eggs\n}}"
    )
    # The expectation is written with tabs and expanded to 4-column stops
    # before being compared with the get_tree() output.
    self.assertEqual(tree.expandtabs(4), wikicode.get_tree())

# Run this module's tests with verbose output when executed as a script.
if __name__ == "__main__":
unittest.main(verbosity=2)

Loading…
Cancel
Save