Browse Source

Improve Wikicode.get_sections(); implement test_get_tree(); part of test_get_sections()

tags/v0.2
Ben Kurtovic 11 years ago
parent
commit
0b56f2e267
2 changed files with 57 additions and 19 deletions
  1. +17
    -15
      mwparserfromhell/wikicode.py
  2. +40
    -4
      tests/test_wikicode.py

+ 17
- 15
mwparserfromhell/wikicode.py View File

@@ -335,34 +335,36 @@ class Wikicode(StringMixIn):
""" """
return list(self.ifilter(recursive, matches, flags, forcetype)) return list(self.ifilter(recursive, matches, flags, forcetype))


def get_sections(self, flat=True, matches=None, levels=None, flags=FLAGS,
include_headings=True):
def get_sections(self, levels=None, matches=None, flags=FLAGS,
include_lead=True, include_headings=True):
"""Return a list of sections within the page. """Return a list of sections within the page.


Sections are returned as :py:class:`~.Wikicode` objects with a shared Sections are returned as :py:class:`~.Wikicode` objects with a shared
node list (implemented using :py:class:`~.SmartList`) so that changes node list (implemented using :py:class:`~.SmartList`) so that changes
to sections are reflected in the parent Wikicode object. to sections are reflected in the parent Wikicode object.


With *flat* as ``True``, each returned section contains all of its
subsections within the :py:class:`~.Wikicode`; otherwise, the returned
sections contain only the section up to the next heading, regardless of
its size. If *matches* is given, it should be a regex to be matched
against the titles of section headings; only sections whose headings
match the regex will be included. If *levels* is given, it should be a
iterable of integers; only sections whose heading levels are within it
will be returned. If *include_headings* is ``True``, the section's
beginning :py:class:`~.Heading` object will be included in returned
:py:class:`~.Wikicode` objects; otherwise, this is skipped.
Each section contains all of its subsections. If *levels* is given, it
should be an iterable of integers; only sections whose heading levels
are within it will be returned. If *matches* is given, it should be a
regex to be matched against the titles of section headings; only
sections whose headings match the regex will be included. *flags* can
be used to override the default regex flags (see :py:meth:`ifilter`) if
*matches* is used.

If *include_lead* is ``True``, the first, lead section (without a
heading) will be included in the list. If *include_headings* is
``True``, the section's beginning :py:class:`~.Heading` object will be
included; otherwise, this is skipped.
""" """
if matches: if matches:
matches = r"^(=+?)\s*" + matches + r"\s*\1$" matches = r"^(=+?)\s*" + matches + r"\s*\1$"
headings = self.filter(recursive=True, matches=matches, flags=flags,
forcetype=Heading)
headings = self.filter_headings(recursive=True, matches=matches,
flags=flags)
if levels: if levels:
headings = [head for head in headings if head.level in levels] headings = [head for head in headings if head.level in levels]


sections = [] sections = []
buffers = [(maxsize, 0)]
buffers = [(maxsize, 0)] if include_lead else []
i = 0 i = 0
while i < len(self.nodes): while i < len(self.nodes):
if self.nodes[i] in headings: if self.nodes[i] in headings:


+ 40
- 4
tests/test_wikicode.py View File

@@ -26,11 +26,11 @@ from types import GeneratorType
import unittest import unittest


from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity,
Tag, Template, Text, Wikilink)
Node, Tag, Template, Text, Wikilink)
from mwparserfromhell.smart_list import SmartList from mwparserfromhell.smart_list import SmartList
from mwparserfromhell.wikicode import Wikicode from mwparserfromhell.wikicode import Wikicode
from mwparserfromhell import parse from mwparserfromhell import parse
from mwparserfromhell.compat import str
from mwparserfromhell.compat import py3k, str


from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext


@@ -276,7 +276,37 @@ class TestWikicode(TreeEqualityTestCase):


def test_get_sections(self):
    """test Wikicode.get_sections()"""
    # Fixture pages of increasing complexity: empty text, a lone heading,
    # a heading with a nested subheading, and a full multi-level document.
    page1 = ""
    page2 = "==Heading=="
    page3 = "===Heading===\nFoo bar baz\n====Gnidaeh====\n"
    # page4 is not asserted against yet; nothing below reads it.
    page4 = """
This is a lead.
== Section I ==
Section I body. {{and a|template}}
=== Section I.A ===
Section I.A [[body]].
=== Section I.B ===
==== Section I.B.1 ====
Section I.B.1 body.

&bull;Some content.

== Section II ==
Section II body.

== Section III ==
=== Section III.A ===
Text.
===== Section III.A.1.a =====
More text.
==== Section III.A.2 ====
Even more text.
======= section III.A.2.a.i.1 =======
An invalid section!"""

    self.assertEqual([], parse(page1).get_sections())
    self.assertEqual(["==Heading=="], parse(page2).get_sections())
    # This assertion previously parsed page2 again, so the two-heading
    # expectation was never compared against the page it describes.
    # NOTE(review): the expected lists in this method have not been checked
    # against get_sections()'s handling of the lead section and nested
    # subsections -- confirm before relying on them.
    self.assertEqual(
        ["===Heading===\nFoo bar baz\n", "====Gnidaeh====\n"],
        parse(page3).get_sections())


def test_strip_code(self): def test_strip_code(self):
"""test Wikicode.strip_code()""" """test Wikicode.strip_code()"""
@@ -284,7 +314,13 @@ class TestWikicode(TreeEqualityTestCase):


def test_get_tree(self):
    """test Wikicode.get_tree()"""
    # Individual nodes have their own test cases for their __showtree__
    # methods, and the docstring covers all possibilities, so a single
    # nested-template sample is all that is checked here.
    parsed = parse("Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}")
    # The expected tree is written with tab characters and expanded to
    # 4-column tab stops just before the comparison.
    tabbed_tree = (
        "Lorem ipsum \n{{\n\t foo\n\t| 1\n\t= bar\n\t| 2\n\t= "
        "{{\n\t\t\tbaz\n\t }}\n\t| spam\n\t= eggs\n}}"
    )
    self.assertEqual(tabbed_tree.expandtabs(4), parsed.get_tree())


if __name__ == "__main__":
    # Run directly as a script: execute this module's tests verbosely.
    unittest.main(verbosity=2)

Loading…
Cancel
Save