From 33f8924cf9c40902f84e945205863c2b396f2e33 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 1 Aug 2012 18:00:18 -0400 Subject: [PATCH] Implement Wikicode.get_sections(), starting SmartList. --- mwparserfromhell/__init__.py | 2 +- mwparserfromhell/parser/demo.py | 3 +- mwparserfromhell/smart_list.py | 91 +++++++++++++++++++++++++++++++++++++++++ mwparserfromhell/wikicode.py | 31 +++++++++++++- 4 files changed, 124 insertions(+), 3 deletions(-) create mode 100644 mwparserfromhell/smart_list.py diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index e6ae477..cf63afd 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -32,6 +32,6 @@ __license__ = "MIT License" __version__ = "0.1.dev" __email__ = "ben.kurtovic@verizon.net" -from mwparserfromhell import nodes, parser, string_mixin, wikicode +from mwparserfromhell import nodes, parser, smart_list, string_mixin, wikicode parse = lambda text: parser.Parser(text).parse() diff --git a/mwparserfromhell/parser/demo.py b/mwparserfromhell/parser/demo.py index fe683e6..5405f62 100644 --- a/mwparserfromhell/parser/demo.py +++ b/mwparserfromhell/parser/demo.py @@ -22,6 +22,7 @@ from mwparserfromhell.nodes import Template, Text from mwparserfromhell.nodes.extras import Parameter +from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode __all__ = ["DemoParser"] @@ -48,5 +49,5 @@ class DemoParser(object): node4_param2 = Parameter(node4_param2_name, node4_param2_value, showkey=True) node4 = Template(Wikicode([Text(u"template")]), [node4_param1, node4_param2]) node5 = Text(u".") - parsed = Wikicode([node1, node2, node3, node4, node5]) + parsed = Wikicode(SmartList([node1, node2, node3, node4, node5])) return parsed diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py new file mode 100644 index 0000000..0146e40 --- /dev/null +++ b/mwparserfromhell/smart_list.py @@ -0,0 +1,91 @@ +# -*- coding: utf-8 -*- +# +# 
Copyright (C) 2012 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
import sys

__all__ = ["SmartList"]


def _normalize_insert_index(index, size):
    """Clamp *index* into ``0..size`` the way ``list.insert`` interprets it."""
    if index < 0:
        return max(index + size, 0)
    return min(index, size)


class SmartList(list):
    """A list whose plain slices are live views rather than copies.

    Taking ``smart[a:b]`` returns a ``_ListProxy`` that reads through to this
    list. Every proxy handed out is recorded in ``_children`` together with
    its mutable ``[start, stop, step]`` bounds, and ``append()``/``insert()``
    shift those bounds so existing views keep covering the same elements.

    Only ``append()`` and ``insert()`` update the recorded bounds; the other
    inherited ``list`` mutators are not overridden here.
    """

    def __init__(self, iterable=None):
        """Create the list from *iterable* (empty if it is missing or falsy)."""
        if iterable:
            super(SmartList, self).__init__(iterable)
        else:
            super(SmartList, self).__init__()
        # id(proxy) -> (proxy, [start, stop, step]); the bounds list is the
        # same object the proxy reads, so updating it here updates the view.
        self._children = {}

    def __getitem__(self, key):
        """Return one item, or a live proxy for a plain ``a:b`` slice.

        Python 3 passes every slice here instead of to ``__getslice__``.
        Only slices without a step become proxies; a slice with an explicit
        step still yields an ordinary list copy, as it did before.
        """
        if isinstance(key, slice) and key.step is None:
            size = len(self)
            start = 0 if key.start is None else key.start
            # A missing upper bound is stored as sys.maxsize, the same value
            # Python 2 passes to __getslice__ for ``smart[a:]``.
            stop = sys.maxsize if key.stop is None else key.stop
            if start < 0:
                start += size
            if stop < 0:
                stop += size
            return self._make_child(start, stop)
        return super(SmartList, self).__getitem__(key)

    def __getslice__(self, start, stop):
        """Return a live proxy for ``self[start:stop]`` (Python 2 protocol)."""
        return self._make_child(start, stop)

    def _make_child(self, start, stop):
        """Create, register and return a proxy covering ``start:stop``."""
        # Bounds that are still negative after len() was added point before
        # the first element; pin them to 0 so they cannot wrap around.
        sliceinfo = [max(start, 0), max(stop, 0), 1]
        child = _ListProxy(self, sliceinfo)
        self._children[id(child)] = (child, sliceinfo)
        return child

    def append(self, obj):
        """Add *obj* at the end, growing every view that reaches the end."""
        self.insert(len(self), obj)

    def insert(self, index, obj):
        """Insert *obj* before *index* and shift the bounds of every view.

        A view that starts after *index* moves right by one. A view whose
        stop is at or after *index* grows by one, unless it is open-ended
        (stop == sys.maxsize), in which case it already follows the list.
        """
        index = _normalize_insert_index(index, len(self))
        super(SmartList, self).insert(index, obj)
        for _child, bounds in self._children.values():
            if bounds[0] > index:
                bounds[0] += 1
            if bounds[1] >= index and bounds[1] != sys.maxsize:
                bounds[1] += 1


class _ListProxy(list):
    """Live view onto a ``start:stop:step`` window of a parent SmartList.

    The proxy stores no elements of its own: the inherited list storage
    stays empty and every read goes to the parent.

    NOTE(review): ``==`` and ``repr()`` are not overridden, so they still
    operate on that empty inherited storage; compare ``list(proxy)`` instead.
    """

    def __init__(self, parent, sliceinfo):
        """Bind to *parent*; *sliceinfo* is the shared ``[start, stop, step]``."""
        super(_ListProxy, self).__init__()
        self._parent = parent
        self._sliceinfo = sliceinfo

    def __len__(self):
        """Return how many parent elements the window currently covers."""
        span = self._stop - self._start
        if span <= 0:
            return 0
        # Ceiling division, so a partial final stride still counts.
        return (span + self._step - 1) // self._step

    def __iter__(self):
        """Yield the covered parent elements in order."""
        i = self._start
        while i < self._stop:
            yield self._parent[i]
            i += self._step

    def __getitem__(self, index):
        """Index (or slice) a snapshot of the window."""
        return self._render()[index]

    def __getslice__(self, start, stop):
        """Slice a snapshot of the window (Python 2 protocol)."""
        return self._render()[start:stop]

    @property
    def _start(self):
        """Index in the parent of the first covered element."""
        return self._sliceinfo[0]

    @property
    def _stop(self):
        """Index in the parent one past the last covered element.

        Capped at the parent's length so an open-ended window (stored stop
        of sys.maxsize) or an oversized bound never runs past the data.
        """
        return min(self._sliceinfo[1], len(self._parent))

    @property
    def _step(self):
        """Stride between covered elements."""
        return self._sliceinfo[2]

    def _render(self):
        """Return the window's current contents as a plain list."""
        # Call list.__getitem__ directly so the parent's proxy-producing
        # slicing is bypassed and a real copy comes back.
        window = slice(self._start, self._stop, self._step)
        return list.__getitem__(self._parent, window)

    def append(self, obj):
        """Insert *obj* into the parent just past the end of the window."""
        self._parent.insert(self._stop, obj)

    def insert(self, index, obj):
        """Insert *obj* into the parent at window-relative position *index*."""
        index = _normalize_insert_index(index, len(self))
        self._parent.insert(self._start + index, obj)
def get_sections(self, flat=True, matches=None, levels=None, flags=FLAGS,
                 include_headings=True):
    """Split this object's nodes into sections delimited by headings.

    Headings are collected with ``self.filter(..., forcetype=Heading)``.
    If *matches* is given it is wrapped in a pattern that requires the same
    run of ``=`` characters on both sides of the title. If *levels* is
    given, only headings whose ``level`` is in it act as delimiters.

    While walking ``self.nodes``, each delimiter heading closes open
    sections and opens a new one. With *flat* true it closes only the
    sections whose level is greater than or equal to its own, so a section
    continues through deeper headings. With *flat* false it closes every
    open section.

    With *include_headings* false, a section begins at the node after its
    heading instead of at the heading itself.

    Returns a list of slices of ``self.nodes``, in the order the sections
    were closed. Sections still open at the end are appended last, except
    those that would be empty.
    """
    if matches:
        matches = r"^(=+?)\s*" + matches + r"\s*\1$"
    headings = self.filter(recursive=True, matches=matches, flags=flags,
                           forcetype=Heading)
    if levels:
        headings = [head for head in headings if head.level in levels]

    sections = []
    # (level, start index) of each section that has not been closed yet.
    # The text before the first heading gets a level above any heading's,
    # so the first heading always closes it.
    open_sections = [(sys.maxsize, 0)]
    for i, node in enumerate(self.nodes):
        if node not in headings:
            continue
        this = node.level
        # Rebuild the list rather than removing entries while iterating it;
        # removal during iteration skips the entry after each removed one.
        still_open = []
        for level, start in open_sections:
            if not flat or this <= level:
                sections.append(self.nodes[start:i])
            else:
                still_open.append((level, start))
        # Excluding the heading means starting the slice one node later;
        # the following node is still examined as a possible heading.
        first = i if include_headings else i + 1
        still_open.append((this, first))
        open_sections = still_open

    end = len(self.nodes)
    for level, start in open_sections:
        if start != end:
            sections.append(self.nodes[start:end])
    return sections