Browse Source

Implement Wikicode.get_sections(), starting SmartList.

tags/v0.1
Ben Kurtovic 11 years ago
parent
commit
33f8924cf9
4 changed files with 124 additions and 3 deletions
  1. +1
    -1
      mwparserfromhell/__init__.py
  2. +2
    -1
      mwparserfromhell/parser/demo.py
  3. +91
    -0
      mwparserfromhell/smart_list.py
  4. +30
    -1
      mwparserfromhell/wikicode.py

+ 1
- 1
mwparserfromhell/__init__.py View File

@@ -32,6 +32,6 @@ __license__ = "MIT License"
__version__ = "0.1.dev"
__email__ = "ben.kurtovic@verizon.net"

from mwparserfromhell import nodes, parser, string_mixin, wikicode
from mwparserfromhell import nodes, parser, smart_list, string_mixin, wikicode

def parse(text):
    """Parse *text* and return the result.

    Builds a ``parser.Parser`` from *text* and returns whatever its
    ``parse()`` method returns.
    """
    return parser.Parser(text).parse()

+ 2
- 1
mwparserfromhell/parser/demo.py View File

@@ -22,6 +22,7 @@

from mwparserfromhell.nodes import Template, Text
from mwparserfromhell.nodes.extras import Parameter
from mwparserfromhell.smart_list import SmartList
from mwparserfromhell.wikicode import Wikicode

__all__ = ["DemoParser"]
@@ -48,5 +49,5 @@ class DemoParser(object):
node4_param2 = Parameter(node4_param2_name, node4_param2_value, showkey=True)
node4 = Template(Wikicode([Text(u"template")]), [node4_param1, node4_param2])
node5 = Text(u".")
parsed = Wikicode([node1, node2, node3, node4, node5])
parsed = Wikicode(SmartList([node1, node2, node3, node4, node5]))
return parsed

+ 91
- 0
mwparserfromhell/smart_list.py View File

@@ -0,0 +1,91 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import sys

__all__ = ["SmartList"]

class SmartList(list):
    """A list that hands out live views of its slices.

    Under Python 2, ``lst[i:j]`` is routed to ``__getslice__``, which returns
    a ``_ListProxy`` instead of a copy.  Every view handed out is recorded in
    ``self._children`` together with its mutable ``[start, stop, step]``
    bounds so that ``append`` can widen views that reach the end of the list.

    NOTE: ``__getslice__`` is only invoked by Python 2; interpreters that do
    not call it return an ordinary list copy for a plain slice.
    NOTE: ``_children`` holds a strong reference to every view it records, so
    views live as long as the list does.
    """

    # Stop value Python 2 passes to ``__getslice__`` for an open-ended slice
    # such as ``lst[1:]``.  Falls back to ``sys.maxsize`` where ``sys.maxint``
    # does not exist so that ``append`` stays usable there.
    _OPEN_END = getattr(sys, "maxint", sys.maxsize)

    def __init__(self, iterable=None):
        """Create the list from *iterable*; a falsy value gives an empty list."""
        if iterable:
            super(SmartList, self).__init__(iterable)
        else:
            super(SmartList, self).__init__()
        # Maps id(view) -> (view, [start, stop, step]).
        self._children = {}

    def __getslice__(self, start, stop):
        """Return a live view of ``self[start:stop]`` and record it."""
        # The bounds list is shared with the view, so updating it here is
        # immediately visible through the view.
        sliceinfo = [start, stop, 1]
        child = _ListProxy(self, sliceinfo)
        self._children[id(child)] = (child, sliceinfo)
        return child

    # TODO: __setslice__(self, start, stop) is not implemented yet.

    def append(self, obj):
        """Append *obj* and widen every recorded view that reached the end.

        A view is widened when its stop is at or beyond the length the list
        had before this call.  Open-ended views (stop equal to ``_OPEN_END``)
        are left alone because they already extend to the end.
        """
        super(SmartList, self).append(obj)
        previous_length = len(self) - 1
        # values() instead of itervalues(): available on every interpreter,
        # and only the bounds lists are mutated, never the dict itself.
        for _child, sliceinfo in self._children.values():
            stop = sliceinfo[1]
            if stop >= previous_length and stop != self._OPEN_END:
                sliceinfo[1] = stop + 1


class _ListProxy(list):
    """Live view onto a region of a parent list.

    The view stores no items itself (the underlying ``list`` storage stays
    empty); every read goes through ``self._parent`` using the bounds held in
    ``self._sliceinfo``, a mutable ``[start, stop, step]`` list that the
    creator of the view may also keep and update.

    The step is assumed to be a positive integer.  ``append`` and ``insert``
    are written for a step of 1.
    """

    def __init__(self, parent, sliceinfo):
        """Bind the view to *parent* with the shared bounds list *sliceinfo*."""
        super(_ListProxy, self).__init__()
        self._parent = parent
        self._sliceinfo = sliceinfo

    def __len__(self):
        """Return how many parent items are currently visible."""
        span = self._stop - self._start
        if span <= 0:
            return 0
        # Ceiling division: a partial final step still yields one item.
        return (span + self._step - 1) // self._step

    def __iter__(self):
        """Yield the visible parent items in order."""
        i = self._start
        # _stop is re-read on every pass and never exceeds the parent's
        # length, so indexing the parent cannot run off its end.
        while i < self._stop:
            yield self._parent[i]
            i += self._step

    def __getitem__(self, index):
        """Return the item (or slice) at *index* of the rendered view."""
        return self._render()[index]

    def __getslice__(self, start, stop):
        """Return ``view[start:stop]`` of the rendered view."""
        return self._render()[start:stop]

    @property
    def _start(self):
        """First parent index covered by the view."""
        return self._sliceinfo[0]

    @property
    def _stop(self):
        """End (exclusive) of the view, clamped to the parent's length.

        The stored stop may be far larger than the parent, e.g. the sentinel
        used for an open-ended slice, so it is clamped before use.
        """
        return min(self._sliceinfo[1], len(self._parent))

    @property
    def _step(self):
        """Distance between consecutive parent indices in the view."""
        return self._sliceinfo[2]

    def _render(self):
        """Return the parent's three-argument slice for the current bounds."""
        return self._parent[self._start:self._stop:self._step]

    def _insert_at(self, position, obj):
        """Insert *obj* into the parent at *position* and widen the view.

        When the stored stop lies within the parent it is advanced by one so
        the new item is visible.  When it lies beyond the parent, the view
        already reaches the end and needs no adjustment.  Other views on the
        same parent are not adjusted here.
        """
        bounded = self._sliceinfo[1] <= len(self._parent)
        self._parent.insert(position, obj)
        if bounded:
            self._sliceinfo[1] += 1

    def append(self, obj):
        """Add *obj* as the last item of the view by inserting into the parent."""
        self._insert_at(self._stop, obj)

    def insert(self, index, obj):
        """Insert *obj* before position *index* of the view.

        *index* follows ``list.insert`` rules relative to the view: negative
        values count from the view's end, and out-of-range values are clamped
        so the insertion always lands inside the view.
        """
        size = len(self)
        if index < 0:
            index = max(size + index, 0)
        elif index > size:
            index = size
        self._insert_at(self._start + index, obj)

+ 30
- 1
mwparserfromhell/wikicode.py View File

@@ -21,8 +21,9 @@
# SOFTWARE.

import re
import sys

from mwparserfromhell.nodes import Node, Tag, Template, Text
from mwparserfromhell.nodes import Heading, Node, Tag, Template, Text
from mwparserfromhell.string_mixin import StringMixIn
from mwparserfromhell.utils import parse_anything

@@ -190,6 +191,34 @@ class Wikicode(StringMixIn):
def filter_tags(self, recursive=False, matches=None, flags=FLAGS):
    """Return everything ``ifilter_tags`` yields, collected into a list.

    *recursive*, *matches* and *flags* are forwarded positionally and
    unchanged to ``self.ifilter_tags``.
    """
    tags = self.ifilter_tags(recursive, matches, flags)
    return list(tags)

def get_sections(self, flat=True, matches=None, levels=None, flags=FLAGS,
                 include_headings=True):
    """Split ``self.nodes`` into sections delimited by headings.

    The nodes before the first selected heading form a lead section.  Each
    selected heading opens a new section, which stays open until a later
    selected heading closes it.

    flat -- if true, a heading closes only the open sections whose level
        number is greater than or equal to its own, so a section keeps
        running through headings with a larger level number.  If false,
        every selected heading closes every open section.
    matches -- optional regex fragment.  When given it is wrapped so that it
        must span the text between a heading's ``=`` markers before being
        passed to ``self.filter``.
    levels -- optional container of heading levels; selected headings whose
        ``level`` is not in it are ignored as delimiters.
    flags -- passed through to ``self.filter``.
    include_headings -- if true each section starts at its heading node,
        otherwise at the node right after it.

    Returns a list of slices of ``self.nodes``.  Sections appear in the order
    they were closed, followed by those still open at the end.  A section
    closed by a heading is returned even when empty; a section still open at
    the end is returned only if it is non-empty.
    """
    if matches:
        matches = r"^(=+?)\s*" + matches + r"\s*\1$"
    # NOTE(review): filter() is presumably returning the Heading nodes that
    # match; its implementation is not visible here.
    headings = self.filter(recursive=True, matches=matches, flags=flags,
                           forcetype=Heading)
    if levels:
        headings = [head for head in headings if head.level in levels]

    nodes = self.nodes
    sections = []
    # (level, start index) of every section still open.  The lead section
    # gets the largest possible level so that any heading closes it.
    open_sections = [(sys.maxint, 0)]
    for i, node in enumerate(nodes):
        if node not in headings:
            continue
        this = node.level
        # Build a fresh list instead of removing entries from the list being
        # iterated: removing in place skipped the entry after each removal,
        # leaving nested subsections open past their parent's end.
        still_open = []
        for level, start in open_sections:
            if not flat or this <= level:
                sections.append(nodes[start:i])
            else:
                still_open.append((level, start))
        # Start after the heading when headings are excluded.  The following
        # node is still inspected, so back-to-back headings are not missed.
        still_open.append((this, i if include_headings else i + 1))
        open_sections = still_open

    end = len(nodes)
    for level, start in open_sections:
        if start != end:
            sections.append(nodes[start:end])
    return sections

def strip_code(self, normalize=True, collapse=True):
nodes = []
for node in self.nodes:


Loading…
Cancel
Save