From 253c812fb310733d724f5a765a77d1963c45bba8 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Fri, 27 Jul 2012 19:21:38 -0400
Subject: [PATCH] Implement __strip__ API for Wikicode.

---
 mwparserfromhell/nodes/__init__.py    |  3 ++-
 mwparserfromhell/nodes/heading.py     |  3 +++
 mwparserfromhell/nodes/html_entity.py |  5 +++++
 mwparserfromhell/nodes/tag.py         |  5 +++++
 mwparserfromhell/nodes/text.py        |  3 +++
 mwparserfromhell/wikicode.py          | 16 +++-------------
 6 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/mwparserfromhell/nodes/__init__.py b/mwparserfromhell/nodes/__init__.py
index 2d5d1c2..c03785b 100644
--- a/mwparserfromhell/nodes/__init__.py
+++ b/mwparserfromhell/nodes/__init__.py
@@ -25,7 +25,8 @@ from mwparserfromhell.string_mixin import StringMixIn
 __all__ = ["Node"]
 
 class Node(StringMixIn):
-    pass
+    def __strip__(self, normalize=True, collapse=True):
+        return None
 
 from mwparserfromhell.nodes import extras
 from mwparserfromhell.nodes.text import Text
diff --git a/mwparserfromhell/nodes/heading.py b/mwparserfromhell/nodes/heading.py
index 71181e6..67851d5 100644
--- a/mwparserfromhell/nodes/heading.py
+++ b/mwparserfromhell/nodes/heading.py
@@ -32,6 +32,9 @@ class Heading(Node):
     def __unicode__(self):
         return ("=" * self.level) + self.title + ("=" * self.level)
 
+    def __strip__(self, normalize=True, collapse=True):
+        return self.title
+
     @property
     def title(self):
         return self._title
diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py
index 8276d7e..aede31f 100644
--- a/mwparserfromhell/nodes/html_entity.py
+++ b/mwparserfromhell/nodes/html_entity.py
@@ -53,6 +53,11 @@ class HTMLEntity(Node):
             return u"&#x{0};".format(self.value)
         return u"&#{0};".format(self.value)
 
+    def __strip__(self, normalize=True, collapse=True):
+        if normalize:
+            return self.normalize()
+        return self
+
     def _unichr(self, value):
         """Implement the builtin unichr() with support for non-BMP code points.
 
diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py
index f395fe4..f0a7748 100644
--- a/mwparserfromhell/nodes/tag.py
+++ b/mwparserfromhell/nodes/tag.py
@@ -100,6 +100,11 @@ class Tag(Node):
             result += ""
         return result
 
+    def __strip__(self, normalize=True, collapse=True):
+        if self.type in self.TAGS_VISIBLE:
+            return self.contents.strip_code(normalize, collapse)
+        return None
+
     def translate(self):
         translations {
             self.TAG_ITALIC: ("''", "''"),
diff --git a/mwparserfromhell/nodes/text.py b/mwparserfromhell/nodes/text.py
index afdc235..d6b929c 100644
--- a/mwparserfromhell/nodes/text.py
+++ b/mwparserfromhell/nodes/text.py
@@ -31,6 +31,9 @@ class Text(Node):
     def __unicode__(self):
         return unicode(self.value)
 
+    def __strip__(self, normalize=True, collapse=True):
+        return self
+
     @property
     def value(self):
         return self._value
diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py
index 044d635..ae05879 100644
--- a/mwparserfromhell/wikicode.py
+++ b/mwparserfromhell/wikicode.py
@@ -253,20 +253,10 @@ class Wikicode(StringMixIn):
     def strip_code(self, normalize=True, collapse=True):
         nodes = []
         for node in self.nodes:
-            if isinstance(node, Heading):
-                nodes.append(child.title)
-            elif isinstance(node, HTMLEntity):
-                if normalize:
-                    nodes.append(node.normalize())
-                else:
-                    nodes.append(node)
-            elif isinstance(node, Tag):
-                if node.type in node.TAGS_VISIBLE:
-                    nodes.append(node.contents.strip_code(normalize, collapse))
-            elif isinstance(node, Text):
-                nodes.append(node)
+            stripped = node.__strip__(normalize)
+            if stripped:
+                nodes.append(unicode(stripped))
 
-        nodes = map(unicode, nodes)
         if collapse:
             stripped = u"".join(nodes).strip("\n")
             while "\n\n\n" in stripped: