@@ -28,5 +28,6 @@ class Node(StringMixIn): | |||||
pass | pass | ||||
from mwparserfromhell.nodes import extras | from mwparserfromhell.nodes import extras | ||||
from mwparserfromhell.nodes.html_entity import HTMLEntity | |||||
from mwparserfromhell.nodes.template import Template | from mwparserfromhell.nodes.template import Template | ||||
from mwparserfromhell.nodes.text import Text | from mwparserfromhell.nodes.text import Text |
@@ -0,0 +1,50 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
import htmlentitydefs | |||||
from mwparserfromhell.nodes import Node | |||||
__all__ = ["HTMLEntity"] | |||||
class HTMLEntity(Node):
    """Represent one HTML entity, in named (``&name;``) or numeric
    (``&#number;``) form.

    ``value`` is whatever goes between the leading ``&`` / ``&#`` and the
    closing ``;``; ``named`` selects which of the two forms is written.
    """

    def __init__(self, value, named):
        self._value = value
        self._named = named

    def __unicode__(self):
        """Return the entity as it is written in markup."""
        if self.named:
            return u"&{0};".format(self.value)
        return u"&#{0};".format(self.value)

    @property
    def value(self):
        """The entity's name, or its code point when it is numeric."""
        return self._value

    @property
    def named(self):
        """True for the ``&name;`` form, false for the ``&#number;`` form."""
        return self._named

    def normalize(self):
        """Return the unicode character that this entity stands for.

        Named entities are looked up in ``htmlentitydefs.name2codepoint``,
        so an unknown name raises ``KeyError``. Numeric entities accept an
        integer code point or its textual form; text that is not a valid
        number raises ``ValueError``.
        """
        if self.named:
            return unichr(htmlentitydefs.name2codepoint[self.value])

        codepoint = self.value
        if isinstance(codepoint, basestring):
            # unichr() only accepts integers, so a code point kept as text
            # has to be converted first.
            # NOTE(review): __unicode__ writes the value verbatim after
            # "&#", so a hexadecimal entity is presumed to keep its leading
            # "x" in the value - confirm against the tokenizer.
            if codepoint[:1] in (u"x", u"X"):
                codepoint = int(codepoint[1:], 16)
            else:
                codepoint = int(codepoint)
        return unichr(codepoint)
@@ -22,7 +22,7 @@ | |||||
__all__ = ["StringMixIn"] | __all__ = ["StringMixIn"] | ||||
class StringMixIn(object): | |||||
class StringMixIn(unicode): | |||||
def __str__(self): | def __str__(self): | ||||
return unicode(self).encode("utf8") | return unicode(self).encode("utf8") | ||||
@@ -20,11 +20,10 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
import htmlentitydefs | |||||
import re | import re | ||||
import mwparserfromhell | import mwparserfromhell | ||||
from mwparserfromhell.nodes import Node, Template, Text | |||||
from mwparserfromhell.nodes import HTMLEntity, Node, Template, Text | |||||
from mwparserfromhell.string_mixin import StringMixIn | from mwparserfromhell.string_mixin import StringMixIn | ||||
__all__ = ["Wikicode"] | __all__ = ["Wikicode"] | ||||
@@ -43,10 +42,18 @@ class Wikicode(StringMixIn): | |||||
return value.nodes | return value.nodes | ||||
if isinstance(value, Node): | if isinstance(value, Node): | ||||
return [value] | return [value] | ||||
if isinstance(value, str) or isinstance(value, unicode): | |||||
if isinstance(value, basestring): | |||||
return mwparserfromhell.parse(value).nodes | return mwparserfromhell.parse(value).nodes | ||||
error = "Needs string, Node, or Wikicode object, but got {0}: {1}" | |||||
raise ValueError(error.format(type(value), value)) | |||||
try: | |||||
nodelist = list(value) | |||||
except TypeError: | |||||
error = "Needs string, Node, iterable of Nodes, or Wikicode object, but got {0}: {1}" | |||||
raise ValueError(error.format(type(value), value)) | |||||
if not all([isinstance(node, Node) for node in nodelist]): | |||||
error = "Was passed an interable {0}, but it did not contain all Nodes: {1}" | |||||
raise ValueError(error.format(type(value), value)) | |||||
return nodelist | |||||
def _get_children(self, node): | def _get_children(self, node): | ||||
yield node | yield node | ||||
@@ -217,8 +224,17 @@ class Wikicode(StringMixIn): | |||||
return list(self.ifilter_text(recursive, matches, flags)) | return list(self.ifilter_text(recursive, matches, flags)) | ||||
def strip_code(self, normalize=True):
    """Return the text content of this object's top-level nodes.

    Only ``Text`` and ``HTMLEntity`` nodes found directly in
    ``self.nodes`` contribute; every other node type is left out. When
    *normalize* is true each ``HTMLEntity`` is replaced by the result of
    its ``normalize()`` method, otherwise it is kept in its markup form.
    The collected pieces are joined with single spaces.
    """
    pieces = []
    for node in self.nodes:
        if isinstance(node, Text):
            # join() never calls __unicode__ on its items, so every node
            # is rendered to a real unicode string before being collected.
            pieces.append(unicode(node))
        elif isinstance(node, HTMLEntity):
            if normalize:
                pieces.append(node.normalize())
            else:
                pieces.append(unicode(node))
    return u" ".join(pieces)
def get_tree(self): | def get_tree(self): | ||||
marker = object() # Random object we can find with certainty in a list | marker = object() # Random object we can find with certainty in a list | ||||