From 83c5e3c9eceb714e186cd8fb642c609d49ddad45 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Wed, 11 Jul 2012 20:28:50 -0400
Subject: [PATCH] A unicode fix and a quick stub for strip_code()

---
 mwparserfromhell/string_mixin.py | 2 +-
 mwparserfromhell/wikicode.py     | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py
index ba2c7c6..6f0027f 100644
--- a/mwparserfromhell/string_mixin.py
+++ b/mwparserfromhell/string_mixin.py
@@ -27,7 +27,7 @@ class StringMixIn(object):
         return unicode(self).encode("utf8")
 
     def __repr__(self):
-        return repr(unicode(self).encode("utf8"))
+        return repr(unicode(self))
 
     def __lt__(self, other):
         if isinstance(other, unicodeingMixin):
diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py
index bb32216..bab3a0f 100644
--- a/mwparserfromhell/wikicode.py
+++ b/mwparserfromhell/wikicode.py
@@ -20,6 +20,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+import htmlentitydefs
 import re
 
 import mwparserfromhell
@@ -182,9 +183,9 @@ class Wikicode(StringMixIn):
     def filter_text(self, recursive=False, matches=None, flags=FLAGS):
         return list(self.ifilter_text(recursive, matches, flags))
 
-    def normalize(self):
-        ## Create a deep copy of self ##
-        return normalized
+    def strip_code(self, normalize=True):
+        # Magic with htmlentitydefs if normalize
+        return normalized(u" ".join(self.ifilter_text()))
 
     def show_tree(self):
         marker = object()  # Random object we can find with certainty in a list