From 2eb76e7de0ba51731027b710737d95e69c8123e2 Mon Sep 17 00:00:00 2001
From: Riamse <andrewwang43@gmail.com>
Date: Mon, 20 Aug 2012 18:34:08 -0700
Subject: [PATCH] Adding Python 3 support

---
 .gitignore                            |   1 +
 mwparserfromhell/nodes/html_entity.py |  11 +--
 mwparserfromhell/nodes/tag.py         |   5 +-
 mwparserfromhell/nodes/template.py    |  24 +++---
 mwparserfromhell/nodes/text.py        |   5 +-
 mwparserfromhell/parser/builder.py    |   5 +-
 mwparserfromhell/parser/tokenizer.py  |   3 +-
 mwparserfromhell/parser/tokens.py     |  18 +++--
 mwparserfromhell/string_mixin.py      | 134 ++++++++++++++++++----------------
 mwparserfromhell/utils.py             |   5 +-
 mwparserfromhell/wikicode.py          |  12 +--
 11 files changed, 130 insertions(+), 93 deletions(-)

diff --git a/.gitignore b/.gitignore
index 4984243..ba02a04 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,5 +2,6 @@
 *.egg
 *.egg-info
 .DS_Store
+__pycache__
 build
 docs/_build
diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py
index 98bfd2e..f0df4b3 100644
--- a/mwparserfromhell/nodes/html_entity.py
+++ b/mwparserfromhell/nodes/html_entity.py
@@ -20,9 +20,10 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-import htmlentitydefs
+from __future__ import unicode_literals
 
 from . import Node
+from ..compat import str, bytes, htmlentitydefs
 
 __all__ = ["HTMLEntity"]
 
@@ -50,10 +51,10 @@ class HTMLEntity(Node):
 
     def __unicode__(self):
         if self.named:
-            return u"&{0};".format(self.value)
+            return "&{0};".format(self.value)
         if self.hexadecimal:
-            return u"&#{0}{1};".format(self.hex_char, self.value)
-        return u"&#{0};".format(self.value)
+            return "&#{0}{1};".format(self.hex_char, self.value)
+        return "&#{0};".format(self.value)
 
     def __strip__(self, normalize, collapse):
         if normalize:
@@ -71,7 +72,7 @@ class HTMLEntity(Node):
         except ValueError:
             # Test whether we're on the wide or narrow Python build. Check the
             # length of a non-BMP code point (U+1F64A, SPEAK-NO-EVIL MONKEY):
-            if len(u"\U0001F64A") == 2:
+            if len("\U0001F64A") == 2:
                 # Ensure this is within the range we can encode:
                 if value > 0x10FFFF:
                     raise ValueError("unichr() arg not in range(0x110000)")
diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py
index 24654b9..90cfb58 100644
--- a/mwparserfromhell/nodes/tag.py
+++ b/mwparserfromhell/nodes/tag.py
@@ -20,7 +20,10 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+from __future__ import unicode_literals
+
 from . import Node, Text
+from ..compat import str, bytes
 
 __all__ = ["Tag"]
 
@@ -92,7 +95,7 @@ class Tag(Node):
 
         result = "<" + unicode(self.tag)
         if self.attrs:
-            result += " " + u" ".join([unicode(attr) for attr in self.attrs])
+            result += " " + " ".join([unicode(attr) for attr in self.attrs])
         if self.self_closing:
             result += " " * self.open_padding + "/>"
         else:
diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py
index 581e8ce..b2a3b0d 100644
--- a/mwparserfromhell/nodes/template.py
+++ b/mwparserfromhell/nodes/template.py
@@ -20,12 +20,14 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+from __future__ import unicode_literals
 from collections import defaultdict
 import re
 
 from . import HTMLEntity, Node, Text
 from .extras import Parameter
 from ..utils import parse_anything
+from ..compat import str, bytes, basestring
 
 __all__ = ["Template"]
 
@@ -42,10 +44,10 @@ class Template(Node):
 
     def __unicode__(self):
         if self.params:
-            params = u"|".join([unicode(param) for param in self.params])
-            return "{{" + unicode(self.name) + "|" + params + "}}"
+            params = "|".join([str(param) for param in self.params])
+            return "{{" + str(self.name) + "|" + params + "}}"
         else:
-            return "{{" + unicode(self.name) + "}}"
+            return "{{" + str(self.name) + "}}"
 
     def __iternodes__(self, getter):
         yield None, self
@@ -77,7 +79,7 @@ class Template(Node):
                 code.replace(node, node.replace(char, replacement))
 
     def _blank_param_value(self, value):
-        match = re.search(r"^(\s*).*?(\s*)$", unicode(value), FLAGS)
+        match = re.search(r"^(\s*).*?(\s*)$", str(value), FLAGS)
         value.nodes = [Text(match.group(1)), Text(match.group(2))]
 
     def _select_theory(self, theories):
@@ -85,13 +87,13 @@ class Template(Node):
             best = max(theories.values())
             confidence = float(best) / sum(theories.values())
             if confidence > 0.75:
-                return theories.keys()[theories.values().index(best)]
+                return tuple(theories.keys())[tuple(theories.values()).index(best)]
 
     def _get_spacing_conventions(self):
         before_theories = defaultdict(lambda: 0)
         after_theories = defaultdict(lambda: 0)
         for param in self.params:
-            match = re.search(r"^(\s*).*?(\s*)$", unicode(param.value), FLAGS)
+            match = re.search(r"^(\s*).*?(\s*)$", str(param.value), FLAGS)
             before, after = match.group(1), match.group(2)
             before_theories[before] += 1
             after_theories[after] += 1
@@ -124,7 +126,7 @@ class Template(Node):
         return self._params
 
     def has_param(self, name, ignore_empty=True):
-        name = name.strip() if isinstance(name, basestring) else unicode(name)
+        name = name.strip() if isinstance(name, basestring) else str(name)
         for param in self.params:
             if param.name.strip() == name:
                 if ignore_empty and not param.value.strip():
@@ -133,7 +135,7 @@ class Template(Node):
         return False
 
     def get(self, name):
-        name = name.strip() if isinstance(name, basestring) else unicode(name)
+        name = name.strip() if isinstance(name, basestring) else str(name)
         for param in reversed(self.params):
             if param.name.strip() == name:
                 return param
@@ -159,7 +161,7 @@ class Template(Node):
 
         if showkey is None:
             try:
-                int_name = int(unicode(name))
+                int_name = int(str(name))
             except ValueError:
                 showkey = True
             else:
@@ -167,7 +169,7 @@ class Template(Node):
                 for param in self.params:
                     if not param.showkey:
                         if re.match(r"[1-9][0-9]*$", param.name.strip()):
-                            int_keys.add(int(unicode(param.name)))
+                            int_keys.add(int(str(param.name)))
                 expected = min(set(range(1, len(int_keys) + 2)) - int_keys)
                 if expected == int_name:
                     showkey = False
@@ -188,7 +190,7 @@ class Template(Node):
         return param
 
     def remove(self, name, keep_field=False, force_no_field=False):
-        name = name.strip() if isinstance(name, basestring) else unicode(name)
+        name = name.strip() if isinstance(name, basestring) else str(name)
         removed = False
         for i, param in enumerate(self.params):
             if param.name.strip() == name:
diff --git a/mwparserfromhell/nodes/text.py b/mwparserfromhell/nodes/text.py
index 4b4d4ac..ce5b513 100644
--- a/mwparserfromhell/nodes/text.py
+++ b/mwparserfromhell/nodes/text.py
@@ -20,7 +20,10 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+from __future__ import unicode_literals
+
 from . import Node
+from ..compat import str, bytes, basestring
 
 __all__ = ["Text"]
 
@@ -30,7 +33,7 @@ class Text(Node):
         self._value = value
 
     def __unicode__(self):
-        return unicode(self.value)
+        return str(self.value)
 
     def __strip__(self, normalize, collapse):
         return self
diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py
index d352321..fd94788 100644
--- a/mwparserfromhell/parser/builder.py
+++ b/mwparserfromhell/parser/builder.py
@@ -20,11 +20,14 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+from __future__ import unicode_literals
+
 from . import tokens
 from ..nodes import Heading, HTMLEntity, Tag, Template, Text
 from ..nodes.extras import Attribute, Parameter
 from ..smart_list import SmartList
 from ..wikicode import Wikicode
+from ..compat import str, bytes
 
 __all__ = ["Builder"]
 
@@ -62,7 +65,7 @@ class Builder(object):
                 self._tokens.append(token)
                 value = self._pop()
                 if not key:
-                    key = self._wrap([Text(unicode(default))])
+                    key = self._wrap([Text(str(default))])
                 return Parameter(key, value, showkey)
             else:
                 self._write(self._handle_token(token))
diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py
index 159ba67..3b80c98 100644
--- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py
@@ -20,13 +20,14 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-import htmlentitydefs
+from __future__ import unicode_literals
 from math import log
 import re
 import string
 
 from . import contexts
 from . import tokens
+from ..compat import htmlentitydefs
 
 __all__ = ["Tokenizer"]
 
diff --git a/mwparserfromhell/parser/tokens.py b/mwparserfromhell/parser/tokens.py
index 3cb73c9..b76df0d 100644
--- a/mwparserfromhell/parser/tokens.py
+++ b/mwparserfromhell/parser/tokens.py
@@ -20,6 +20,9 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+from __future__ import unicode_literals
+from ..compat import str, bytes, v
+
 __all__ = ["Token"]
 
 class Token(object):
@@ -33,7 +36,7 @@ class Token(object):
                 args.append(key + "=" + repr(value[:97] + "..."))
             else:
                 args.append(key + "=" + repr(value))
-        return u"{0}({1})".format(type(self).__name__, u", ".join(args))
+        return "{0}({1})".format(type(self).__name__, ", ".join(args))
 
     def __eq__(self, other):
         if isinstance(other, type(self)):
@@ -49,10 +52,13 @@ class Token(object):
     def __delattr__(self, key):
         del self._kwargs[key]
 
 
 def make(name):
+    if v < 3:
+        # Python 2's type() rejects unicode class names, so pass bytes.
+        name = name.encode("utf-8")
     __all__.append(name)
     return type(name, (Token,), {})
 
 Text = make("Text")
 
diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py
index e0c8364..99981ae 100644
--- a/mwparserfromhell/string_mixin.py
+++ b/mwparserfromhell/string_mixin.py
@@ -20,230 +20,240 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+from __future__ import unicode_literals
+from .compat import str, bytes, v
+
 __all__ = ["StringMixIn"]
 
 def inheritdoc(method):
-    method.__doc__ = getattr(unicode, method.func_name).__doc__
+    try:
+        method.__doc__ = getattr(str, method.__name__).__doc__
+    except AttributeError:
+        method.__doc__ = "This feature is only available on Python 2."
     return method
 
 
 class StringMixIn(object):
-    def __str__(self):
-        return unicode(self).encode("utf8")
+    if v >= 3:
+        def __str__(self):
+            return self.__unicode__()
+    else:
+        def __str__(self):
+            return self.__unicode__().encode("utf8")
 
     def __repr__(self):
-        return repr(unicode(self))
+        return repr(self.__unicode__())
 
     def __lt__(self, other):
         if isinstance(other, StringMixIn):
-            return unicode(self) < unicode(other)
-        return unicode(self) < other
+            return self.__unicode__() < other.__unicode__()
+        return self.__unicode__() < other
 
     def __le__(self, other):
         if isinstance(other, StringMixIn):
-            return unicode(self) <= unicode(other)
-        return unicode(self) <= other
+            return self.__unicode__() <= other.__unicode__()
+        return self.__unicode__() <= other
 
     def __eq__(self, other):
         if isinstance(other, StringMixIn):
-            return unicode(self) == unicode(other)
-        return unicode(self) == other
+            return self.__unicode__() == other.__unicode__()
+        return self.__unicode__() == other
 
     def __ne__(self, other):
         if isinstance(other, StringMixIn):
-            return unicode(self) != unicode(other)
-        return unicode(self) != other
+            return self.__unicode__() != other.__unicode__()
+        return self.__unicode__() != other
 
     def __gt__(self, other):
         if isinstance(other, StringMixIn):
-            return unicode(self) > unicode(other)
-        return unicode(self) > other
+            return self.__unicode__() > other.__unicode__()
+        return self.__unicode__() > other
 
     def __ge__(self, other):
         if isinstance(other, StringMixIn):
-            return unicode(self) >= unicode(other)
-        return unicode(self) >= other
+            return self.__unicode__() >= other.__unicode__()
+        return self.__unicode__() >= other
 
     def __nonzero__(self):
-        return bool(unicode(self))
+        return bool(self.__unicode__())
 
     def __unicode__(self):
         raise NotImplementedError()
 
     def __len__(self):
-        return len(unicode(self))
+        return len(self.__unicode__())
 
     def __iter__(self):
-        for char in unicode(self):
+        for char in self.__unicode__():
             yield char
 
     def __getitem__(self, key):
-        return unicode(self)[key]
+        return self.__unicode__()[key]
 
     def __contains__(self, item):
         if isinstance(item, StringMixIn):
-            return unicode(item) in unicode(self)
-        return item in unicode(self)
+            return item.__unicode__() in self.__unicode__()
+        return item in self.__unicode__()
 
     @inheritdoc
     def capitalize(self):
-        return unicode(self).capitalize()
+        return self.__unicode__().capitalize()
 
     @inheritdoc
     def center(self, width, fillchar=None):
-        return unicode(self).center(width, fillchar)
+        return self.__unicode__().center(width, fillchar)
 
     @inheritdoc
     def count(self, sub=None, start=None, end=None):
-        return unicode(self).count(sub, start, end)
+        return self.__unicode__().count(sub, start, end)
 
     @inheritdoc
     def decode(self, encoding=None, errors=None):
-        return unicode(self).decode(encoding, errors)
+        return self.__unicode__().decode(encoding, errors)
 
     @inheritdoc
     def encode(self, encoding=None, errors=None):
-        return unicode(self).encode(encoding, errors)
+        return self.__unicode__().encode(encoding, errors)
 
     @inheritdoc
     def endswith(self, prefix, start=None, end=None):
-        return unicode(self).endswith(prefix, start, end)
+        return self.__unicode__().endswith(prefix, start, end)
 
     @inheritdoc
     def expandtabs(self, tabsize=None):
-        return unicode(self).expandtabs(tabsize)
+        return self.__unicode__().expandtabs(tabsize)
 
     @inheritdoc
     def find(self, sub=None, start=None, end=None):
-        return unicode(self).find(sub, start, end)
+        return self.__unicode__().find(sub, start, end)
 
     @inheritdoc
     def format(self, *args, **kwargs):
-        return unicode(self).format(*args, **kwargs)
+        return self.__unicode__().format(*args, **kwargs)
 
     @inheritdoc
     def index(self, sub=None, start=None, end=None):
-        return unicode(self).index(sub, start, end)
+        return self.__unicode__().index(sub, start, end)
 
     @inheritdoc
     def isalnum(self):
-        return unicode(self).isalnum()
+        return self.__unicode__().isalnum()
 
     @inheritdoc
     def isalpha(self):
-        return unicode(self).isalpha()
+        return self.__unicode__().isalpha()
 
     @inheritdoc
     def isdecimal(self):
-        return unicode(self).isdecimal()
+        return self.__unicode__().isdecimal()
 
     @inheritdoc
     def isdigit(self):
-        return unicode(self).isdigit()
+        return self.__unicode__().isdigit()
 
     @inheritdoc
     def islower(self):
-        return unicode(self).islower()
+        return self.__unicode__().islower()
 
     @inheritdoc
     def isnumeric(self):
-        return unicode(self).isnumeric()
+        return self.__unicode__().isnumeric()
 
     @inheritdoc
     def isspace(self):
-        return unicode(self).isspace()
+        return self.__unicode__().isspace()
 
     @inheritdoc
     def istitle(self):
-        return unicode(self).istitle()
+        return self.__unicode__().istitle()
 
     @inheritdoc
     def isupper(self):
-        return unicode(self).isupper()
+        return self.__unicode__().isupper()
 
     @inheritdoc
     def join(self, iterable):
-        return unicode(self).join(iterable)
+        return self.__unicode__().join(iterable)
 
     @inheritdoc
     def ljust(self, width, fillchar=None):
-        return unicode(self).ljust(width, fillchar)
+        return self.__unicode__().ljust(width, fillchar)
 
     @inheritdoc
     def lower(self):
-        return unicode(self).lower()
+        return self.__unicode__().lower()
 
     @inheritdoc
     def lstrip(self, chars=None):
-        return unicode(self).lstrip(chars)
+        return self.__unicode__().lstrip(chars)
 
     @inheritdoc
     def partition(self, sep):
-        return unicode(self).partition(sep)
+        return self.__unicode__().partition(sep)
 
     @inheritdoc
     def replace(self, old, new, count):
-        return unicode(self).replace(old, new, count)
+        return self.__unicode__().replace(old, new, count)
 
     @inheritdoc
     def rfind(self, sub=None, start=None, end=None):
-        return unicode(self).rfind(sub, start, end)
+        return self.__unicode__().rfind(sub, start, end)
 
     @inheritdoc
     def rindex(self, sub=None, start=None, end=None):
-        return unicode(self).rindex(sub, start, end)
+        return self.__unicode__().rindex(sub, start, end)
 
     @inheritdoc
     def rjust(self, width, fillchar=None):
-        return unicode(self).rjust(width, fillchar)
+        return self.__unicode__().rjust(width, fillchar)
 
     @inheritdoc
     def rpartition(self, sep):
-        return unicode(self).rpartition(sep)
+        return self.__unicode__().rpartition(sep)
 
     @inheritdoc
     def rsplit(self, sep=None, maxsplit=None):
-        return unicode(self).rsplit(sep, maxsplit)
+        return self.__unicode__().rsplit(sep, maxsplit)
 
     @inheritdoc
     def rstrip(self, chars=None):
-        return unicode(self).rstrip(chars)
+        return self.__unicode__().rstrip(chars)
 
     @inheritdoc
     def split(self, sep=None, maxsplit=None):
-        return unicode(self).split(sep, maxsplit)
+        return self.__unicode__().split(sep, maxsplit)
 
     @inheritdoc
     def splitlines(self, keepends=None):
-        return unicode(self).splitlines(keepends)
+        return self.__unicode__().splitlines(keepends)
 
     @inheritdoc
     def startswith(self, prefix, start=None, end=None):
-        return unicode(self).startswith(prefix, start, end)
+        return self.__unicode__().startswith(prefix, start, end)
 
     @inheritdoc
     def strip(self, chars=None):
-        return unicode(self).strip(chars)
+        return self.__unicode__().strip(chars)
 
     @inheritdoc
     def swapcase(self):
-        return unicode(self).swapcase()
+        return self.__unicode__().swapcase()
 
     @inheritdoc
     def title(self):
-        return unicode(self).title()
+        return self.__unicode__().title()
 
     @inheritdoc
     def translate(self, table, deletechars=None):
-        return unicode(self).translate(table, deletechars)
+        return self.__unicode__().translate(table, deletechars)
 
     @inheritdoc
     def upper(self):
-        return unicode(self).upper()
+        return self.__unicode__().upper()
 
     @inheritdoc
     def zfill(self, width):
-        return unicode(self).zfill(width)
+        return self.__unicode__().zfill(width)
 
 
 del inheritdoc
diff --git a/mwparserfromhell/utils.py b/mwparserfromhell/utils.py
index 9c32c10..b92609a 100644
--- a/mwparserfromhell/utils.py
+++ b/mwparserfromhell/utils.py
@@ -20,9 +20,12 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+from __future__ import unicode_literals
+
 import mwparserfromhell
 from .nodes import Node
 from .smart_list import SmartList
+from .compat import str, bytes, basestring
 
 def parse_anything(value):
     wikicode = mwparserfromhell.wikicode.Wikicode
@@ -33,7 +36,7 @@ def parse_anything(value):
     if isinstance(value, basestring):
         return mwparserfromhell.parse(value)
     if isinstance(value, int):
-        return mwparserfromhell.parse(unicode(value))
+        return mwparserfromhell.parse(str(value))
     if value is None:
         return wikicode(SmartList())
     try:
diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py
index 1199e4b..390fad0 100644
--- a/mwparserfromhell/wikicode.py
+++ b/mwparserfromhell/wikicode.py
@@ -20,12 +20,14 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+from __future__ import unicode_literals
 import re
 import sys
 
 from .nodes import Heading, Node, Tag, Template, Text
 from .string_mixin import StringMixIn
 from .utils import parse_anything
+from .compat import str, bytes
 
 __all__ = ["Wikicode"]
 
@@ -37,7 +39,7 @@ class Wikicode(StringMixIn):
         self._nodes = nodes
 
     def __unicode__(self):
-        return "".join([unicode(node) for node in self.nodes])
+        return "".join([str(node) for node in self.nodes])
 
     def _get_children(self, node):
         for context, child in node.__iternodes__(self._get_all_nodes):
@@ -171,7 +173,7 @@ class Wikicode(StringMixIn):
             nodes = self.nodes
         for node in nodes:
             if not forcetype or isinstance(node, forcetype):
-                if not matches or re.search(matches, unicode(node), flags):
+                if not matches or re.search(matches, str(node), flags):
                     yield node
 
     def ifilter_templates(self, recursive=False, matches=None, flags=FLAGS):
@@ -229,15 +231,15 @@ class Wikicode(StringMixIn):
         for node in self.nodes:
             stripped = node.__strip__(normalize, collapse)
             if stripped:
-                nodes.append(unicode(stripped))
+                nodes.append(str(stripped))
 
         if collapse:
-            stripped = u"".join(nodes).strip("\n")
+            stripped = "".join(nodes).strip("\n")
             while "\n\n\n" in stripped:
                 stripped = stripped.replace("\n\n\n", "\n\n")
             return stripped
         else:
-            return u"".join(nodes)
+            return "".join(nodes)
 
     def get_tree(self):
         marker = object()  # Random object we can find with certainty in a list