From 7f87a1c4b371f813d5006b25cf39f2b40b4dc58e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 19:39:12 -0400 Subject: [PATCH] Apply bugfixes so that some tests pass on Python 3. - Skip CTokenizer tests if CTokenizer is not available. - TestStringMixin: Don't make assumptions about default encoding. - Add urllib stuff to mwparserfromhell.compat. - Fix compat.py's line endings. - gen.next() -> next(gen) - assert*Equals() -> assert*Equal() --- mwparserfromhell/compat.py | 69 +++++++++++++++++++++------------------- mwparserfromhell/string_mixin.py | 2 +- tests/test_ctokenizer.py | 6 +++- tests/test_docs.py | 14 ++++---- tests/test_smart_list.py | 6 ++-- tests/test_string_mixin.py | 33 ++++++++++++------- tests/test_tokens.py | 17 ++++++---- 7 files changed, 85 insertions(+), 62 deletions(-) diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index 48b9807..34870e6 100755 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -1,33 +1,36 @@ -# -*- coding: utf-8 -*- - -""" -Implements support for both Python 2 and Python 3 by defining common types in -terms of their Python 2/3 variants. For example, :py:class:`str` is set to -:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, -:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These -types are meant to be imported directly from within the parser's modules. -""" - -import sys - -py3k = sys.version_info[0] == 3 - -if py3k: - bytes = bytes - str = str - basestring = str - range = range - maxsize = sys.maxsize - import html.entities as htmlentities - from io import StringIO - -else: - bytes = str - str = unicode - basestring = basestring - range = xrange - maxsize = sys.maxint - import htmlentitydefs as htmlentities - from StringIO import StringIO - -del sys +# -*- coding: utf-8 -*- + +""" +Implements support for both Python 2 and Python 3 by defining common types in +terms of their Python 2/3 variants. 
For example, :py:class:`str` is set to +:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, +:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These +types are meant to be imported directly from within the parser's modules. +""" + +import sys + +py3k = sys.version_info[0] == 3 + +if py3k: + bytes = bytes + str = str + basestring = str + range = range + maxsize = sys.maxsize + import html.entities as htmlentities + from io import StringIO + from urllib.parse import urlencode + from urllib.request import urlopen + +else: + bytes = str + str = unicode + basestring = basestring + range = xrange + maxsize = sys.maxint + import htmlentitydefs as htmlentities + from StringIO import StringIO + from urllib import urlencode, urlopen + +del sys diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index eee58b9..6bee9c4 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -252,8 +252,8 @@ class StringMixIn(object): return self.__unicode__().lstrip(chars) if py3k: - @inheritdoc @staticmethod + @inheritdoc def maketrans(self, x, y=None, z=None): if z is None: if y is None: diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 7ef8975..f21378c 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -23,10 +23,14 @@ from __future__ import unicode_literals import unittest -from mwparserfromhell.parser._tokenizer import CTokenizer +try: + from mwparserfromhell.parser._tokenizer import CTokenizer +except ImportError: + CTokenizer = None from _test_tokenizer import TokenizerTestCase +@unittest.skipUnless(CTokenizer, "C tokenizer not available") class TestCTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the C tokenizer.""" diff --git a/tests/test_docs.py b/tests/test_docs.py index 971c5d1..3b23bb7 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -23,10 +23,9 @@ from __future__ import print_function, unicode_literals 
import json import unittest -import urllib import mwparserfromhell -from mwparserfromhell.compat import py3k, str, StringIO +from mwparserfromhell.compat import py3k, str, StringIO, urlencode, urlopen class TestDocs(unittest.TestCase): """Integration test cases for mwparserfromhell's documentation.""" @@ -114,12 +113,15 @@ class TestDocs(unittest.TestCase): data = {"action": "query", "prop": "revisions", "rvlimit": 1, "rvprop": "content", "format": "json", "titles": title} try: - raw = urllib.urlopen(url1, urllib.urlencode(data)).read() + raw = urlopen(url1, urlencode(data).encode("utf8")).read() + except IOError: + self.skipTest("cannot continue because of unsuccessful web call") + res = json.loads(raw.decode("utf8")) + text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"] + try: + expected = urlopen(url2.format(title)).read().decode("utf8") except IOError: self.skipTest("cannot continue because of unsuccessful web call") - res = json.loads(raw) - text = res["query"]["pages"].values()[0]["revisions"][0]["*"] - expected = urllib.urlopen(url2.format(title)).read().decode("utf8") actual = mwparserfromhell.parse(text) self.assertEqual(expected, actual) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index d821ccd..01caca7 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -180,11 +180,11 @@ class TestSmartList(unittest.TestCase): gen1 = iter(list1) out = [] for i in range(len(list1)): - out.append(gen1.next()) - self.assertRaises(StopIteration, gen1.next) + out.append(next(gen1)) + self.assertRaises(StopIteration, next, gen1) self.assertEqual([0, 1, 2, 3, "one", "two"], out) gen2 = iter(list2) - self.assertRaises(StopIteration, gen2.next) + self.assertRaises(StopIteration, next, gen2) self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1))) self.assertEqual([], list(reversed(list2))) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 6ef6344..6d10609 100644 --- 
a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -21,6 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals +from sys import getdefaultencoding from types import GeneratorType import unittest @@ -139,10 +140,10 @@ class TestStringMixIn(unittest.TestCase): out = [] for i in range(len(str1)): - out.append(gen1.next()) - self.assertRaises(StopIteration, gen1.next) + out.append(next(gen1)) + self.assertRaises(StopIteration, next, gen1) self.assertEqual(expected, out) - self.assertRaises(StopIteration, gen2.next) + self.assertRaises(StopIteration, next, gen2) self.assertEqual("gnirts ekaf", "".join(list(reversed(str1)))) self.assertEqual([], list(reversed(str2))) @@ -187,17 +188,25 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual("", str2.decode("punycode", "ignore")) str3 = _FakeString("𐌲𐌿𐍄") + actual = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84" self.assertEqual(b"fake string", str1.encode()) - self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", - str3.encode("utf8")) - self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", - str3.encode(encoding="utf8")) - self.assertRaises(UnicodeEncodeError, str3.encode) + self.assertEqual(actual, str3.encode("utf-8")) + self.assertEqual(actual, str3.encode(encoding="utf-8")) + if getdefaultencoding() == "ascii": + self.assertRaises(UnicodeEncodeError, str3.encode) + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, str3.encode()) self.assertRaises(UnicodeEncodeError, str3.encode, "ascii") self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict") - self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") - self.assertEqual("", str3.encode("ascii", "ignore")) - self.assertEqual("", str3.encode(errors="ignore")) + if getdefaultencoding() == "ascii": + self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, 
str3.encode(errors="strict")) + self.assertEqual(b"", str3.encode("ascii", "ignore")) + if getdefaultencoding() == "ascii": + self.assertEqual(b"", str3.encode(errors="ignore")) + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, str3.encode(errors="ignore")) self.assertTrue(str1.endswith("ing")) self.assertFalse(str1.endswith("ingh")) @@ -364,6 +373,7 @@ class TestStringMixIn(unittest.TestCase): actual = [" this is a sentence with", "", "whitespace", ""] self.assertEqual(actual, str25.rsplit(" ", 3)) if py3k: + actual = [" this is a", "sentence", "with", "whitespace"] self.assertEqual(actual, str25.rsplit(maxsplit=3)) self.assertEqual("fake string", str1.rstrip()) @@ -381,6 +391,7 @@ class TestStringMixIn(unittest.TestCase): actual = ["", "", "", "this is a sentence with whitespace "] self.assertEqual(actual, str25.split(" ", 3)) if py3k: + actual = ["this", "is", "a", "sentence with whitespace "] self.assertEqual(actual, str25.split(maxsplit=3)) str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 1449ad2..4620982 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -65,12 +65,15 @@ class TestTokens(unittest.TestCase): self.assertEqual("Token()", repr(token1)) if py3k: - token2repr = "Token(foo='bar', baz=123)" + token2repr1 = "Token(foo='bar', baz=123)" + token2repr2 = "Token(baz=123, foo='bar')" token3repr = "Text(text='" + hundredchars + "')" else: - token2repr = "Token(foo=u'bar', baz=123)" + token2repr1 = "Token(foo=u'bar', baz=123)" + token2repr2 = "Token(baz=123, foo=u'bar')" token3repr = "Text(text=u'" + hundredchars + "')" - self.assertEqual(token2repr, repr(token2)) + token2repr = repr(token2) + self.assertTrue(token2repr == token2repr1 or token2repr == token2repr2) self.assertEqual(token3repr, repr(token3)) def test_equality(self): @@ -86,10 +89,10 @@ class TestTokens(unittest.TestCase): self.assertEqual(token2, token1) self.assertEqual(token4, token5) 
self.assertEqual(token5, token4) - self.assertNotEquals(token1, token3) - self.assertNotEquals(token2, token3) - self.assertNotEquals(token4, token6) - self.assertNotEquals(token5, token6) + self.assertNotEqual(token1, token3) + self.assertNotEqual(token2, token3) + self.assertNotEqual(token4, token6) + self.assertNotEqual(token5, token6) def test_repr_equality(self): "check that eval(repr(token)) == token"