Apply bugfixes so that some tests pass on Python 3.

- Skip CTokenizer tests if CTokenizer is not available.
- TestStringMixin: Don't make assumptions about default encoding.
- Add urllib stuff to mwparserfromhell.compat.
- Fix compat.py's line endings.
- gen.next() -> next(gen)
- assert*Equals() -> assert*Equal()
11 years ago · 7f87a1c4b3
--- a/mwparserfromhell/compat.py
+++ b/mwparserfromhell/compat.py
@@ -1,33 +1,36 @@
 # -*- coding: utf-8 -*-

 """
 Implements support for both Python 2 and Python 3 by defining common types in
 terms of their Python 2/3 variants. For example, :py:class:`str` is set to
 :py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise,
 :py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These
 types are meant to be imported directly from within the parser's modules.
 """

 import sys

 py3k = sys.version_info[0] == 3

 if py3k:
    bytes = bytes
    str = str
    basestring = str
    range = range
    maxsize = sys.maxsize
    import html.entities as htmlentities
    from io import StringIO

 else:
    bytes = str
    str = unicode
    basestring = basestring
    range = xrange
    maxsize = sys.maxint
    import htmlentitydefs as htmlentities
    from StringIO import StringIO

 del sys
 # -*- coding: utf-8 -*-

 """
 Implements support for both Python 2 and Python 3 by defining common types in
 terms of their Python 2/3 variants. For example, :py:class:`str` is set to
 :py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise,
 :py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These
 types are meant to be imported directly from within the parser's modules.
 """

 import sys

 py3k = sys.version_info[0] == 3  # True only when the major version is 3

 if py3k:
    bytes = bytes  # re-bind the builtin so it is importable from this module
    str = str  # same: expose the builtin text type under this module's name
    basestring = str  # Python 3 has no basestring; alias it to str
    range = range  # re-bind the builtin so it is importable from this module
    maxsize = sys.maxsize  # common name for the platform's maximum size value
    import html.entities as htmlentities  # exposed as `htmlentities` on both
    from io import StringIO  # Python 3 location of StringIO
    from urllib.parse import urlencode  # Python 3 location of urlencode
    from urllib.request import urlopen  # Python 3 location of urlopen

 else:
    bytes = str  # on Python 2, `str` is the byte-string type
    str = unicode  # on Python 2, `unicode` is the text type
    basestring = basestring  # re-bind the Python 2 builtin for import
    range = xrange  # bind `range` to Python 2's xrange
    maxsize = sys.maxint  # Python 2 counterpart of the branch above
    import htmlentitydefs as htmlentities  # Python 2 name of html.entities
    from StringIO import StringIO  # Python 2 location of StringIO
    from urllib import urlencode, urlopen  # both live in `urllib` on Python 2

 del sys  # remove the helper import from this module's namespace
--- a/mwparserfromhell/string_mixin.py
+++ b/mwparserfromhell/string_mixin.py
@@ -252,8 +252,8 @@ class StringMixIn(object):
        return self.__unicode__().lstrip(chars)

    if py3k:
        @inheritdoc
        @staticmethod
        @inheritdoc
        def maketrans(self, x, y=None, z=None):
            if z is None:
                if y is None:
--- a/tests/test_ctokenizer.py
+++ b/tests/test_ctokenizer.py
@@ -23,10 +23,14 @@
 from __future__ import unicode_literals
 import unittest

 from mwparserfromhell.parser._tokenizer import CTokenizer
 try:
    from mwparserfromhell.parser._tokenizer import CTokenizer
 except ImportError:
    CTokenizer = None

 from _test_tokenizer import TokenizerTestCase

@unittest.skipUnless(CTokenizer, "C tokenizer not available")
 class TestCTokenizer(TokenizerTestCase, unittest.TestCase):
    """Test cases for the C tokenizer."""

--- a/tests/test_docs.py
+++ b/tests/test_docs.py
@@ -23,10 +23,9 @@
 from __future__ import print_function, unicode_literals
 import json
 import unittest
 import urllib

 import mwparserfromhell
 from mwparserfromhell.compat import py3k, str, StringIO
 from mwparserfromhell.compat import py3k, str, StringIO, urlencode, urlopen

 class TestDocs(unittest.TestCase):
    """Integration test cases for mwparserfromhell's documentation."""
@@ -114,12 +113,15 @@ class TestDocs(unittest.TestCase):
        data = {"action": "query", "prop": "revisions", "rvlimit": 1,
                "rvprop": "content", "format": "json", "titles": title}
        try:
            raw = urllib.urlopen(url1, urllib.urlencode(data)).read()
            raw = urlopen(url1, urlencode(data).encode("utf8")).read()
        except IOError:
            self.skipTest("cannot continue because of unsuccessful web call")
        res = json.loads(raw.decode("utf8"))
        text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"]
        try:
            expected = urlopen(url2.format(title)).read().decode("utf8")
        except IOError:
            self.skipTest("cannot continue because of unsuccessful web call")
        res = json.loads(raw)
        text = res["query"]["pages"].values()[0]["revisions"][0]["*"]
        expected = urllib.urlopen(url2.format(title)).read().decode("utf8")
        actual = mwparserfromhell.parse(text)
        self.assertEqual(expected, actual)

--- a/tests/test_smart_list.py
+++ b/tests/test_smart_list.py
@@ -180,11 +180,11 @@ class TestSmartList(unittest.TestCase):
        gen1 = iter(list1)
        out = []
        for i in range(len(list1)):
            out.append(gen1.next())
        self.assertRaises(StopIteration, gen1.next)
            out.append(next(gen1))
        self.assertRaises(StopIteration, next, gen1)
        self.assertEqual([0, 1, 2, 3, "one", "two"], out)
        gen2 = iter(list2)
        self.assertRaises(StopIteration, gen2.next)
        self.assertRaises(StopIteration, next, gen2)

        self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1)))
        self.assertEqual([], list(reversed(list2)))
--- a/tests/test_string_mixin.py
+++ b/tests/test_string_mixin.py
@@ -21,6 +21,7 @@
 # SOFTWARE.

 from __future__ import unicode_literals
 from sys import getdefaultencoding
 from types import GeneratorType
 import unittest

@@ -139,10 +140,10 @@ class TestStringMixIn(unittest.TestCase):

        out = []
        for i in range(len(str1)):
            out.append(gen1.next())
        self.assertRaises(StopIteration, gen1.next)
            out.append(next(gen1))
        self.assertRaises(StopIteration, next, gen1)
        self.assertEqual(expected, out)
        self.assertRaises(StopIteration, gen2.next)
        self.assertRaises(StopIteration, next, gen2)

        self.assertEqual("gnirts ekaf", "".join(list(reversed(str1))))
        self.assertEqual([], list(reversed(str2)))
@@ -187,17 +188,25 @@ class TestStringMixIn(unittest.TestCase):
            self.assertEqual("", str2.decode("punycode", "ignore"))

        str3 = _FakeString("𐌲𐌿𐍄")
        actual = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84"
        self.assertEqual(b"fake string", str1.encode())
        self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84",
                          str3.encode("utf8"))
        self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84",
                          str3.encode(encoding="utf8"))
        self.assertRaises(UnicodeEncodeError, str3.encode)
        self.assertEqual(actual, str3.encode("utf-8"))
        self.assertEqual(actual, str3.encode(encoding="utf-8"))
        if getdefaultencoding() == "ascii":
            self.assertRaises(UnicodeEncodeError, str3.encode)
        elif getdefaultencoding() == "utf-8":
            self.assertEqual(actual, str3.encode())
        self.assertRaises(UnicodeEncodeError, str3.encode, "ascii")
        self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict")
        self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict")
        self.assertEqual("", str3.encode("ascii", "ignore"))
        self.assertEqual("", str3.encode(errors="ignore"))
        if getdefaultencoding() == "ascii":
            self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict")
        elif getdefaultencoding() == "utf-8":
            self.assertEqual(actual, str3.encode(errors="strict"))
        self.assertEqual(b"", str3.encode("ascii", "ignore"))
        if getdefaultencoding() == "ascii":
            self.assertEqual(b"", str3.encode(errors="ignore"))
        elif getdefaultencoding() == "utf-8":
            self.assertEqual(actual, str3.encode(errors="ignore"))

        self.assertTrue(str1.endswith("ing"))
        self.assertFalse(str1.endswith("ingh"))
@@ -364,6 +373,7 @@ class TestStringMixIn(unittest.TestCase):
        actual = ["   this is a   sentence with", "", "whitespace", ""]
        self.assertEqual(actual, str25.rsplit(" ", 3))
        if py3k:
            actual = ["   this is a", "sentence", "with", "whitespace"]
            self.assertEqual(actual, str25.rsplit(maxsplit=3))

        self.assertEqual("fake string", str1.rstrip())
@@ -381,6 +391,7 @@ class TestStringMixIn(unittest.TestCase):
        actual = ["", "", "", "this is a   sentence with  whitespace "]
        self.assertEqual(actual, str25.split(" ", 3))
        if py3k:
            actual = ["this", "is", "a", "sentence with  whitespace "]
            self.assertEqual(actual, str25.split(maxsplit=3))

        str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere")
--- a/tests/test_tokens.py
+++ b/tests/test_tokens.py
@@ -65,12 +65,15 @@ class TestTokens(unittest.TestCase):

        self.assertEqual("Token()", repr(token1))
        if py3k:
            token2repr = "Token(foo='bar', baz=123)"
            token2repr1 = "Token(foo='bar', baz=123)"
            token2repr2 = "Token(baz=123, foo='bar')"
            token3repr = "Text(text='" + hundredchars + "')"
        else:
            token2repr = "Token(foo=u'bar', baz=123)"
            token2repr1 = "Token(foo=u'bar', baz=123)"
            token2repr2 = "Token(baz=123, foo=u'bar')"
            token3repr = "Text(text=u'" + hundredchars + "')"
        self.assertEqual(token2repr, repr(token2))
        token2repr = repr(token2)
        self.assertTrue(token2repr == token2repr1 or token2repr == token2repr2)
        self.assertEqual(token3repr, repr(token3))

    def test_equality(self):
@@ -86,10 +89,10 @@ class TestTokens(unittest.TestCase):
        self.assertEqual(token2, token1)
        self.assertEqual(token4, token5)
        self.assertEqual(token5, token4)
        self.assertNotEquals(token1, token3)
        self.assertNotEquals(token2, token3)
        self.assertNotEquals(token4, token6)
        self.assertNotEquals(token5, token6)
        self.assertNotEqual(token1, token3)
        self.assertNotEqual(token2, token3)
        self.assertNotEqual(token4, token6)
        self.assertNotEqual(token5, token6)

    def test_repr_equality(self):
        "check that eval(repr(token)) == token"