From 7f87a1c4b371f813d5006b25cf39f2b40b4dc58e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 19:39:12 -0400 Subject: [PATCH] Apply bugfixes so that some tests pass on Python 3. - Skip CTokenizer tests if CTokenizer is not available. - TestStringMixin: Don't make assumptions about default encoding. - Add urllib stuff to mwparserfromhell.compat. - Fix compat.py's line endings. - gen.next() -> next(gen) - assert*Equals() -> assert*Equal() --- mwparserfromhell/compat.py | 69 +++++++++++++++++++++------------------- mwparserfromhell/string_mixin.py | 2 +- tests/test_ctokenizer.py | 6 +++- tests/test_docs.py | 14 ++++---- tests/test_smart_list.py | 6 ++-- tests/test_string_mixin.py | 33 ++++++++++++------- tests/test_tokens.py | 17 ++++++---- 7 files changed, 85 insertions(+), 62 deletions(-) diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index 48b9807..34870e6 100755 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -1,33 +1,36 @@ -# -*- coding: utf-8 -*- - -""" -Implements support for both Python 2 and Python 3 by defining common types in -terms of their Python 2/3 variants. For example, :py:class:`str` is set to -:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, -:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These -types are meant to be imported directly from within the parser's modules. -""" - -import sys - -py3k = sys.version_info[0] == 3 - -if py3k: - bytes = bytes - str = str - basestring = str - range = range - maxsize = sys.maxsize - import html.entities as htmlentities - from io import StringIO - -else: - bytes = str - str = unicode - basestring = basestring - range = xrange - maxsize = sys.maxint - import htmlentitydefs as htmlentities - from StringIO import StringIO - -del sys +# -*- coding: utf-8 -*- + +""" +Implements support for both Python 2 and Python 3 by defining common types in +terms of their Python 2/3 variants. 
For example, :py:class:`str` is set to +:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, +:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These +types are meant to be imported directly from within the parser's modules. +""" + +import sys + +py3k = sys.version_info[0] == 3 + +if py3k: + bytes = bytes + str = str + basestring = str + range = range + maxsize = sys.maxsize + import html.entities as htmlentities + from io import StringIO + from urllib.parse import urlencode + from urllib.request import urlopen + +else: + bytes = str + str = unicode + basestring = basestring + range = xrange + maxsize = sys.maxint + import htmlentitydefs as htmlentities + from StringIO import StringIO + from urllib import urlencode, urlopen + +del sys diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index eee58b9..6bee9c4 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -252,8 +252,8 @@ class StringMixIn(object): return self.__unicode__().lstrip(chars) if py3k: - @inheritdoc @staticmethod + @inheritdoc def maketrans(self, x, y=None, z=None): if z is None: if y is None: diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 7ef8975..f21378c 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -23,10 +23,14 @@ from __future__ import unicode_literals import unittest -from mwparserfromhell.parser._tokenizer import CTokenizer +try: + from mwparserfromhell.parser._tokenizer import CTokenizer +except ImportError: + CTokenizer = None from _test_tokenizer import TokenizerTestCase +@unittest.skipUnless(CTokenizer, "C tokenizer not available") class TestCTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the C tokenizer.""" diff --git a/tests/test_docs.py b/tests/test_docs.py index 971c5d1..3b23bb7 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -23,10 +23,9 @@ from __future__ import print_function, unicode_literals 
import json import unittest -import urllib import mwparserfromhell -from mwparserfromhell.compat import py3k, str, StringIO +from mwparserfromhell.compat import py3k, str, StringIO, urlencode, urlopen class TestDocs(unittest.TestCase): """Integration test cases for mwparserfromhell's documentation.""" @@ -114,12 +113,15 @@ class TestDocs(unittest.TestCase): data = {"action": "query", "prop": "revisions", "rvlimit": 1, "rvprop": "content", "format": "json", "titles": title} try: - raw = urllib.urlopen(url1, urllib.urlencode(data)).read() + raw = urlopen(url1, urlencode(data).encode("utf8")).read() + except IOError: + self.skipTest("cannot continue because of unsuccessful web call") + res = json.loads(raw.decode("utf8")) + text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"] + try: + expected = urlopen(url2.format(title)).read().decode("utf8") except IOError: self.skipTest("cannot continue because of unsuccessful web call") - res = json.loads(raw) - text = res["query"]["pages"].values()[0]["revisions"][0]["*"] - expected = urllib.urlopen(url2.format(title)).read().decode("utf8") actual = mwparserfromhell.parse(text) self.assertEqual(expected, actual) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index d821ccd..01caca7 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -180,11 +180,11 @@ class TestSmartList(unittest.TestCase): gen1 = iter(list1) out = [] for i in range(len(list1)): - out.append(gen1.next()) - self.assertRaises(StopIteration, gen1.next) + out.append(next(gen1)) + self.assertRaises(StopIteration, next, gen1) self.assertEqual([0, 1, 2, 3, "one", "two"], out) gen2 = iter(list2) - self.assertRaises(StopIteration, gen2.next) + self.assertRaises(StopIteration, next, gen2) self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1))) self.assertEqual([], list(reversed(list2))) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 6ef6344..6d10609 100644 --- 
a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -21,6 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals +from sys import getdefaultencoding from types import GeneratorType import unittest @@ -139,10 +140,10 @@ class TestStringMixIn(unittest.TestCase): out = [] for i in range(len(str1)): - out.append(gen1.next()) - self.assertRaises(StopIteration, gen1.next) + out.append(next(gen1)) + self.assertRaises(StopIteration, next, gen1) self.assertEqual(expected, out) - self.assertRaises(StopIteration, gen2.next) + self.assertRaises(StopIteration, next, gen2) self.assertEqual("gnirts ekaf", "".join(list(reversed(str1)))) self.assertEqual([], list(reversed(str2))) @@ -187,17 +188,25 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual("", str2.decode("punycode", "ignore")) str3 = _FakeString("𐌲𐌿𐍄") + actual = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84" self.assertEqual(b"fake string", str1.encode()) - self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", - str3.encode("utf8")) - self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", - str3.encode(encoding="utf8")) - self.assertRaises(UnicodeEncodeError, str3.encode) + self.assertEqual(actual, str3.encode("utf-8")) + self.assertEqual(actual, str3.encode(encoding="utf-8")) + if getdefaultencoding() == "ascii": + self.assertRaises(UnicodeEncodeError, str3.encode) + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, str3.encode()) self.assertRaises(UnicodeEncodeError, str3.encode, "ascii") self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict") - self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") - self.assertEqual("", str3.encode("ascii", "ignore")) - self.assertEqual("", str3.encode(errors="ignore")) + if getdefaultencoding() == "ascii": + self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, 
str3.encode(errors="strict")) + self.assertEqual(b"", str3.encode("ascii", "ignore")) + if getdefaultencoding() == "ascii": + self.assertEqual(b"", str3.encode(errors="ignore")) + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, str3.encode(errors="ignore")) self.assertTrue(str1.endswith("ing")) self.assertFalse(str1.endswith("ingh")) @@ -364,6 +373,7 @@ class TestStringMixIn(unittest.TestCase): actual = [" this is a sentence with", "", "whitespace", ""] self.assertEqual(actual, str25.rsplit(" ", 3)) if py3k: + actual = [" this is a", "sentence", "with", "whitespace"] self.assertEqual(actual, str25.rsplit(maxsplit=3)) self.assertEqual("fake string", str1.rstrip()) @@ -381,6 +391,7 @@ class TestStringMixIn(unittest.TestCase): actual = ["", "", "", "this is a sentence with whitespace "] self.assertEqual(actual, str25.split(" ", 3)) if py3k: + actual = ["this", "is", "a", "sentence with whitespace "] self.assertEqual(actual, str25.split(maxsplit=3)) str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 1449ad2..4620982 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -65,12 +65,15 @@ class TestTokens(unittest.TestCase): self.assertEqual("Token()", repr(token1)) if py3k: - token2repr = "Token(foo='bar', baz=123)" + token2repr1 = "Token(foo='bar', baz=123)" + token2repr2 = "Token(baz=123, foo='bar')" token3repr = "Text(text='" + hundredchars + "')" else: - token2repr = "Token(foo=u'bar', baz=123)" + token2repr1 = "Token(foo=u'bar', baz=123)" + token2repr2 = "Token(baz=123, foo=u'bar')" token3repr = "Text(text=u'" + hundredchars + "')" - self.assertEqual(token2repr, repr(token2)) + token2repr = repr(token2) + self.assertTrue(token2repr == token2repr1 or token2repr == token2repr2) self.assertEqual(token3repr, repr(token3)) def test_equality(self): @@ -86,10 +89,10 @@ class TestTokens(unittest.TestCase): self.assertEqual(token2, token1) self.assertEqual(token4, token5) 
self.assertEqual(token5, token4) - self.assertNotEquals(token1, token3) - self.assertNotEquals(token2, token3) - self.assertNotEquals(token4, token6) - self.assertNotEquals(token5, token6) + self.assertNotEqual(token1, token3) + self.assertNotEqual(token2, token3) + self.assertNotEqual(token4, token6) + self.assertNotEqual(token5, token6) def test_repr_equality(self): "check that eval(repr(token)) == token"