- Skip CTokenizer tests if CTokenizer is not available.
- TestStringMixIn: Don't make assumptions about the default encoding.
- Add urllib stuff to mwparserfromhell.compat.
- Fix compat.py's line endings.
- gen.next() -> next(gen)
- assert*Equals() -> assert*Equal()
@@ -1,33 +1,36 @@
 # -*- coding: utf-8 -*-

 """
 Implements support for both Python 2 and Python 3 by defining common types in
 terms of their Python 2/3 variants. For example, :py:class:`str` is set to
 :py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise,
 :py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These
 types are meant to be imported directly from within the parser's modules.
 """

 import sys

 py3k = sys.version_info[0] == 3

 if py3k:
     bytes = bytes
     str = str
     basestring = str
     range = range
     maxsize = sys.maxsize
     import html.entities as htmlentities
     from io import StringIO
+    from urllib.parse import urlencode
+    from urllib.request import urlopen

 else:
     bytes = str
     str = unicode
     basestring = basestring
     range = xrange
     maxsize = sys.maxint
     import htmlentitydefs as htmlentities
     from StringIO import StringIO
+    from urllib import urlencode, urlopen

 del sys
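
Note: these aliases exist so that parser modules never branch on sys.version_info
themselves; they just import whichever names they need from compat. A minimal
sketch (hypothetical consumer code, not part of this commit) of how the newly
added urllib names are meant to be used:

    from mwparserfromhell.compat import str, urlencode

    def build_query(title):
        # urlencode resolves to urllib.parse.urlencode on Python 3 and to
        # urllib.urlencode on Python 2; the call site stays identical
        data = urlencode({"action": "query", "titles": title})
        return str(data)  # compat.str is unicode on Python 2, str on Python 3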
@@ -252,8 +252,8 @@ class StringMixIn(object):
         return self.__unicode__().lstrip(chars)

     if py3k:
-        @inheritdoc
         @staticmethod
+        @inheritdoc
         def maketrans(self, x, y=None, z=None):
             if z is None:
                 if y is None:
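
Note: decorators apply bottom-up, so with @staticmethod on top, inheritdoc now
receives the plain function (whose __name__ and __doc__ are freely accessible)
before staticmethod wraps it. A standalone sketch with a stand-in decorator that
mimics what inheritdoc is for (copying the docstring from the matching built-in
str method); Python 3 only, matching the `if py3k:` branch:

    def inheritdoc(method):
        # stand-in: give the method the docstring of str's method of the same name
        method.__doc__ = getattr(str, method.__name__).__doc__
        return method

    class Demo(object):
        @staticmethod   # applied second, wraps the already-documented function
        @inheritdoc     # applied first, sees a plain function object
        def maketrans(x, y=None, z=None):
            pass

    print(Demo.maketrans.__doc__)  # prints str.maketrans's docstring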
@@ -23,10 +23,14 @@
 from __future__ import unicode_literals
 import unittest

-from mwparserfromhell.parser._tokenizer import CTokenizer
+try:
+    from mwparserfromhell.parser._tokenizer import CTokenizer
+except ImportError:
+    CTokenizer = None

 from _test_tokenizer import TokenizerTestCase

+@unittest.skipUnless(CTokenizer, "C tokenizer not available")
 class TestCTokenizer(TokenizerTestCase, unittest.TestCase):
     """Test cases for the C tokenizer."""

@@ -23,10 +23,9 @@
 from __future__ import print_function, unicode_literals
 import json
 import unittest
-import urllib

 import mwparserfromhell
-from mwparserfromhell.compat import py3k, str, StringIO
+from mwparserfromhell.compat import py3k, str, StringIO, urlencode, urlopen

 class TestDocs(unittest.TestCase):
     """Integration test cases for mwparserfromhell's documentation."""
@@ -114,12 +113,15 @@ class TestDocs(unittest.TestCase):
         data = {"action": "query", "prop": "revisions", "rvlimit": 1,
                 "rvprop": "content", "format": "json", "titles": title}
         try:
-            raw = urllib.urlopen(url1, urllib.urlencode(data)).read()
+            raw = urlopen(url1, urlencode(data).encode("utf8")).read()
         except IOError:
             self.skipTest("cannot continue because of unsuccessful web call")
-        res = json.loads(raw)
-        text = res["query"]["pages"].values()[0]["revisions"][0]["*"]
-        expected = urllib.urlopen(url2.format(title)).read().decode("utf8")
+        res = json.loads(raw.decode("utf8"))
+        text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"]
+        try:
+            expected = urlopen(url2.format(title)).read().decode("utf8")
+        except IOError:
+            self.skipTest("cannot continue because of unsuccessful web call")
         actual = mwparserfromhell.parse(text)
         self.assertEqual(expected, actual)

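Note: the extra encode/decode calls are what keep this working on Python 3,
where urlopen wants bytes for POST data and returns bytes, json.loads (on the
3.x releases of the time) wants text, and dict.values() is a non-indexable
view. A rough sketch of the same flow under those assumptions (illustrative
URL, not taken from the test):

    import json
    from mwparserfromhell.compat import urlencode, urlopen

    data = {"action": "query", "format": "json", "titles": "Test"}
    body = urlencode(data).encode("utf8")           # bytes for Python 3's urlopen
    raw = urlopen("https://en.wikipedia.org/w/api.php", body).read()
    res = json.loads(raw.decode("utf8"))            # bytes -> text before parsing
    page = list(res["query"]["pages"].values())[0]  # list() because values() is a view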
@@ -180,11 +180,11 @@ class TestSmartList(unittest.TestCase):
         gen1 = iter(list1)
         out = []
         for i in range(len(list1)):
-            out.append(gen1.next())
-        self.assertRaises(StopIteration, gen1.next)
+            out.append(next(gen1))
+        self.assertRaises(StopIteration, next, gen1)
         self.assertEqual([0, 1, 2, 3, "one", "two"], out)
         gen2 = iter(list2)
-        self.assertRaises(StopIteration, gen2.next)
+        self.assertRaises(StopIteration, next, gen2)

         self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1)))
         self.assertEqual([], list(reversed(list2)))
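
Note: generator objects lost their .next() method in Python 3 (it became
__next__()); the builtin next() works on both, and passing next plus the
generator to assertRaises keeps the call deferred so the exception is caught by
the assertion. For example:

    gen = iter([1, 2])
    assert next(gen) == 1   # portable; gen.next() only exists on Python 2
    assert next(gen) == 2
    try:
        next(gen)           # what assertRaises(StopIteration, next, gen) verifies
    except StopIteration:
        pass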
@@ -21,6 +21,7 @@
 # SOFTWARE.

 from __future__ import unicode_literals
+from sys import getdefaultencoding
 from types import GeneratorType
 import unittest

@@ -139,10 +140,10 @@ class TestStringMixIn(unittest.TestCase):

         out = []
         for i in range(len(str1)):
-            out.append(gen1.next())
-        self.assertRaises(StopIteration, gen1.next)
+            out.append(next(gen1))
+        self.assertRaises(StopIteration, next, gen1)
         self.assertEqual(expected, out)
-        self.assertRaises(StopIteration, gen2.next)
+        self.assertRaises(StopIteration, next, gen2)

         self.assertEqual("gnirts ekaf", "".join(list(reversed(str1))))
         self.assertEqual([], list(reversed(str2)))
@@ -187,17 +188,25 @@ class TestStringMixIn(unittest.TestCase):
         self.assertEqual("", str2.decode("punycode", "ignore"))

         str3 = _FakeString("𐌲𐌿𐍄")
+        actual = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84"
         self.assertEqual(b"fake string", str1.encode())
-        self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84",
-                         str3.encode("utf8"))
-        self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84",
-                         str3.encode(encoding="utf8"))
-        self.assertRaises(UnicodeEncodeError, str3.encode)
+        self.assertEqual(actual, str3.encode("utf-8"))
+        self.assertEqual(actual, str3.encode(encoding="utf-8"))
+        if getdefaultencoding() == "ascii":
+            self.assertRaises(UnicodeEncodeError, str3.encode)
+        elif getdefaultencoding() == "utf-8":
+            self.assertEqual(actual, str3.encode())
         self.assertRaises(UnicodeEncodeError, str3.encode, "ascii")
         self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict")
-        self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict")
-        self.assertEqual("", str3.encode("ascii", "ignore"))
-        self.assertEqual("", str3.encode(errors="ignore"))
+        if getdefaultencoding() == "ascii":
+            self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict")
+        elif getdefaultencoding() == "utf-8":
+            self.assertEqual(actual, str3.encode(errors="strict"))
+        self.assertEqual(b"", str3.encode("ascii", "ignore"))
+        if getdefaultencoding() == "ascii":
+            self.assertEqual(b"", str3.encode(errors="ignore"))
+        elif getdefaultencoding() == "utf-8":
+            self.assertEqual(actual, str3.encode(errors="ignore"))

         self.assertTrue(str1.endswith("ing"))
         self.assertFalse(str1.endswith("ingh"))
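
Note: encode() with no encoding argument falls back to sys.getdefaultencoding(),
which is "ascii" on Python 2 and "utf-8" on Python 3, so the expected outcome
depends on the interpreter rather than being a constant. Roughly, for the same
Gothic test string:

    from sys import getdefaultencoding

    s = u"\U00010332\U0001033F\U00010344"
    utf8 = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84"
    if getdefaultencoding() == "ascii":      # typical Python 2
        try:
            s.encode()
        except UnicodeEncodeError:
            print("default codec cannot represent the string")
    elif getdefaultencoding() == "utf-8":    # typical Python 3
        assert s.encode() == utf8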
@@ -364,6 +373,7 @@ class TestStringMixIn(unittest.TestCase):
         actual = [" this is a sentence with", "", "whitespace", ""]
         self.assertEqual(actual, str25.rsplit(" ", 3))
         if py3k:
+            actual = [" this is a", "sentence", "with", "whitespace"]
             self.assertEqual(actual, str25.rsplit(maxsplit=3))

         self.assertEqual("fake string", str1.rstrip())
@@ -381,6 +391,7 @@ class TestStringMixIn(unittest.TestCase):
         actual = ["", "", "", "this is a sentence with whitespace "]
         self.assertEqual(actual, str25.split(" ", 3))
         if py3k:
+            actual = ["this", "is", "a", "sentence with whitespace "]
             self.assertEqual(actual, str25.split(maxsplit=3))

         str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere")
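
Note: the new `actual` assignments are needed because split()/rsplit() with no
separator (the maxsplit= form) split on runs of whitespace and ignore leading
whitespace, so the expected output differs from splitting on an explicit " ".
For instance:

    s = "  a  b c "
    print(s.split(" ", 2))       # ['', '', 'a  b c ']  -- empty strings preserved
    print(s.split(maxsplit=2))   # ['a', 'b', 'c ']     -- whitespace runs collapsed
    # on Python 2 the keyword form is spelled s.split(None, 2)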
@@ -65,12 +65,15 @@ class TestTokens(unittest.TestCase):
         self.assertEqual("Token()", repr(token1))

         if py3k:
-            token2repr = "Token(foo='bar', baz=123)"
+            token2repr1 = "Token(foo='bar', baz=123)"
+            token2repr2 = "Token(baz=123, foo='bar')"
             token3repr = "Text(text='" + hundredchars + "')"
         else:
-            token2repr = "Token(foo=u'bar', baz=123)"
+            token2repr1 = "Token(foo=u'bar', baz=123)"
+            token2repr2 = "Token(baz=123, foo=u'bar')"
             token3repr = "Text(text=u'" + hundredchars + "')"
-        self.assertEqual(token2repr, repr(token2))
+        token2repr = repr(token2)
+        self.assertTrue(token2repr == token2repr1 or token2repr == token2repr2)
         self.assertEqual(token3repr, repr(token3))

     def test_equality(self):
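
Note: the repr may legitimately come out with the keyword attributes in either
order, because they arrive as **kwargs (a dict) and dict iteration order is not
guaranteed on the interpreters targeted here (CPython 2.x and pre-3.7, among
others), so the test now accepts both spellings. An illustration with a
stand-in class:

    class FakeToken(object):              # stand-in, not the real tokens.Token
        def __init__(self, **kwargs):
            self._kwargs = kwargs

        def __repr__(self):
            args = ", ".join("{0}={1!r}".format(k, v)
                             for k, v in self._kwargs.items())
            return "FakeToken({0})".format(args)

    rep = repr(FakeToken(foo="bar", baz=123))
    assert rep in ("FakeToken(foo='bar', baz=123)",
                   "FakeToken(baz=123, foo='bar')")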
@@ -86,10 +89,10 @@ class TestTokens(unittest.TestCase):
         self.assertEqual(token2, token1)
         self.assertEqual(token4, token5)
         self.assertEqual(token5, token4)
-        self.assertNotEquals(token1, token3)
-        self.assertNotEquals(token2, token3)
-        self.assertNotEquals(token4, token6)
-        self.assertNotEquals(token5, token6)
+        self.assertNotEqual(token1, token3)
+        self.assertNotEqual(token2, token3)
+        self.assertNotEqual(token4, token6)
+        self.assertNotEqual(token5, token6)

     def test_repr_equality(self):
         "check that eval(repr(token)) == token"