- Skip CTokenizer tests if CTokenizer is not available.
- TestStringMixIn: Don't make assumptions about default encoding.
- Add urllib stuff to mwparserfromhell.compat.
- Fix compat.py's line endings.
- gen.next() -> next(gen)
- assert*Equals() -> assert*Equal()
(tags/v0.2)
@@ -1,33 +1,36 @@ | |||
# -*- coding: utf-8 -*-

"""
Implements support for both Python 2 and Python 3 by defining common types in
terms of their Python 2/3 variants. For example, :py:class:`str` is set to
:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise,
:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These
types are meant to be imported directly from within the parser's modules.

Besides the types, ``maxsize``, ``htmlentities`` and ``StringIO`` are exposed
under names that are the same on both major versions.
"""

import sys

# True when running under Python 3; other modules import this flag to branch
# on version-specific behavior.
py3k = sys.version_info[0] == 3

if py3k:
    # The builtins are re-bound so that they become importable module
    # attributes, mirroring the names defined in the Python 2 branch.
    bytes = bytes
    str = str
    basestring = str
    range = range
    maxsize = sys.maxsize
    import html.entities as htmlentities
    from io import StringIO
else:
    # These names only exist on Python 2; this branch is never executed on 3.
    bytes = str
    str = unicode
    basestring = basestring
    range = xrange
    maxsize = sys.maxint
    import htmlentitydefs as htmlentities
    from StringIO import StringIO

# Keep ``sys`` out of the module's exported namespace.
del sys
# -*- coding: utf-8 -*-

"""
Implements support for both Python 2 and Python 3 by defining common types in
terms of their Python 2/3 variants. For example, :py:class:`str` is set to
:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise,
:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These
types are meant to be imported directly from within the parser's modules.

Besides the types, ``maxsize``, ``htmlentities``, ``StringIO``, ``urlencode``
and ``urlopen`` are exposed under names that are the same on both major
versions.
"""

import sys

# True when running under Python 3; other modules import this flag to branch
# on version-specific behavior.
py3k = sys.version_info[0] == 3

if py3k:
    # The builtins are re-bound so that they become importable module
    # attributes, mirroring the names defined in the Python 2 branch.
    bytes = bytes
    str = str
    basestring = str
    range = range
    maxsize = sys.maxsize
    import html.entities as htmlentities
    from io import StringIO
    # On Python 3 the urllib helpers live in two separate submodules.
    from urllib.parse import urlencode
    from urllib.request import urlopen
else:
    # These names only exist on Python 2; this branch is never executed on 3.
    bytes = str
    str = unicode
    basestring = basestring
    range = xrange
    maxsize = sys.maxint
    import htmlentitydefs as htmlentities
    from StringIO import StringIO
    from urllib import urlencode, urlopen

# Keep ``sys`` out of the module's exported namespace.
del sys
@@ -252,8 +252,8 @@ class StringMixIn(object): | |||
return self.__unicode__().lstrip(chars) | |||
if py3k: | |||
@inheritdoc | |||
@staticmethod | |||
@inheritdoc | |||
def maketrans(self, x, y=None, z=None): | |||
if z is None: | |||
if y is None: | |||
@@ -23,10 +23,14 @@ | |||
from __future__ import unicode_literals | |||
import unittest | |||
from mwparserfromhell.parser._tokenizer import CTokenizer | |||
try: | |||
from mwparserfromhell.parser._tokenizer import CTokenizer | |||
except ImportError: | |||
CTokenizer = None | |||
from _test_tokenizer import TokenizerTestCase | |||
@unittest.skipUnless(CTokenizer, "C tokenizer not available") | |||
class TestCTokenizer(TokenizerTestCase, unittest.TestCase): | |||
"""Test cases for the C tokenizer.""" | |||
@@ -23,10 +23,9 @@ | |||
from __future__ import print_function, unicode_literals | |||
import json | |||
import unittest | |||
import urllib | |||
import mwparserfromhell | |||
from mwparserfromhell.compat import py3k, str, StringIO | |||
from mwparserfromhell.compat import py3k, str, StringIO, urlencode, urlopen | |||
class TestDocs(unittest.TestCase): | |||
"""Integration test cases for mwparserfromhell's documentation.""" | |||
@@ -114,12 +113,15 @@ class TestDocs(unittest.TestCase): | |||
data = {"action": "query", "prop": "revisions", "rvlimit": 1, | |||
"rvprop": "content", "format": "json", "titles": title} | |||
try: | |||
raw = urllib.urlopen(url1, urllib.urlencode(data)).read() | |||
raw = urlopen(url1, urlencode(data).encode("utf8")).read() | |||
except IOError: | |||
self.skipTest("cannot continue because of unsuccessful web call") | |||
res = json.loads(raw.decode("utf8")) | |||
text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"] | |||
try: | |||
expected = urlopen(url2.format(title)).read().decode("utf8") | |||
except IOError: | |||
self.skipTest("cannot continue because of unsuccessful web call") | |||
res = json.loads(raw) | |||
text = res["query"]["pages"].values()[0]["revisions"][0]["*"] | |||
expected = urllib.urlopen(url2.format(title)).read().decode("utf8") | |||
actual = mwparserfromhell.parse(text) | |||
self.assertEqual(expected, actual) | |||
@@ -180,11 +180,11 @@ class TestSmartList(unittest.TestCase): | |||
gen1 = iter(list1) | |||
out = [] | |||
for i in range(len(list1)): | |||
out.append(gen1.next()) | |||
self.assertRaises(StopIteration, gen1.next) | |||
out.append(next(gen1)) | |||
self.assertRaises(StopIteration, next, gen1) | |||
self.assertEqual([0, 1, 2, 3, "one", "two"], out) | |||
gen2 = iter(list2) | |||
self.assertRaises(StopIteration, gen2.next) | |||
self.assertRaises(StopIteration, next, gen2) | |||
self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1))) | |||
self.assertEqual([], list(reversed(list2))) | |||
@@ -21,6 +21,7 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from sys import getdefaultencoding | |||
from types import GeneratorType | |||
import unittest | |||
@@ -139,10 +140,10 @@ class TestStringMixIn(unittest.TestCase): | |||
out = [] | |||
for i in range(len(str1)): | |||
out.append(gen1.next()) | |||
self.assertRaises(StopIteration, gen1.next) | |||
out.append(next(gen1)) | |||
self.assertRaises(StopIteration, next, gen1) | |||
self.assertEqual(expected, out) | |||
self.assertRaises(StopIteration, gen2.next) | |||
self.assertRaises(StopIteration, next, gen2) | |||
self.assertEqual("gnirts ekaf", "".join(list(reversed(str1)))) | |||
self.assertEqual([], list(reversed(str2))) | |||
@@ -187,17 +188,25 @@ class TestStringMixIn(unittest.TestCase): | |||
self.assertEqual("", str2.decode("punycode", "ignore")) | |||
str3 = _FakeString("𐌲𐌿𐍄") | |||
actual = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84" | |||
self.assertEqual(b"fake string", str1.encode()) | |||
self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", | |||
str3.encode("utf8")) | |||
self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", | |||
str3.encode(encoding="utf8")) | |||
self.assertRaises(UnicodeEncodeError, str3.encode) | |||
self.assertEqual(actual, str3.encode("utf-8")) | |||
self.assertEqual(actual, str3.encode(encoding="utf-8")) | |||
if getdefaultencoding() == "ascii": | |||
self.assertRaises(UnicodeEncodeError, str3.encode) | |||
elif getdefaultencoding() == "utf-8": | |||
self.assertEqual(actual, str3.encode()) | |||
self.assertRaises(UnicodeEncodeError, str3.encode, "ascii") | |||
self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict") | |||
self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") | |||
self.assertEqual("", str3.encode("ascii", "ignore")) | |||
self.assertEqual("", str3.encode(errors="ignore")) | |||
if getdefaultencoding() == "ascii": | |||
self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") | |||
elif getdefaultencoding() == "utf-8": | |||
self.assertEqual(actual, str3.encode(errors="strict")) | |||
self.assertEqual(b"", str3.encode("ascii", "ignore")) | |||
if getdefaultencoding() == "ascii": | |||
self.assertEqual(b"", str3.encode(errors="ignore")) | |||
elif getdefaultencoding() == "utf-8": | |||
self.assertEqual(actual, str3.encode(errors="ignore")) | |||
self.assertTrue(str1.endswith("ing")) | |||
self.assertFalse(str1.endswith("ingh")) | |||
@@ -364,6 +373,7 @@ class TestStringMixIn(unittest.TestCase): | |||
actual = [" this is a sentence with", "", "whitespace", ""] | |||
self.assertEqual(actual, str25.rsplit(" ", 3)) | |||
if py3k: | |||
actual = [" this is a", "sentence", "with", "whitespace"] | |||
self.assertEqual(actual, str25.rsplit(maxsplit=3)) | |||
self.assertEqual("fake string", str1.rstrip()) | |||
@@ -381,6 +391,7 @@ class TestStringMixIn(unittest.TestCase): | |||
actual = ["", "", "", "this is a sentence with whitespace "] | |||
self.assertEqual(actual, str25.split(" ", 3)) | |||
if py3k: | |||
actual = ["this", "is", "a", "sentence with whitespace "] | |||
self.assertEqual(actual, str25.split(maxsplit=3)) | |||
str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") | |||
@@ -65,12 +65,15 @@ class TestTokens(unittest.TestCase): | |||
self.assertEqual("Token()", repr(token1)) | |||
if py3k: | |||
token2repr = "Token(foo='bar', baz=123)" | |||
token2repr1 = "Token(foo='bar', baz=123)" | |||
token2repr2 = "Token(baz=123, foo='bar')" | |||
token3repr = "Text(text='" + hundredchars + "')" | |||
else: | |||
token2repr = "Token(foo=u'bar', baz=123)" | |||
token2repr1 = "Token(foo=u'bar', baz=123)" | |||
token2repr2 = "Token(baz=123, foo=u'bar')" | |||
token3repr = "Text(text=u'" + hundredchars + "')" | |||
self.assertEqual(token2repr, repr(token2)) | |||
token2repr = repr(token2) | |||
self.assertTrue(token2repr == token2repr1 or token2repr == token2repr2) | |||
self.assertEqual(token3repr, repr(token3)) | |||
def test_equality(self): | |||
@@ -86,10 +89,10 @@ class TestTokens(unittest.TestCase): | |||
self.assertEqual(token2, token1) | |||
self.assertEqual(token4, token5) | |||
self.assertEqual(token5, token4) | |||
self.assertNotEquals(token1, token3) | |||
self.assertNotEquals(token2, token3) | |||
self.assertNotEquals(token4, token6) | |||
self.assertNotEquals(token5, token6) | |||
self.assertNotEqual(token1, token3) | |||
self.assertNotEqual(token2, token3) | |||
self.assertNotEqual(token4, token6) | |||
self.assertNotEqual(token5, token6) | |||
def test_repr_equality(self): | |||
"check that eval(repr(token)) == token" | |||