Browse Source

Apply bugfixes so that some tests pass on Python 3.

- Skip CTokenizer tests if CTokenizer is not available.
- TestStringMixin: Don't make assumptions about default encoding.
- Add urllib stuff to mwparserfromhell.compat.
- Fix compat.py's line endings.
- gen.next() -> next(gen)
- assert*Equals() -> assert*Equal()
tags/v0.2
Ben Kurtovic 11 years ago
parent
commit
7f87a1c4b3
7 changed files with 85 additions and 62 deletions
  1. +36
    -33
      mwparserfromhell/compat.py
  2. +1
    -1
      mwparserfromhell/string_mixin.py
  3. +5
    -1
      tests/test_ctokenizer.py
  4. +8
    -6
      tests/test_docs.py
  5. +3
    -3
      tests/test_smart_list.py
  6. +22
    -11
      tests/test_string_mixin.py
  7. +10
    -7
      tests/test_tokens.py

+ 36
- 33
mwparserfromhell/compat.py View File

@@ -1,33 +1,36 @@
# -*- coding: utf-8 -*-

"""
Implements support for both Python 2 and Python 3 by defining common types in
terms of their Python 2/3 variants. For example, :py:class:`str` is set to
:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise,
:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These
types are meant to be imported directly from within the parser's modules.
"""

import sys

# True when running under Python 3.x; selects which set of aliases below.
py3k = sys.version_info[0] == 3

if py3k:
    # On Python 3 the builtins already have the desired semantics; the
    # self-assignments simply re-export them from this module.
    bytes = bytes
    str = str
    basestring = str
    range = range
    maxsize = sys.maxsize
    import html.entities as htmlentities
    from io import StringIO
else:
    # On Python 2, map each name to its Python-2 equivalent so importers
    # always get text-as-unicode and lazy range semantics.
    bytes = str
    str = unicode
    basestring = basestring
    range = xrange
    maxsize = sys.maxint
    import htmlentitydefs as htmlentities
    from StringIO import StringIO

# Keep the module namespace clean: only the compat names are exported.
del sys
# -*- coding: utf-8 -*-

"""
Implements support for both Python 2 and Python 3 by defining common types in
terms of their Python 2/3 variants. For example, :py:class:`str` is set to
:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise,
:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These
types are meant to be imported directly from within the parser's modules.
"""

import sys

# True when running under Python 3.x; selects which set of aliases below.
py3k = sys.version_info[0] == 3

if py3k:
    # On Python 3 the builtins already have the desired semantics; the
    # self-assignments simply re-export them from this module.
    bytes = bytes
    str = str
    basestring = str
    range = range
    maxsize = sys.maxsize
    import html.entities as htmlentities
    from io import StringIO
    # urllib was split into urllib.parse / urllib.request in Python 3.
    from urllib.parse import urlencode
    from urllib.request import urlopen
else:
    # On Python 2, map each name to its Python-2 equivalent so importers
    # always get text-as-unicode and lazy range semantics.
    bytes = str
    str = unicode
    basestring = basestring
    range = xrange
    maxsize = sys.maxint
    import htmlentitydefs as htmlentities
    from StringIO import StringIO
    from urllib import urlencode, urlopen

# Keep the module namespace clean: only the compat names are exported.
del sys

+ 1
- 1
mwparserfromhell/string_mixin.py View File

@@ -252,8 +252,8 @@ class StringMixIn(object):
         return self.__unicode__().lstrip(chars)

     if py3k:
-        @inheritdoc
         @staticmethod
+        @inheritdoc
         def maketrans(self, x, y=None, z=None):
             if z is None:
                 if y is None:


+ 5
- 1
tests/test_ctokenizer.py View File

@@ -23,10 +23,14 @@
 from __future__ import unicode_literals
 import unittest

-from mwparserfromhell.parser._tokenizer import CTokenizer
+try:
+    from mwparserfromhell.parser._tokenizer import CTokenizer
+except ImportError:
+    CTokenizer = None

 from _test_tokenizer import TokenizerTestCase

+@unittest.skipUnless(CTokenizer, "C tokenizer not available")
 class TestCTokenizer(TokenizerTestCase, unittest.TestCase):
     """Test cases for the C tokenizer."""




+ 8
- 6
tests/test_docs.py View File

@@ -23,10 +23,9 @@
 from __future__ import print_function, unicode_literals
 import json
 import unittest
-import urllib

 import mwparserfromhell
-from mwparserfromhell.compat import py3k, str, StringIO
+from mwparserfromhell.compat import py3k, str, StringIO, urlencode, urlopen

 class TestDocs(unittest.TestCase):
     """Integration test cases for mwparserfromhell's documentation."""
@@ -114,12 +113,15 @@ class TestDocs(unittest.TestCase):
         data = {"action": "query", "prop": "revisions", "rvlimit": 1,
                 "rvprop": "content", "format": "json", "titles": title}
         try:
-            raw = urllib.urlopen(url1, urllib.urlencode(data)).read()
+            raw = urlopen(url1, urlencode(data).encode("utf8")).read()
         except IOError:
             self.skipTest("cannot continue because of unsuccessful web call")
-        res = json.loads(raw)
-        text = res["query"]["pages"].values()[0]["revisions"][0]["*"]
-        expected = urllib.urlopen(url2.format(title)).read().decode("utf8")
+        res = json.loads(raw.decode("utf8"))
+        text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"]
+        try:
+            expected = urlopen(url2.format(title)).read().decode("utf8")
+        except IOError:
+            self.skipTest("cannot continue because of unsuccessful web call")
         actual = mwparserfromhell.parse(text)
         self.assertEqual(expected, actual)




+ 3
- 3
tests/test_smart_list.py View File

@@ -180,11 +180,11 @@ class TestSmartList(unittest.TestCase):
         gen1 = iter(list1)
         out = []
         for i in range(len(list1)):
-            out.append(gen1.next())
-        self.assertRaises(StopIteration, gen1.next)
+            out.append(next(gen1))
+        self.assertRaises(StopIteration, next, gen1)
         self.assertEqual([0, 1, 2, 3, "one", "two"], out)
         gen2 = iter(list2)
-        self.assertRaises(StopIteration, gen2.next)
+        self.assertRaises(StopIteration, next, gen2)

         self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1)))
         self.assertEqual([], list(reversed(list2)))


+ 22
- 11
tests/test_string_mixin.py View File

@@ -21,6 +21,7 @@
 # SOFTWARE.

 from __future__ import unicode_literals
+from sys import getdefaultencoding
 from types import GeneratorType
 import unittest

@@ -139,10 +140,10 @@ class TestStringMixIn(unittest.TestCase):

         out = []
         for i in range(len(str1)):
-            out.append(gen1.next())
-        self.assertRaises(StopIteration, gen1.next)
+            out.append(next(gen1))
+        self.assertRaises(StopIteration, next, gen1)
         self.assertEqual(expected, out)
-        self.assertRaises(StopIteration, gen2.next)
+        self.assertRaises(StopIteration, next, gen2)

         self.assertEqual("gnirts ekaf", "".join(list(reversed(str1))))
         self.assertEqual([], list(reversed(str2)))
@@ -187,17 +188,25 @@ class TestStringMixIn(unittest.TestCase):
         self.assertEqual("", str2.decode("punycode", "ignore"))

         str3 = _FakeString("𐌲𐌿𐍄")
+        actual = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84"
         self.assertEqual(b"fake string", str1.encode())
-        self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84",
-                         str3.encode("utf8"))
-        self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84",
-                         str3.encode(encoding="utf8"))
-        self.assertRaises(UnicodeEncodeError, str3.encode)
+        self.assertEqual(actual, str3.encode("utf-8"))
+        self.assertEqual(actual, str3.encode(encoding="utf-8"))
+        if getdefaultencoding() == "ascii":
+            self.assertRaises(UnicodeEncodeError, str3.encode)
+        elif getdefaultencoding() == "utf-8":
+            self.assertEqual(actual, str3.encode())
         self.assertRaises(UnicodeEncodeError, str3.encode, "ascii")
         self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict")
-        self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict")
-        self.assertEqual("", str3.encode("ascii", "ignore"))
-        self.assertEqual("", str3.encode(errors="ignore"))
+        if getdefaultencoding() == "ascii":
+            self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict")
+        elif getdefaultencoding() == "utf-8":
+            self.assertEqual(actual, str3.encode(errors="strict"))
+        self.assertEqual(b"", str3.encode("ascii", "ignore"))
+        if getdefaultencoding() == "ascii":
+            self.assertEqual(b"", str3.encode(errors="ignore"))
+        elif getdefaultencoding() == "utf-8":
+            self.assertEqual(actual, str3.encode(errors="ignore"))

         self.assertTrue(str1.endswith("ing"))
         self.assertFalse(str1.endswith("ingh"))
@@ -364,6 +373,7 @@ class TestStringMixIn(unittest.TestCase):
         actual = [" this is a sentence with", "", "whitespace", ""]
         self.assertEqual(actual, str25.rsplit(" ", 3))
         if py3k:
+            actual = [" this is a", "sentence", "with", "whitespace"]
             self.assertEqual(actual, str25.rsplit(maxsplit=3))

         self.assertEqual("fake string", str1.rstrip())
@@ -381,6 +391,7 @@ class TestStringMixIn(unittest.TestCase):
         actual = ["", "", "", "this is a sentence with whitespace "]
         self.assertEqual(actual, str25.split(" ", 3))
         if py3k:
+            actual = ["this", "is", "a", "sentence with whitespace "]
             self.assertEqual(actual, str25.split(maxsplit=3))

         str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere")


+ 10
- 7
tests/test_tokens.py View File

@@ -65,12 +65,15 @@ class TestTokens(unittest.TestCase):

         self.assertEqual("Token()", repr(token1))
         if py3k:
-            token2repr = "Token(foo='bar', baz=123)"
+            token2repr1 = "Token(foo='bar', baz=123)"
+            token2repr2 = "Token(baz=123, foo='bar')"
             token3repr = "Text(text='" + hundredchars + "')"
         else:
-            token2repr = "Token(foo=u'bar', baz=123)"
+            token2repr1 = "Token(foo=u'bar', baz=123)"
+            token2repr2 = "Token(baz=123, foo=u'bar')"
             token3repr = "Text(text=u'" + hundredchars + "')"
-        self.assertEqual(token2repr, repr(token2))
+        token2repr = repr(token2)
+        self.assertTrue(token2repr == token2repr1 or token2repr == token2repr2)
         self.assertEqual(token3repr, repr(token3))

     def test_equality(self):
@@ -86,10 +89,10 @@ class TestTokens(unittest.TestCase):
         self.assertEqual(token2, token1)
         self.assertEqual(token4, token5)
         self.assertEqual(token5, token4)
-        self.assertNotEquals(token1, token3)
-        self.assertNotEquals(token2, token3)
-        self.assertNotEquals(token4, token6)
-        self.assertNotEquals(token5, token6)
+        self.assertNotEqual(token1, token3)
+        self.assertNotEqual(token2, token3)
+        self.assertNotEqual(token4, token6)
+        self.assertNotEqual(token5, token6)

     def test_repr_equality(self):
         "check that eval(repr(token)) == token"

Loading…
Cancel
Save