Browse Source

Apply bugfixes so that some tests pass on Python 3.

- Skip CTokenizer tests if CTokenizer is not available.
- TestStringMixin: Don't make assumptions about default encoding.
- Add urllib stuff to mwparserfromhell.compat.
- Fix compat.py's line endings.
- gen.next() -> next(gen)
- assert*Equals() -> assert*Equal()
tags/v0.2
Ben Kurtovic 11 years ago
parent
commit
7f87a1c4b3
7 changed files with 85 additions and 62 deletions
  1. +36
    -33
      mwparserfromhell/compat.py
  2. +1
    -1
      mwparserfromhell/string_mixin.py
  3. +5
    -1
      tests/test_ctokenizer.py
  4. +8
    -6
      tests/test_docs.py
  5. +3
    -3
      tests/test_smart_list.py
  6. +22
    -11
      tests/test_string_mixin.py
  7. +10
    -7
      tests/test_tokens.py

+ 36
- 33
mwparserfromhell/compat.py View File

@@ -1,33 +1,36 @@
# -*- coding: utf-8 -*-

"""
Implements support for both Python 2 and Python 3 by defining common types in
terms of their Python 2/3 variants. For example, :py:class:`str` is set to
:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise,
:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These
types are meant to be imported directly from within the parser's modules.
"""

import sys

# Single switch used below (and importable by other modules) to pick the
# right variant of each name.
py3k = sys.version_info[0] == 3

if py3k:
    # The builtins are re-bound to module-level names so that
    # ``from compat import str, bytes, range`` works on either version.
    bytes = bytes
    str = str
    basestring = str  # Python 3 has no ``basestring``; ``str`` stands in.
    range = range
    maxsize = sys.maxsize
    import html.entities as htmlentities
    from io import StringIO

else:
    # Python 2 branch: the names below (``unicode``, ``xrange``,
    # ``sys.maxint``, ``htmlentitydefs``) only exist on Python 2.
    bytes = str
    str = unicode
    basestring = basestring
    range = xrange
    maxsize = sys.maxint
    import htmlentitydefs as htmlentities
    from StringIO import StringIO

# Drop the helper import so ``sys`` is not part of this module's namespace.
del sys
# -*- coding: utf-8 -*-

"""
Implements support for both Python 2 and Python 3 by defining common types in
terms of their Python 2/3 variants. For example, :py:class:`str` is set to
:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise,
:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These
types are meant to be imported directly from within the parser's modules.
"""

import sys

# Version flag; every alias below is chosen from it, and callers may import
# it to branch on the interpreter version themselves.
py3k = sys.version_info[0] == 3

if py3k:
    # Re-export the Python 3 builtins under module-level names so callers
    # can import them from here regardless of interpreter version.
    bytes = bytes
    str = str
    basestring = str  # no ``basestring`` builtin on Python 3
    range = range
    maxsize = sys.maxsize
    import html.entities as htmlentities
    from io import StringIO
    # On Python 3 the urllib helpers are split across two submodules.
    from urllib.parse import urlencode
    from urllib.request import urlopen

else:
    # Python 2 only: ``unicode``, ``xrange``, ``sys.maxint`` and the
    # modules imported here do not exist on Python 3.
    bytes = str
    str = unicode
    basestring = basestring
    range = xrange
    maxsize = sys.maxint
    import htmlentitydefs as htmlentities
    from StringIO import StringIO
    from urllib import urlencode, urlopen

# ``sys`` was only needed to build the aliases above; remove it from the
# module namespace.
del sys

+ 1
- 1
mwparserfromhell/string_mixin.py View File

@@ -252,8 +252,8 @@ class StringMixIn(object):
return self.__unicode__().lstrip(chars)

if py3k:
@inheritdoc
@staticmethod
@inheritdoc
def maketrans(self, x, y=None, z=None):
if z is None:
if y is None:


+ 5
- 1
tests/test_ctokenizer.py View File

@@ -23,10 +23,14 @@
from __future__ import unicode_literals
import unittest

from mwparserfromhell.parser._tokenizer import CTokenizer
try:
from mwparserfromhell.parser._tokenizer import CTokenizer
except ImportError:
CTokenizer = None

from _test_tokenizer import TokenizerTestCase

@unittest.skipUnless(CTokenizer, "C tokenizer not available")
class TestCTokenizer(TokenizerTestCase, unittest.TestCase):
"""Test cases for the C tokenizer."""



+ 8
- 6
tests/test_docs.py View File

@@ -23,10 +23,9 @@
from __future__ import print_function, unicode_literals
import json
import unittest
import urllib

import mwparserfromhell
from mwparserfromhell.compat import py3k, str, StringIO
from mwparserfromhell.compat import py3k, str, StringIO, urlencode, urlopen

class TestDocs(unittest.TestCase):
"""Integration test cases for mwparserfromhell's documentation."""
@@ -114,12 +113,15 @@ class TestDocs(unittest.TestCase):
data = {"action": "query", "prop": "revisions", "rvlimit": 1,
"rvprop": "content", "format": "json", "titles": title}
try:
raw = urllib.urlopen(url1, urllib.urlencode(data)).read()
raw = urlopen(url1, urlencode(data).encode("utf8")).read()
except IOError:
self.skipTest("cannot continue because of unsuccessful web call")
res = json.loads(raw.decode("utf8"))
text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"]
try:
expected = urlopen(url2.format(title)).read().decode("utf8")
except IOError:
self.skipTest("cannot continue because of unsuccessful web call")
res = json.loads(raw)
text = res["query"]["pages"].values()[0]["revisions"][0]["*"]
expected = urllib.urlopen(url2.format(title)).read().decode("utf8")
actual = mwparserfromhell.parse(text)
self.assertEqual(expected, actual)



+ 3
- 3
tests/test_smart_list.py View File

@@ -180,11 +180,11 @@ class TestSmartList(unittest.TestCase):
gen1 = iter(list1)
out = []
for i in range(len(list1)):
out.append(gen1.next())
self.assertRaises(StopIteration, gen1.next)
out.append(next(gen1))
self.assertRaises(StopIteration, next, gen1)
self.assertEqual([0, 1, 2, 3, "one", "two"], out)
gen2 = iter(list2)
self.assertRaises(StopIteration, gen2.next)
self.assertRaises(StopIteration, next, gen2)

self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1)))
self.assertEqual([], list(reversed(list2)))


+ 22
- 11
tests/test_string_mixin.py View File

@@ -21,6 +21,7 @@
# SOFTWARE.

from __future__ import unicode_literals
from sys import getdefaultencoding
from types import GeneratorType
import unittest

@@ -139,10 +140,10 @@ class TestStringMixIn(unittest.TestCase):

out = []
for i in range(len(str1)):
out.append(gen1.next())
self.assertRaises(StopIteration, gen1.next)
out.append(next(gen1))
self.assertRaises(StopIteration, next, gen1)
self.assertEqual(expected, out)
self.assertRaises(StopIteration, gen2.next)
self.assertRaises(StopIteration, next, gen2)

self.assertEqual("gnirts ekaf", "".join(list(reversed(str1))))
self.assertEqual([], list(reversed(str2)))
@@ -187,17 +188,25 @@ class TestStringMixIn(unittest.TestCase):
self.assertEqual("", str2.decode("punycode", "ignore"))

str3 = _FakeString("𐌲𐌿𐍄")
actual = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84"
self.assertEqual(b"fake string", str1.encode())
self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84",
str3.encode("utf8"))
self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84",
str3.encode(encoding="utf8"))
self.assertRaises(UnicodeEncodeError, str3.encode)
self.assertEqual(actual, str3.encode("utf-8"))
self.assertEqual(actual, str3.encode(encoding="utf-8"))
if getdefaultencoding() == "ascii":
self.assertRaises(UnicodeEncodeError, str3.encode)
elif getdefaultencoding() == "utf-8":
self.assertEqual(actual, str3.encode())
self.assertRaises(UnicodeEncodeError, str3.encode, "ascii")
self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict")
self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict")
self.assertEqual("", str3.encode("ascii", "ignore"))
self.assertEqual("", str3.encode(errors="ignore"))
if getdefaultencoding() == "ascii":
self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict")
elif getdefaultencoding() == "utf-8":
self.assertEqual(actual, str3.encode(errors="strict"))
self.assertEqual(b"", str3.encode("ascii", "ignore"))
if getdefaultencoding() == "ascii":
self.assertEqual(b"", str3.encode(errors="ignore"))
elif getdefaultencoding() == "utf-8":
self.assertEqual(actual, str3.encode(errors="ignore"))

self.assertTrue(str1.endswith("ing"))
self.assertFalse(str1.endswith("ingh"))
@@ -364,6 +373,7 @@ class TestStringMixIn(unittest.TestCase):
actual = [" this is a sentence with", "", "whitespace", ""]
self.assertEqual(actual, str25.rsplit(" ", 3))
if py3k:
actual = [" this is a", "sentence", "with", "whitespace"]
self.assertEqual(actual, str25.rsplit(maxsplit=3))

self.assertEqual("fake string", str1.rstrip())
@@ -381,6 +391,7 @@ class TestStringMixIn(unittest.TestCase):
actual = ["", "", "", "this is a sentence with whitespace "]
self.assertEqual(actual, str25.split(" ", 3))
if py3k:
actual = ["this", "is", "a", "sentence with whitespace "]
self.assertEqual(actual, str25.split(maxsplit=3))

str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere")


+ 10
- 7
tests/test_tokens.py View File

@@ -65,12 +65,15 @@ class TestTokens(unittest.TestCase):

self.assertEqual("Token()", repr(token1))
if py3k:
token2repr = "Token(foo='bar', baz=123)"
token2repr1 = "Token(foo='bar', baz=123)"
token2repr2 = "Token(baz=123, foo='bar')"
token3repr = "Text(text='" + hundredchars + "')"
else:
token2repr = "Token(foo=u'bar', baz=123)"
token2repr1 = "Token(foo=u'bar', baz=123)"
token2repr2 = "Token(baz=123, foo=u'bar')"
token3repr = "Text(text=u'" + hundredchars + "')"
self.assertEqual(token2repr, repr(token2))
token2repr = repr(token2)
self.assertTrue(token2repr == token2repr1 or token2repr == token2repr2)
self.assertEqual(token3repr, repr(token3))

def test_equality(self):
@@ -86,10 +89,10 @@ class TestTokens(unittest.TestCase):
self.assertEqual(token2, token1)
self.assertEqual(token4, token5)
self.assertEqual(token5, token4)
self.assertNotEquals(token1, token3)
self.assertNotEquals(token2, token3)
self.assertNotEquals(token4, token6)
self.assertNotEquals(token5, token6)
self.assertNotEqual(token1, token3)
self.assertNotEqual(token2, token3)
self.assertNotEqual(token4, token6)
self.assertNotEqual(token5, token6)

def test_repr_equality(self):
"check that eval(repr(token)) == token"


Loading…
Cancel
Save