Browse Source

Make unit tests work in Python 3; add a unicode text test.

tags/v0.2
Ben Kurtovic 11 years ago
parent
commit
357b6dc447
2 changed files with 22 additions and 9 deletions
  1. tests/_test_tokenizer.py (+12, -6)
  2. tests/tokenizer/text.test (+10, -3)

tests/_test_tokenizer.py (+12, -6) View File

@@ -23,6 +23,7 @@
from __future__ import print_function, unicode_literals
from os import listdir, path

from mwparserfromhell.compat import py3k
from mwparserfromhell.parser import tokens

class _TestParseError(Exception):
@@ -36,12 +37,14 @@ class TokenizerTestCase(object):
def inner(self):
actual = self.tokenizer().tokenize(data["input"])
self.assertEqual(actual, data["output"])
inner.__name__ = funcname.encode("utf8")
if not py3k:
inner.__name__ = funcname.encode("utf8")
inner.__doc__ = data["label"]
return inner

@classmethod
def _load_tests(cls, filename, text):
counter = 1
tests = text.split("\n---\n")
for test in tests:
data = {"name": "", "label": "", "input": "", "output": []}
@@ -55,7 +58,7 @@ class TokenizerTestCase(object):
raw = line[len("input:"):].strip()
if raw[0] == '"' and raw[-1] == '"':
raw = raw[1:-1]
data["input"] = raw.decode("unicode_escape")
data["input"] = raw.encode("raw_unicode_escape").decode("unicode_escape")
elif line.startswith("output:"):
raw = line[len("output:"):].strip()
data["output"] = eval(raw, vars(tokens))
@@ -74,9 +77,10 @@ class TokenizerTestCase(object):
error = "Test {0} in {1} was ignored because it lacked an input or an output"
print(error.format(data["name"], filename))
continue
funcname = "test_" + filename + "_" + data["name"]
meth = cls._build_test_method(funcname, data)
setattr(cls, funcname, meth)
fname = "test_{0}{1}_{2}".format(filename, counter, data["name"])
meth = cls._build_test_method(fname, data)
setattr(cls, fname, meth)
counter += 1

@classmethod
def build(cls):
@@ -86,7 +90,9 @@ class TokenizerTestCase(object):
if not filename.endswith(extension):
continue
with open(path.join(directory, filename), "r") as fp:
text = fp.read().decode("utf8")
text = fp.read()
if not py3k:
text = text.decode("utf8")
cls._load_tests(filename[:0-len(extension)], text)

TokenizerTestCase.build()

tests/tokenizer/text.test (+10, -3) View File

@@ -5,7 +5,14 @@ output: [Text(text="foobar")]

---

name: basic2
name: newlines
label: slightly more complex text parsing, with newlines
input: "This is a line of text.\nThis is another line of text."
output: [Text(text="This is a line of text.\nThis is another line of text.")]
input: "This is a line of text.\nThis is another line of text.\nThis is another."
output: [Text(text="This is a line of text.\nThis is another line of text.\nThis is another.")]

---

name: unicode
label: ensure unicode data is handled properly
input: "Thís ís å sëñtënce with diœcritiçs."
output: [Text(text="Thís ís å sëñtënce with diœcritiçs.")]

Loading…
Cancel
Save