@@ -42,8 +42,8 @@ class TokenizerTestCase(object):
     directory.
     """
 
-    @classmethod
-    def _build_test_method(cls, funcname, data):
+    @staticmethod
+    def _build_test_method(funcname, data):
         """Create and return a method to be treated as a test case method.
 
         *data* is a dict containing multiple keys: the *input* text to be
@@ -58,13 +58,35 @@ class TokenizerTestCase(object):
             expected = data["output"]
             actual = self.tokenizer().tokenize(data["input"])
             self.assertEqual(expected, actual)
         if not py3k:
             inner.__name__ = funcname.encode("utf8")
         inner.__doc__ = data["label"]
         return inner
 
+    @staticmethod
+    def _parse_test(test, data):
+        """Parse an individual *test*, storing its info in *data*."""
+        for line in test.strip().splitlines():
+            if line.startswith("name:"):
+                data["name"] = line[len("name:"):].strip()
+            elif line.startswith("label:"):
+                data["label"] = line[len("label:"):].strip()
+            elif line.startswith("input:"):
+                raw = line[len("input:"):].strip()
+                if raw[0] == '"' and raw[-1] == '"':
+                    raw = raw[1:-1]
+                raw = raw.encode("raw_unicode_escape")
+                data["input"] = raw.decode("unicode_escape")
+            elif line.startswith("output:"):
+                raw = line[len("output:"):].strip()
+                try:
+                    data["output"] = eval(raw, vars(tokens))
+                except Exception as err:
+                    raise _TestParseError(err)
+
     @classmethod
-    def _load_tests(cls, filename, name, text):
+    def _load_tests(cls, filename, name, text, restrict=None):
         """Load all tests in *text* from the file *filename*."""
         tests = text.split("\n---\n")
         counter = 1
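
For reference, the `_parse_test` helper introduced above consumes one test
block at a time; `_load_tests` produces those blocks by splitting a .mwtest
file on "---" separator lines. A minimal block, assuming the field syntax the
parser checks for and a `Text` token class in the `tokens` namespace, would
look like:

    name: basic
    label: sanity check for a very simple text string
    input: "foo bar baz"
    output: [Text(text="foo bar baz")]

The quotes around the input are stripped and the text is decoded with
unicode_escape, while the output expression is eval'd against vars(tokens) to
build the expected token list.
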
@@ -72,23 +94,7 @@ class TokenizerTestCase(object):
         for test in tests:
             data = {"name": None, "label": None, "input": None, "output": None}
             try:
-                for line in test.strip().splitlines():
-                    if line.startswith("name:"):
-                        data["name"] = line[len("name:"):].strip()
-                    elif line.startswith("label:"):
-                        data["label"] = line[len("label:"):].strip()
-                    elif line.startswith("input:"):
-                        raw = line[len("input:"):].strip()
-                        if raw[0] == '"' and raw[-1] == '"':
-                            raw = raw[1:-1]
-                        raw = raw.encode("raw_unicode_escape")
-                        data["input"] = raw.decode("unicode_escape")
-                    elif line.startswith("output:"):
-                        raw = line[len("output:"):].strip()
-                        try:
-                            data["output"] = eval(raw, vars(tokens))
-                        except Exception as err:
-                            raise _TestParseError(err)
+                cls._parse_test(test, data)
             except _TestParseError as err:
                 if data["name"]:
                     error = "Could not parse test '{0}' in '{1}':\n\t{2}"
@@ -97,6 +103,7 @@ class TokenizerTestCase(object):
                     error = "Could not parse a test in '{0}':\n\t{1}"
                     print(error.format(filename, err))
+                continue
 
             if not data["name"]:
                 error = "A test in '{0}' was ignored because it lacked a name"
                 print(error.format(filename))
@@ -105,27 +112,35 @@ class TokenizerTestCase(object):
                 error = "Test '{0}' in '{1}' was ignored because it lacked an input or an output"
                 print(error.format(data["name"], filename))
                 continue
 
             number = str(counter).zfill(digits)
+            counter += 1
+            if restrict and data["name"] != restrict:
+                continue
+
             fname = "test_{0}{1}_{2}".format(name, number, data["name"])
             meth = cls._build_test_method(fname, data)
             setattr(cls, fname, meth)
-            counter += 1
 
     @classmethod
     def build(cls):
         """Load and install all tests from the 'tokenizer' directory."""
-        def load_file(filename):
+        def load_file(filename, restrict=None):
             with codecs.open(filename, "rU", encoding="utf8") as fp:
                 text = fp.read()
-            name = path.split(filename)[1][:0-len(extension)]
-            cls._load_tests(filename, name, text)
+            name = path.split(filename)[1][:-len(extension)]
+            cls._load_tests(filename, name, text, restrict)
 
         directory = path.join(path.dirname(__file__), "tokenizer")
         extension = ".mwtest"
         if len(sys.argv) > 2 and sys.argv[1] == "--use":
             for name in sys.argv[2:]:
-                load_file(path.join(directory, name + extension))
-            sys.argv = [sys.argv[0]]  # So unittest doesn't try to load these
+                if "." in name:
+                    name, test = name.split(".", 1)
+                else:
+                    test = None
+                load_file(path.join(directory, name + extension), test)
+            sys.argv = [sys.argv[0]]  # So unittest doesn't try to parse this
+            cls.skip_others = True
         else:
             for filename in listdir(directory):
|