From efc571c5c0e18782f2514b39b9bf351c19fafce4 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 11 Jun 2015 21:45:34 -0400 Subject: [PATCH] Refactor _test_tokenizer; add syntax for running just one test. --- tests/_test_tokenizer.py | 67 +++++++++++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 1cbbc3d..cacf166 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -42,8 +42,8 @@ class TokenizerTestCase(object): directory. """ - @classmethod - def _build_test_method(cls, funcname, data): + @staticmethod + def _build_test_method(funcname, data): """Create and return a method to be treated as a test case method. *data* is a dict containing multiple keys: the *input* text to be @@ -58,13 +58,35 @@ class TokenizerTestCase(object): expected = data["output"] actual = self.tokenizer().tokenize(data["input"]) self.assertEqual(expected, actual) + if not py3k: inner.__name__ = funcname.encode("utf8") inner.__doc__ = data["label"] return inner + @staticmethod + def _parse_test(test, data): + """Parse an individual *test*, storing its info in *data*.""" + for line in test.strip().splitlines(): + if line.startswith("name:"): + data["name"] = line[len("name:"):].strip() + elif line.startswith("label:"): + data["label"] = line[len("label:"):].strip() + elif line.startswith("input:"): + raw = line[len("input:"):].strip() + if raw[0] == '"' and raw[-1] == '"': + raw = raw[1:-1] + raw = raw.encode("raw_unicode_escape") + data["input"] = raw.decode("unicode_escape") + elif line.startswith("output:"): + raw = line[len("output:"):].strip() + try: + data["output"] = eval(raw, vars(tokens)) + except Exception as err: + raise _TestParseError(err) + @classmethod - def _load_tests(cls, filename, name, text): + def _load_tests(cls, filename, name, text, restrict=None): """Load all tests in *text* from the file *filename*.""" tests = text.split("\n---\n") counter = 1 @@ -72,23 +94,7 @@ class TokenizerTestCase(object): for test in tests: data = {"name": None, "label": None, "input": None, "output": None} try: - for line in test.strip().splitlines(): - if line.startswith("name:"): - data["name"] = line[len("name:"):].strip() - elif line.startswith("label:"): - data["label"] = line[len("label:"):].strip() - elif line.startswith("input:"): - raw = line[len("input:"):].strip() - if raw[0] == '"' and raw[-1] == '"': - raw = raw[1:-1] - raw = raw.encode("raw_unicode_escape") - data["input"] = raw.decode("unicode_escape") - elif line.startswith("output:"): - raw = line[len("output:"):].strip() - try: - data["output"] = eval(raw, vars(tokens)) - except Exception as err: - raise _TestParseError(err) + cls._parse_test(test, data) except _TestParseError as err: if data["name"]: error = "Could not parse test '{0}' in '{1}':\n\t{2}" @@ -97,6 +103,7 @@ class TokenizerTestCase(object): error = "Could not parse a test in '{0}':\n\t{1}" print(error.format(filename, err)) continue + if not data["name"]: error = "A test in '{0}' was ignored because it lacked a name" print(error.format(filename)) @@ -105,27 +112,35 @@ class TokenizerTestCase(object): error = "Test '{0}' in '{1}' was ignored because it lacked an input or an output" print(error.format(data["name"], filename)) continue + number = str(counter).zfill(digits) + counter += 1 + if restrict and data["name"] != restrict: + continue + fname = "test_{0}{1}_{2}".format(name, number, data["name"]) meth = cls._build_test_method(fname, data) setattr(cls, fname, meth) - counter += 1 @classmethod def build(cls): """Load and install all tests from the 'tokenizer' directory.""" - def load_file(filename): + def load_file(filename, restrict=None): with codecs.open(filename, "rU", encoding="utf8") as fp: text = fp.read() - name = path.split(filename)[1][:0-len(extension)] - cls._load_tests(filename, name, text) + name = path.split(filename)[1][:-len(extension)] + cls._load_tests(filename, name, text, restrict) directory = path.join(path.dirname(__file__), "tokenizer") extension = ".mwtest" if len(sys.argv) > 2 and sys.argv[1] == "--use": for name in sys.argv[2:]: - load_file(path.join(directory, name + extension)) - sys.argv = [sys.argv[0]] # So unittest doesn't try to load these + if "." in name: + name, test = name.split(".", 1) + else: + test = None + load_file(path.join(directory, name + extension), test) + sys.argv = [sys.argv[0]] # So unittest doesn't try to parse this cls.skip_others = True else: for filename in listdir(directory):