From ecfb2c628f742c7c703fe67e8a0f7b5a51d62570 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 3 Feb 2013 14:16:17 -0500 Subject: [PATCH] Another test; handle errors when reading output line better. --- tests/_test_tokenizer.py | 16 ++++++++++------ tests/tokenizer/text.test | 7 +++++++ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 98d9434..bafb593 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -58,23 +58,27 @@ class TokenizerTestCase(object): raw = line[len("input:"):].strip() if raw[0] == '"' and raw[-1] == '"': raw = raw[1:-1] - data["input"] = raw.encode("raw_unicode_escape").decode("unicode_escape") + raw = raw.encode("raw_unicode_escape") + data["input"] = raw.decode("unicode_escape") elif line.startswith("output:"): raw = line[len("output:"):].strip() - data["output"] = eval(raw, vars(tokens)) + try: + data["output"] = eval(raw, vars(tokens)) + except Exception: + raise _TestParseError() except _TestParseError: if data["name"]: - error = "Could not parse test {0} in {1}" + error = "Could not parse test '{0}' in '{1}'" print(error.format(data["name"], filename)) else: - print("Could not parse a test in {0}".format(filename)) + print("Could not parse a test in '{0}'".format(filename)) continue if not data["name"]: - error = "A test in {0} was ignored because it lacked a name" + error = "A test in '{0}' was ignored because it lacked a name" print(error.format(filename)) continue if not data["input"] or not data["output"]: - error = "Test {0} in {1} was ignored because it lacked an input or an output" + error = "Test '{0}' in '{1}' was ignored because it lacked an input or an output" print(error.format(data["name"], filename)) continue fname = "test_{0}{1}_{2}".format(filename, counter, data["name"]) diff --git a/tests/tokenizer/text.test b/tests/tokenizer/text.test index eb5b9b4..77d5f50 100644 --- a/tests/tokenizer/text.test +++ 
b/tests/tokenizer/text.test @@ -16,3 +16,10 @@ name: unicode label: ensure unicode data is handled properly input: "Thís ís å sëñtënce with diœcritiçs." output: [Text(text="Thís ís å sëñtënce with diœcritiçs.")] + +--- + +name: unicode2 +label: additional unicode check for non-BMP codepoints +input: "𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰" +output: [Text(text="𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰")]