From ecfb2c628f742c7c703fe67e8a0f7b5a51d62570 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@verizon.net>
Date: Sun, 3 Feb 2013 14:16:17 -0500
Subject: [PATCH] Another test; handle errors when reading output line better.

---
 tests/_test_tokenizer.py  | 16 ++++++++++------
 tests/tokenizer/text.test |  7 +++++++
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py
index 98d9434..bafb593 100644
--- a/tests/_test_tokenizer.py
+++ b/tests/_test_tokenizer.py
@@ -58,23 +58,27 @@ class TokenizerTestCase(object):
                         raw = line[len("input:"):].strip()
                         if raw[0] == '"' and raw[-1] == '"':
                             raw = raw[1:-1]
-                        data["input"] = raw.encode("raw_unicode_escape").decode("unicode_escape")
+                        raw = raw.encode("raw_unicode_escape")
+                        data["input"] = raw.decode("unicode_escape")
                     elif line.startswith("output:"):
                         raw = line[len("output:"):].strip()
-                        data["output"] = eval(raw, vars(tokens))
+                        try:
+                            data["output"] = eval(raw, vars(tokens))
+                        except Exception:
+                            raise _TestParseError()
             except _TestParseError:
                 if data["name"]:
-                    error = "Could not parse test {0} in {1}"
+                    error = "Could not parse test '{0}' in '{1}'"
                     print(error.format(data["name"], filename))
                 else:
-                    print("Could not parse a test in {0}".format(filename))
+                    print("Could not parse a test in '{0}'".format(filename))
                 continue
             if not data["name"]:
-                error = "A test in {0} was ignored because it lacked a name"
+                error = "A test in '{0}' was ignored because it lacked a name"
                 print(error.format(filename))
                 continue
             if not data["input"] or not data["output"]:
-                error = "Test {0} in {1} was ignored because it lacked an input or an output"
+                error = "Test '{0}' in '{1}' was ignored because it lacked an input or an output"
                 print(error.format(data["name"], filename))
                 continue
             fname = "test_{0}{1}_{2}".format(filename, counter, data["name"])
diff --git a/tests/tokenizer/text.test b/tests/tokenizer/text.test
index eb5b9b4..77d5f50 100644
--- a/tests/tokenizer/text.test
+++ b/tests/tokenizer/text.test
@@ -16,3 +16,10 @@ name:   unicode
 label:  ensure unicode data is handled properly
 input:  "Thís ís å sëñtënce with diœcritiçs."
 output: [Text(text="Thís ís å sëñtënce with diœcritiçs.")]
+
+---
+
+name:   unicode2
+label:  additional unicode check for non-BMP codepoints
+input:  "𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰"
+output: [Text(text="𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰")]