From efc571c5c0e18782f2514b39b9bf351c19fafce4 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben.kurtovic@gmail.com>
Date: Thu, 11 Jun 2015 21:45:34 -0400
Subject: [PATCH] Refactor _test_tokenizer; add syntax for running just one
 test.

---
 tests/_test_tokenizer.py | 67 +++++++++++++++++++++++++++++-------------------
 1 file changed, 41 insertions(+), 26 deletions(-)

diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py
index 1cbbc3d..cacf166 100644
--- a/tests/_test_tokenizer.py
+++ b/tests/_test_tokenizer.py
@@ -42,8 +42,8 @@ class TokenizerTestCase(object):
     directory.
     """
 
-    @classmethod
-    def _build_test_method(cls, funcname, data):
+    @staticmethod
+    def _build_test_method(funcname, data):
         """Create and return a method to be treated as a test case method.
 
         *data* is a dict containing multiple keys: the *input* text to be
@@ -58,13 +58,35 @@ class TokenizerTestCase(object):
                 expected = data["output"]
                 actual = self.tokenizer().tokenize(data["input"])
             self.assertEqual(expected, actual)
+
         if not py3k:
             inner.__name__ = funcname.encode("utf8")
         inner.__doc__ = data["label"]
         return inner
 
+    @staticmethod
+    def _parse_test(test, data):
+        """Parse the lines of one *test*, storing its fields in *data*."""
+        for line in test.strip().splitlines():
+            if line.startswith("name:"):
+                data["name"] = line[len("name:"):].strip()
+            elif line.startswith("label:"):
+                data["label"] = line[len("label:"):].strip()
+            elif line.startswith("input:"):
+                raw = line[len("input:"):].strip()
+                if raw.startswith('"') and raw.endswith('"'):  # ok if empty
+                    raw = raw[1:-1]
+                raw = raw.encode("raw_unicode_escape")
+                data["input"] = raw.decode("unicode_escape")  # unescape
+            elif line.startswith("output:"):
+                raw = line[len("output:"):].strip()
+                try:
+                    data["output"] = eval(raw, vars(tokens))
+                except Exception as err:
+                    raise _TestParseError(err)  # caught by _load_tests
+
     @classmethod
-    def _load_tests(cls, filename, name, text):
+    def _load_tests(cls, filename, name, text, restrict=None):
         """Load all tests in *text* from the file *filename*."""
         tests = text.split("\n---\n")
         counter = 1
@@ -72,23 +94,7 @@ class TokenizerTestCase(object):
         for test in tests:
             data = {"name": None, "label": None, "input": None, "output": None}
             try:
-                for line in test.strip().splitlines():
-                    if line.startswith("name:"):
-                        data["name"] = line[len("name:"):].strip()
-                    elif line.startswith("label:"):
-                        data["label"] = line[len("label:"):].strip()
-                    elif line.startswith("input:"):
-                        raw = line[len("input:"):].strip()
-                        if raw[0] == '"' and raw[-1] == '"':
-                            raw = raw[1:-1]
-                        raw = raw.encode("raw_unicode_escape")
-                        data["input"] = raw.decode("unicode_escape")
-                    elif line.startswith("output:"):
-                        raw = line[len("output:"):].strip()
-                        try:
-                            data["output"] = eval(raw, vars(tokens))
-                        except Exception as err:
-                            raise _TestParseError(err)
+                cls._parse_test(test, data)
             except _TestParseError as err:
                 if data["name"]:
                     error = "Could not parse test '{0}' in '{1}':\n\t{2}"
@@ -97,6 +103,7 @@ class TokenizerTestCase(object):
                     error = "Could not parse a test in '{0}':\n\t{1}"
                     print(error.format(filename, err))
                 continue
+
             if not data["name"]:
                 error = "A test in '{0}' was ignored because it lacked a name"
                 print(error.format(filename))
@@ -105,27 +112,35 @@ class TokenizerTestCase(object):
                 error = "Test '{0}' in '{1}' was ignored because it lacked an input or an output"
                 print(error.format(data["name"], filename))
                 continue
+
             number = str(counter).zfill(digits)
+            counter += 1
+            if restrict and data["name"] != restrict:
+                continue
+
             fname = "test_{0}{1}_{2}".format(name, number, data["name"])
             meth = cls._build_test_method(fname, data)
             setattr(cls, fname, meth)
-            counter += 1
 
     @classmethod
     def build(cls):
         """Load and install all tests from the 'tokenizer' directory."""
-        def load_file(filename):
+        def load_file(filename, restrict=None):
             with codecs.open(filename, "rU", encoding="utf8") as fp:
                 text = fp.read()
-                name = path.split(filename)[1][:0-len(extension)]
-                cls._load_tests(filename, name, text)
+                name = path.split(filename)[1][:-len(extension)]
+                cls._load_tests(filename, name, text, restrict)
 
         directory = path.join(path.dirname(__file__), "tokenizer")
         extension = ".mwtest"
         if len(sys.argv) > 2 and sys.argv[1] == "--use":
             for name in sys.argv[2:]:
-                load_file(path.join(directory, name + extension))
-            sys.argv = [sys.argv[0]]  # So unittest doesn't try to load these
+                if "." in name:
+                    name, test = name.split(".", 1)
+                else:
+                    test = None
+                load_file(path.join(directory, name + extension), test)
+            sys.argv = [sys.argv[0]]  # So unittest doesn't try to parse this
             cls.skip_others = True
         else:
             for filename in listdir(directory):