
Built an infrastructure for loading and running tokenizer tests.

commit 4636fbeb4a (tags/v0.2)
Ben Kurtovic, 11 years ago
4 changed files with 84 additions and 9 deletions
  1. tests/_test_tokenizer.py    +69  -5
  2. tests/test_ctokenizer.py     +2  -2
  3. tests/test_pytokenizer.py    +2  -2
  4. tests/tokenizer/text.test   +11  -0

tests/_test_tokenizer.py (+69, -5)

@@ -20,9 +20,73 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-class TestTokenizer():
-    def tokenize(self, text):
-        return self.tokenizer().tokenize(text)
+from __future__ import print_function, unicode_literals
+from os import listdir, path
 
-    def test_basic(self):
-        self.assertEqual(1, 1)
+from mwparserfromhell.parser import tokens
+
+
+class _TestParseError(Exception):
+    """Raised internally when a test could not be parsed."""
+    pass
+
+
+class TokenizerTestCase(object):
+    @classmethod
+    def _build_test_method(cls, funcname, data):
+        def inner(self):
+            actual = self.tokenizer().tokenize(data["input"])
+            self.assertEqual(actual, data["output"])
+        inner.__name__ = funcname.encode("utf8")
+        inner.__doc__ = data["label"]
+        return inner
+
+    @classmethod
+    def _load_tests(cls, filename, text):
+        tests = text.split("\n---\n")
+        for test in tests:
+            data = {"name": "", "label": "", "input": "", "output": []}
+            try:
+                for line in test.strip().splitlines():
+                    if line.startswith("name:"):
+                        data["name"] = line[len("name:"):].strip()
+                    elif line.startswith("label:"):
+                        data["label"] = line[len("label:"):].strip()
+                    elif line.startswith("input:"):
+                        raw = line[len("input:"):].strip()
+                        if raw[0] == '"' and raw[-1] == '"':
+                            raw = raw[1:-1]
+                        data["input"] = raw.decode("unicode_escape")
+                    elif line.startswith("output:"):
+                        raw = line[len("output:"):].strip()
+                        data["output"] = eval(raw, vars(tokens))
+            except _TestParseError:
+                if data["name"]:
+                    error = "Could not parse test {0} in {1}"
+                    print(error.format(data["name"], filename))
+                else:
+                    print("Could not parse a test in {0}".format(filename))
+                continue
+            if not data["name"]:
+                error = "A test in {0} was ignored because it lacked a name"
+                print(error.format(filename))
+                continue
+            if not data["input"] or not data["output"]:
+                error = "Test {0} in {1} was ignored because it lacked an input or an output"
+                print(error.format(data["name"], filename))
+                continue
+            funcname = "test_" + filename + "_" + data["name"]
+            meth = cls._build_test_method(funcname, data)
+            setattr(cls, funcname, meth)
+
+    @classmethod
+    def build(cls):
+        directory = path.join(path.dirname(__file__), "tokenizer")
+        extension = ".test"
+        for filename in listdir(directory):
+            if not filename.endswith(extension):
+                continue
+            with open(path.join(directory, filename), "r") as fp:
+                text = fp.read().decode("utf8")
+            cls._load_tests(filename[:0-len(extension)], text)
+
+TokenizerTestCase.build()
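
The core trick here is that TokenizerTestCase carries no test methods of its own at definition time: _load_tests() manufactures one test_* method per entry in the .test files and attaches it with setattr() when the module is imported, so unittest later discovers the generated methods on the concrete subclasses. A minimal, self-contained sketch of the same pattern (hypothetical names, Python 3, independent of this commit):

    import unittest

    class GeneratedTestCase(object):
        @classmethod
        def _build(cls, name, data):
            def inner(self):
                # Stand-in assertion; the real suite compares
                # self.tokenizer().tokenize(data["input"]) to data["output"].
                self.assertEqual(data["input"].upper(), data["output"])
            inner.__name__ = name
            setattr(cls, name, inner)

    for name, data in [("test_upper", {"input": "ab", "output": "AB"})]:
        GeneratedTestCase._build(name, data)

    class TestGenerated(GeneratedTestCase, unittest.TestCase):
        pass

    if __name__ == "__main__":
        unittest.main()  # discovers and runs the generated test_upper

The funcname.encode("utf8") in the real code exists because, under Python 2 with unicode_literals in effect, a function's __name__ must be a byte string, so the generated name is encoded before assignment.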

tests/test_ctokenizer.py (+2, -2)

@@ -22,9 +22,9 @@

 import unittest
 
-from _test_tokenizer import TestTokenizer
+from _test_tokenizer import TokenizerTestCase
 
-class TestCTokenizer(unittest.TestCase, TestTokenizer):
+class TestCTokenizer(TokenizerTestCase, unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         from mwparserfromhell.parser._tokenizer import CTokenizer
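
Two details in this two-line change are easy to miss. First, the base-class order is flipped so the mixin precedes unittest.TestCase: with TokenizerTestCase first in the MRO, its dynamically attached test_* methods are resolved ahead of anything on TestCase. Second, the tokenizer import stays inside setUpClass, so the C extension is only required when this class actually runs. The truncated body presumably stores the imported class for the generated tests to call; a hedged sketch of the likely shape (the cls.tokenizer assignment is an assumption, not shown in this diff):

    class TestCTokenizer(TokenizerTestCase, unittest.TestCase):
        @classmethod
        def setUpClass(cls):
            from mwparserfromhell.parser._tokenizer import CTokenizer
            # Assumed: generated tests call self.tokenizer().tokenize(...),
            # so the tokenizer class is exposed as a class attribute here.
            cls.tokenizer = CTokenizer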


tests/test_pytokenizer.py (+2, -2)

@@ -22,9 +22,9 @@

 import unittest
 
-from _test_tokenizer import TestTokenizer
+from _test_tokenizer import TokenizerTestCase
 
-class TestPyTokenizer(unittest.TestCase, TestTokenizer):
+class TestPyTokenizer(TokenizerTestCase, unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         from mwparserfromhell.parser.tokenizer import Tokenizer


tests/tokenizer/text.test (+11, -0)

@@ -0,0 +1,11 @@
+name: basic
+label: sanity check for basic text parsing, no gimmicks
+input: "foobar"
+output: [Text(text="foobar")]
+
+---
+
+name: basic2
+label: slightly more complex text parsing, with newlines
+input: "This is a line of text.\nThis is another line of text."
+output: [Text(text="This is a line of text.\nThis is another line of text.")]
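
Tracing one of these entries through the loader: input values are written as quoted, backslash-escaped strings, so the loader strips the quotes and applies the unicode_escape codec to turn "\n" into a real newline, while output lines are eval()'d with vars(tokens) as the namespace so names like Text resolve to token classes. A sketch of the input decoding in isolation (a Python 3 equivalent of the loader's raw.decode("unicode_escape"); the .encode("ascii") step assumes ASCII-only test inputs):

    line = 'input: "This is a line of text.\\nThis is another line of text."'
    raw = line[len("input:"):].strip()
    if raw[0] == '"' and raw[-1] == '"':
        raw = raw[1:-1]  # drop the surrounding quotes
    text = raw.encode("ascii").decode("unicode_escape")
    assert text == "This is a line of text.\nThis is another line of text."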
