From 28b124a96c8685c8c94eb51d8d29d508e8fe198c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 2 Feb 2013 23:10:58 -0500 Subject: [PATCH 01/67] Restarting work on unit tests with a test covering doc examples. --- README.rst | 4 +- mwparserfromhell/compat.py | 2 + tests/test_docs.py | 117 ++++++++++++++++++++++++++++++++++++++++++++ tests/test_parameter.py | 119 --------------------------------------------- tests/test_parser.py | 63 ------------------------ tests/test_template.py | 106 ---------------------------------------- 6 files changed, 122 insertions(+), 289 deletions(-) create mode 100644 tests/test_docs.py delete mode 100644 tests/test_parameter.py delete mode 100644 tests/test_parser.py delete mode 100644 tests/test_template.py diff --git a/README.rst b/README.rst index 77f12c7..3901103 100644 --- a/README.rst +++ b/README.rst @@ -124,7 +124,9 @@ following code (via the API_):: import mwparserfromhell API_URL = "http://en.wikipedia.org/w/api.php" def parse(title): - raw = urllib.urlopen(API_URL, data).read() + data = {"action": "query", "prop": "revisions", "rvlimit": 1, + "rvprop": "content", "format": "json", "titles": title} + raw = urllib.urlopen(API_URL, urllib.urlencode(data)).read() res = json.loads(raw) text = res["query"]["pages"].values()[0]["revisions"][0]["*"] return mwparserfromhell.parse(text) diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index a1b6b8f..576c2c5 100755 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -18,6 +18,7 @@ if py3k: basestring = str maxsize = sys.maxsize import html.entities as htmlentities + from io import StringIO else: bytes = str @@ -25,5 +26,6 @@ else: basestring = basestring maxsize = sys.maxint import htmlentitydefs as htmlentities + from StringIO import StringIO del sys diff --git a/tests/test_docs.py b/tests/test_docs.py new file mode 100644 index 0000000..5ec25e1 --- /dev/null +++ b/tests/test_docs.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import print_function, unicode_literals +import json +import unittest +import urllib + +import mwparserfromhell +from mwparserfromhell.compat import py3k, str, StringIO + +class TestDocs(unittest.TestCase): + def assertPrint(self, input, output): + """Assertion check that *input*, when printed, produces *output*.""" + buff = StringIO() + print(input, end="", file=buff) + buff.seek(0) + self.assertEqual(buff.read(), output) + + def test_readme_1(self): + text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" + wikicode = mwparserfromhell.parse(text) + self.assertPrint(wikicode, + "I has a template! {{foo|bar|baz|eggs=spam}} See it?") + templates = wikicode.filter_templates() + if py3k: + self.assertPrint(templates, "['{{foo|bar|baz|eggs=spam}}']") + else: + self.assertPrint(templates, "[u'{{foo|bar|baz|eggs=spam}}']") + template = templates[0] + self.assertPrint(template.name, "foo") + if py3k: + self.assertPrint(template.params, "['bar', 'baz', 'eggs=spam']") + else: + self.assertPrint(template.params, "[u'bar', u'baz', u'eggs=spam']") + self.assertPrint(template.get(1).value, "bar") + self.assertPrint(template.get("eggs").value, "spam") + + def test_readme_2(self): + code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") + if py3k: + self.assertPrint(code.filter_templates(), + "['{{foo|this {{includes a|template}}}}']") + else: + self.assertPrint(code.filter_templates(), + "[u'{{foo|this {{includes a|template}}}}']") + foo = code.filter_templates()[0] + self.assertPrint(foo.get(1).value, "this {{includes a|template}}") + self.assertPrint(foo.get(1).value.filter_templates()[0], + "{{includes a|template}}") + self.assertPrint(foo.get(1).value.filter_templates()[0].get(1).value, + "template") + + def test_readme_3(self): + text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" + temps = mwparserfromhell.parse(text).filter_templates(recursive=True) + if py3k: + res = "['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']" + else: + res = "[u'{{foo|{{bar}}={{baz|{{spam}}}}}}', u'{{bar}}', u'{{baz|{{spam}}}}', u'{{spam}}']" + self.assertPrint(temps, res) + + def test_readme_4(self): + text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}" + code = mwparserfromhell.parse(text) + for template in code.filter_templates(): + if template.name == "cleanup" and not template.has_param("date"): + template.add("date", "July 2012") + res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}}" + self.assertPrint(code, res) + code.replace("{{uncategorized}}", "{{bar-stub}}") + res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}" + self.assertPrint(code, res) + if py3k: + res = "['{{cleanup|date=July 2012}}', '{{bar-stub}}']" + else: + res = "[u'{{cleanup|date=July 2012}}', u'{{bar-stub}}']" + self.assertPrint(code.filter_templates(), res) + text = str(code) + res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. 
{{bar-stub}}" + self.assertPrint(text, res) + self.assertEqual(text, code) + + def test_readme_5(self): + url1 = "http://en.wikipedia.org/w/api.php" + url2 = "http://en.wikipedia.org/w/index.php?title={0}&action=raw" + title = "Test" + data = {"action": "query", "prop": "revisions", "rvlimit": 1, + "rvprop": "content", "format": "json", "titles": title} + raw = urllib.urlopen(url1, urllib.urlencode(data)).read() + res = json.loads(raw) + text = res["query"]["pages"].values()[0]["revisions"][0]["*"] + actual = mwparserfromhell.parse(text) + expected = urllib.urlopen(url2.format(title)).read().decode("utf8") + self.assertEqual(actual, expected) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_parameter.py b/tests/test_parameter.py deleted file mode 100644 index 2d5515b..0000000 --- a/tests/test_parameter.py +++ /dev/null @@ -1,119 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -import unittest - -from mwparserfromhell.parameter import Parameter -from mwparserfromhell.template import Template - -class TestParameter(unittest.TestCase): - def setUp(self): - self.name = "foo" - self.value1 = "bar" - self.value2 = "{{spam}}" - self.value3 = "bar{{spam}}" - self.value4 = "embedded {{eggs|spam|baz=buz}} {{goes}} here" - self.templates2 = [Template("spam")] - self.templates3 = [Template("spam")] - self.templates4 = [Template("eggs", [Parameter("1", "spam"), - Parameter("baz", "buz")]), - Template("goes")] - - def test_construct(self): - Parameter(self.name, self.value1) - Parameter(self.name, self.value2, self.templates2) - Parameter(name=self.name, value=self.value3) - Parameter(name=self.name, value=self.value4, templates=self.templates4) - - def test_name(self): - params = [ - Parameter(self.name, self.value1), - Parameter(self.name, self.value2, self.templates2), - Parameter(name=self.name, value=self.value3), - Parameter(name=self.name, value=self.value4, - templates=self.templates4) - ] - for param in params: - self.assertEqual(param.name, self.name) - - def test_value(self): - tests = [ - (Parameter(self.name, self.value1), self.value1), - (Parameter(self.name, self.value2, self.templates2), self.value2), - (Parameter(name=self.name, value=self.value3), self.value3), - (Parameter(name=self.name, value=self.value4, - templates=self.templates4), self.value4) - ] - for param, correct in tests: - self.assertEqual(param.value, correct) - - def test_templates(self): - tests = [ - (Parameter(self.name, self.value3, self.templates3), - self.templates3), - (Parameter(name=self.name, value=self.value4, - templates=self.templates4), self.templates4) - ] - for param, correct in tests: - self.assertEqual(param.templates, correct) - - def test_magic(self): - params = [Parameter(self.name, self.value1), - Parameter(self.name, self.value2, self.templates2), - Parameter(self.name, self.value3, self.templates3), - Parameter(self.name, self.value4, self.templates4)] - for param in params: - self.assertEqual(repr(param), repr(param.value)) - self.assertEqual(str(param), str(param.value)) - self.assertIs(param < "eggs", param.value < "eggs") - self.assertIs(param <= "bar{{spam}}", param.value <= "bar{{spam}}") - self.assertIs(param == "bar", param.value == "bar") - self.assertIs(param != "bar", param.value != "bar") - self.assertIs(param > "eggs", param.value > "eggs") - self.assertIs(param >= "bar{{spam}}", param.value >= "bar{{spam}}") - self.assertEquals(bool(param), bool(param.value)) - self.assertEquals(len(param), len(param.value)) - self.assertEquals(list(param), list(param.value)) - self.assertEquals(param[2], param.value[2]) - self.assertEquals(list(reversed(param)), - list(reversed(param.value))) - self.assertIs("bar" in param, "bar" in param.value) - self.assertEquals(param + "test", param.value + "test") - self.assertEquals("test" + param, "test" + param.value) - # add param - # add template left - # add template right - - self.assertEquals(param * 3, Parameter(param.name, param.value * 3, - param.templates * 3)) - self.assertEquals(3 * param, Parameter(param.name, 3 * param.value, - 3 * param.templates)) - - # add param inplace - # add template implace - # add str inplace - # multiply int inplace - self.assertIsInstance(param, Parameter) - self.assertIsInstance(param.value, str) - -if __name__ == "__main__": - unittest.main(verbosity=2) diff --git a/tests/test_parser.py b/tests/test_parser.py deleted file mode 100644 index 0c989b8..0000000 --- a/tests/test_parser.py 
+++ /dev/null @@ -1,63 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import unittest - -from mwparserfromhell.parameter import Parameter -from mwparserfromhell.parser import Parser -from mwparserfromhell.template import Template - -TESTS = [ - ("", []), - ("abcdef ghijhk", []), - ("abc{this is not a template}def", []), - ("neither is {{this one}nor} {this one {despite}} containing braces", []), - ("this is an acceptable {{template}}", [Template("template")]), - ("{{multiple}}{{templates}}", [Template("multiple"), - Template("templates")]), - ("multiple {{-}} templates {{+}}!", [Template("-"), Template("+")]), - ("{{{no templates here}}}", []), - ("{ {{templates here}}}", [Template("templates here")]), - ("{{{{I do not exist}}}}", []), - ("{{foo|bar|baz|eggs=spam}}", - [Template("foo", [Parameter("1", "bar"), Parameter("2", "baz"), - Parameter("eggs", "spam")])]), - ("{{abc def|ghi|jk=lmno|pqr|st=uv|wx|yz}}", - [Template("abc def", [Parameter("1", "ghi"), Parameter("jk", "lmno"), - Parameter("2", "pqr"), Parameter("st", "uv"), - Parameter("3", "wx"), Parameter("4", "yz")])]), - ("{{this has a|{{template}}|inside of it}}", - [Template("this has a", [Parameter("1", "{{template}}", - [Template("template")]), - Parameter("2", "inside of it")])]), - ("{{{{I exist}} }}", [Template("I exist", [] )]), - ("{{}}") -] - -class TestParser(unittest.TestCase): - def test_parse(self): - parser = Parser() - for unparsed, parsed in TESTS: - self.assertEqual(parser.parse(unparsed), parsed) - -if __name__ == "__main__": - unittest.main(verbosity=2) diff --git a/tests/test_template.py b/tests/test_template.py deleted file mode 100644 index b006033..0000000 --- a/tests/test_template.py +++ /dev/null @@ -1,106 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -from itertools import permutations -import unittest - -from mwparserfromhell.parameter import Parameter -from mwparserfromhell.template import Template - -class TestTemplate(unittest.TestCase): - def setUp(self): - self.name = "foo" - self.bar = Parameter("1", "bar") - self.baz = Parameter("2", "baz") - self.eggs = Parameter("eggs", "spam") - self.params = [self.bar, self.baz, self.eggs] - - def test_construct(self): - Template(self.name) - Template(self.name, self.params) - Template(name=self.name) - Template(name=self.name, params=self.params) - - def test_name(self): - templates = [ - Template(self.name), - Template(self.name, self.params), - Template(name=self.name), - Template(name=self.name, params=self.params) - ] - for template in templates: - self.assertEqual(template.name, self.name) - - def test_params(self): - for template in (Template(self.name), Template(name=self.name)): - self.assertEqual(template.params, []) - for template in (Template(self.name, self.params), - Template(name=self.name, params=self.params)): - self.assertEqual(template.params, self.params) - - def test_getitem(self): - template = Template(name=self.name, params=self.params) - self.assertIs(template[0], self.bar) - self.assertIs(template[1], self.baz) - self.assertIs(template[2], self.eggs) - self.assertIs(template["1"], self.bar) - self.assertIs(template["2"], self.baz) - self.assertIs(template["eggs"], self.eggs) - - def test_render(self): - tests = [ - (Template(self.name), "{{foo}}"), - (Template(self.name, self.params), "{{foo|bar|baz|eggs=spam}}") - ] - for template, rendered in tests: - self.assertEqual(template.render(), rendered) - - def test_repr(self): - correct1= 'Template(name=foo, params={})' - correct2 = 'Template(name=foo, params={"1": "bar", "2": "baz", "eggs": "spam"})' - tests = [(Template(self.name), correct1), - (Template(self.name, self.params), correct2)] - for template, correct in tests: - self.assertEqual(repr(template), correct) - self.assertEqual(str(template), correct) - - def test_cmp(self): - tmp1 = Template(self.name) - tmp2 = Template(name=self.name) - tmp3 = Template(self.name, []) - tmp4 = Template(name=self.name, params=[]) - tmp5 = Template(self.name, self.params) - tmp6 = Template(name=self.name, params=self.params) - - for tmpA, tmpB in permutations((tmp1, tmp2, tmp3, tmp4), 2): - self.assertEqual(tmpA, tmpB) - - for tmpA, tmpB in permutations((tmp5, tmp6), 2): - self.assertEqual(tmpA, tmpB) - - for tmpA in (tmp5, tmp6): - for tmpB in (tmp1, tmp2, tmp3, tmp4): - self.assertNotEqual(tmpA, tmpB) - self.assertNotEqual(tmpB, tmpA) - -if __name__ == "__main__": - unittest.main(verbosity=2) From fb7567d6d0e7974beac39780c741fba3e50693b9 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 2 Feb 2013 23:44:15 -0500 Subject: [PATCH 02/67] Some empty testcases. 
--- .gitignore | 1 + tests/_test_tokenizer.py | 28 ++++++++++++++++++++++++++++ tests/test_builder.py | 29 +++++++++++++++++++++++++++++ tests/test_ctokenizer.py | 34 ++++++++++++++++++++++++++++++++++ tests/test_parser.py | 29 +++++++++++++++++++++++++++++ tests/test_pytokenizer.py | 34 ++++++++++++++++++++++++++++++++++ tests/test_tokens.py | 29 +++++++++++++++++++++++++++++ 7 files changed, 184 insertions(+) create mode 100644 tests/_test_tokenizer.py create mode 100644 tests/test_builder.py create mode 100644 tests/test_ctokenizer.py create mode 100644 tests/test_parser.py create mode 100644 tests/test_pytokenizer.py create mode 100644 tests/test_tokens.py diff --git a/.gitignore b/.gitignore index d70b37d..ec4e8ca 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *.pyc +*.so *.egg *.egg-info .DS_Store diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py new file mode 100644 index 0000000..29f4e37 --- /dev/null +++ b/tests/_test_tokenizer.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +class TestTokenizer(): + def tokenize(self, text): + return self.tokenizer().tokenize(text) + + def test_basic(self): + self.assertEqual(1, 1) diff --git a/tests/test_builder.py b/tests/test_builder.py new file mode 100644 index 0000000..e38e683 --- /dev/null +++ b/tests/test_builder.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import unittest + +class TestBuilder(unittest.TestCase): + pass + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py new file mode 100644 index 0000000..e5a7aef --- /dev/null +++ b/tests/test_ctokenizer.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import unittest + +from _test_tokenizer import TestTokenizer + +class TestCTokenizer(unittest.TestCase, TestTokenizer): + @classmethod + def setUpClass(cls): + from mwparserfromhell.parser._tokenizer import CTokenizer + cls.tokenizer = CTokenizer + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_parser.py b/tests/test_parser.py new file mode 100644 index 0000000..3f9b2e6 --- /dev/null +++ b/tests/test_parser.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +import unittest + +class TestParser(unittest.TestCase): + pass + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py new file mode 100644 index 0000000..01855f7 --- /dev/null +++ b/tests/test_pytokenizer.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import unittest + +from _test_tokenizer import TestTokenizer + +class TestPyTokenizer(unittest.TestCase, TestTokenizer): + @classmethod + def setUpClass(cls): + from mwparserfromhell.parser.tokenizer import Tokenizer + cls.tokenizer = Tokenizer + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_tokens.py b/tests/test_tokens.py new file mode 100644 index 0000000..0e7f87b --- /dev/null +++ b/tests/test_tokens.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import unittest + +class TestTokens(unittest.TestCase): + pass + +if __name__ == "__main__": + unittest.main(verbosity=2) From 4636fbeb4a46e76b5d04a9c439758ed042eea7eb Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 3 Feb 2013 02:10:36 -0500 Subject: [PATCH 03/67] Built an infrastructure for loading and running tokenizer tests. 
--- tests/_test_tokenizer.py | 74 +++++++++++++++++++++++++++++++++++++++++++---- tests/test_ctokenizer.py | 4 +-- tests/test_pytokenizer.py | 4 +-- tests/tokenizer/text.test | 11 +++++++ 4 files changed, 84 insertions(+), 9 deletions(-) create mode 100644 tests/tokenizer/text.test diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 29f4e37..1efafd9 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -20,9 +20,73 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -class TestTokenizer(): - def tokenize(self, text): - return self.tokenizer().tokenize(text) +from __future__ import print_function, unicode_literals +from os import listdir, path - def test_basic(self): - self.assertEqual(1, 1) +from mwparserfromhell.parser import tokens + +class _TestParseError(Exception): + """Raised internally when a test could not be parsed.""" + pass + + +class TokenizerTestCase(object): + @classmethod + def _build_test_method(cls, funcname, data): + def inner(self): + actual = self.tokenizer().tokenize(data["input"]) + self.assertEqual(actual, data["output"]) + inner.__name__ = funcname.encode("utf8") + inner.__doc__ = data["label"] + return inner + + @classmethod + def _load_tests(cls, filename, text): + tests = text.split("\n---\n") + for test in tests: + data = {"name": "", "label": "", "input": "", "output": []} + try: + for line in test.strip().splitlines(): + if line.startswith("name:"): + data["name"] = line[len("name:"):].strip() + elif line.startswith("label:"): + data["label"] = line[len("label:"):].strip() + elif line.startswith("input:"): + raw = line[len("input:"):].strip() + if raw[0] == '"' and raw[-1] == '"': + raw = raw[1:-1] + data["input"] = raw.decode("unicode_escape") + elif line.startswith("output:"): + raw = line[len("output:"):].strip() + data["output"] = eval(raw, vars(tokens)) + except _TestParseError: + if data["name"]: + error = "Could not parse test {0} in {1}" + print(error.format(data["name"], filename)) + else: + print("Could not parse a test in {0}".format(filename)) + continue + if not data["name"]: + error = "A test in {0} was ignored because it lacked a name" + print(error.format(filename)) + continue + if not data["input"] or not data["output"]: + error = "Test {0} in {1} was ignored because it lacked an input or an output" + print(error.format(data["name"], filename)) + continue + funcname = "test_" + filename + "_" + data["name"] + meth = cls._build_test_method(funcname, data) + setattr(cls, funcname, meth) + + @classmethod + def build(cls): + directory = path.join(path.dirname(__file__), "tokenizer") + extension = ".test" + for filename in listdir(directory): + if not filename.endswith(extension): + continue + with open(path.join(directory, filename), "r") as fp: + text = fp.read().decode("utf8") + cls._load_tests(filename[:0-len(extension)], text) + +TokenizerTestCase.build() diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index e5a7aef..7d3ffd7 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -22,9 +22,9 @@ import unittest -from _test_tokenizer import TestTokenizer +from _test_tokenizer import TokenizerTestCase -class TestCTokenizer(unittest.TestCase, TestTokenizer): +class TestCTokenizer(TokenizerTestCase, unittest.TestCase): @classmethod def setUpClass(cls): from mwparserfromhell.parser._tokenizer import CTokenizer diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 01855f7..f739726 100644 --- a/tests/test_pytokenizer.py 
+++ b/tests/test_pytokenizer.py @@ -22,9 +22,9 @@ import unittest -from _test_tokenizer import TestTokenizer +from _test_tokenizer import TokenizerTestCase -class TestPyTokenizer(unittest.TestCase, TestTokenizer): +class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): @classmethod def setUpClass(cls): from mwparserfromhell.parser.tokenizer import Tokenizer diff --git a/tests/tokenizer/text.test b/tests/tokenizer/text.test new file mode 100644 index 0000000..8d97412 --- /dev/null +++ b/tests/tokenizer/text.test @@ -0,0 +1,11 @@ +name: basic +label: sanity check for basic text parsing, no gimmicks +input: "foobar" +output: [Text(text="foobar")] + +--- + +name: basic2 +label: slightly more complex text parsing, with newlines +input: "This is a line of text.\nThis is another line of text." +output: [Text(text="This is a line of text.\nThis is another line of text.")] From 357b6dc4470f724eac6a19bef54b27761e6a492f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 3 Feb 2013 02:33:31 -0500 Subject: [PATCH 04/67] Make unit tests work in Python 3; add a unicode text test. --- tests/_test_tokenizer.py | 18 ++++++++++++------ tests/tokenizer/text.test | 13 ++++++++++--- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 1efafd9..98d9434 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -23,6 +23,7 @@ from __future__ import print_function, unicode_literals from os import listdir, path +from mwparserfromhell.compat import py3k from mwparserfromhell.parser import tokens class _TestParseError(Exception): @@ -36,12 +37,14 @@ class TokenizerTestCase(object): def inner(self): actual = self.tokenizer().tokenize(data["input"]) self.assertEqual(actual, data["output"]) - inner.__name__ = funcname.encode("utf8") + if not py3k: + inner.__name__ = funcname.encode("utf8") inner.__doc__ = data["label"] return inner @classmethod def _load_tests(cls, filename, text): + counter = 1 tests = text.split("\n---\n") for test in tests: data = {"name": "", "label": "", "input": "", "output": []} @@ -55,7 +58,7 @@ class TokenizerTestCase(object): raw = line[len("input:"):].strip() if raw[0] == '"' and raw[-1] == '"': raw = raw[1:-1] - data["input"] = raw.decode("unicode_escape") + data["input"] = raw.encode("raw_unicode_escape").decode("unicode_escape") elif line.startswith("output:"): raw = line[len("output:"):].strip() data["output"] = eval(raw, vars(tokens)) @@ -74,9 +77,10 @@ class TokenizerTestCase(object): error = "Test {0} in {1} was ignored because it lacked an input or an output" print(error.format(data["name"], filename)) continue - funcname = "test_" + filename + "_" + data["name"] - meth = cls._build_test_method(funcname, data) - setattr(cls, funcname, meth) + fname = "test_{0}{1}_{2}".format(filename, counter, data["name"]) + meth = cls._build_test_method(fname, data) + setattr(cls, fname, meth) + counter += 1 @classmethod def build(cls): @@ -86,7 +90,9 @@ class TokenizerTestCase(object): if not filename.endswith(extension): continue with open(path.join(directory, filename), "r") as fp: - text = fp.read().decode("utf8") + text = fp.read() + if not py3k: + text = text.decode("utf8") cls._load_tests(filename[:0-len(extension)], text) TokenizerTestCase.build() diff --git a/tests/tokenizer/text.test b/tests/tokenizer/text.test index 8d97412..eb5b9b4 100644 --- a/tests/tokenizer/text.test +++ b/tests/tokenizer/text.test @@ -5,7 +5,14 @@ output: [Text(text="foobar")] --- -name: basic2 +name: newlines label: slightly 
more complex text parsing, with newlines -input: "This is a line of text.\nThis is another line of text." -output: [Text(text="This is a line of text.\nThis is another line of text.")] +input: "This is a line of text.\nThis is another line of text.\nThis is another." +output: [Text(text="This is a line of text.\nThis is another line of text.\nThis is another.")] + +--- + +name: unicode +label: ensure unicode data is handled properly +input: "Thís ís å sëñtënce with diœcritiçs." +output: [Text(text="Thís ís å sëñtënce with diœcritiçs.")] From ecfb2c628f742c7c703fe67e8a0f7b5a51d62570 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 3 Feb 2013 14:16:17 -0500 Subject: [PATCH 05/67] Another test; handle errors when reading output line better. --- tests/_test_tokenizer.py | 16 ++++++++++------ tests/tokenizer/text.test | 7 +++++++ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 98d9434..bafb593 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -58,23 +58,27 @@ class TokenizerTestCase(object): raw = line[len("input:"):].strip() if raw[0] == '"' and raw[-1] == '"': raw = raw[1:-1] - data["input"] = raw.encode("raw_unicode_escape").decode("unicode_escape") + raw = raw.encode("raw_unicode_escape") + data["input"] = raw.decode("unicode_escape") elif line.startswith("output:"): raw = line[len("output:"):].strip() - data["output"] = eval(raw, vars(tokens)) + try: + data["output"] = eval(raw, vars(tokens)) + except Exception: + raise _TestParseError() except _TestParseError: if data["name"]: - error = "Could not parse test {0} in {1}" + error = "Could not parse test '{0}' in '{1}'" print(error.format(data["name"], filename)) else: - print("Could not parse a test in {0}".format(filename)) + print("Could not parse a test in '{0}'".format(filename)) continue if not data["name"]: - error = "A test in {0} was ignored because it lacked a name" + error = "A test in '{0}' was ignored because it lacked a name" print(error.format(filename)) continue if not data["input"] or not data["output"]: - error = "Test {0} in {1} was ignored because it lacked an input or an output" + error = "Test '{0}'' in '{1}' was ignored because it lacked an input or an output" print(error.format(data["name"], filename)) continue fname = "test_{0}{1}_{2}".format(filename, counter, data["name"]) diff --git a/tests/tokenizer/text.test b/tests/tokenizer/text.test index eb5b9b4..77d5f50 100644 --- a/tests/tokenizer/text.test +++ b/tests/tokenizer/text.test @@ -16,3 +16,10 @@ name: unicode label: ensure unicode data is handled properly input: "Thís ís å sëñtënce with diœcritiçs." output: [Text(text="Thís ís å sëñtënce with diœcritiçs.")] + +--- + +name: unicode2 +label: additional unicode check for non-BMP codepoints +input: "𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰" +output: [Text(text="𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰")] From eb1bd6b281ffe5e193825da4f36cdf1cf8b49767 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 3 Feb 2013 14:38:34 -0500 Subject: [PATCH 06/67] Add some basic tests for templates; adjust error messages again. 
--- tests/_test_tokenizer.py | 13 +++++++------ tests/tokenizer/templates.test | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 6 deletions(-) create mode 100644 tests/tokenizer/templates.test diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index bafb593..2571692 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -64,14 +64,15 @@ class TokenizerTestCase(object): raw = line[len("output:"):].strip() try: data["output"] = eval(raw, vars(tokens)) - except Exception: - raise _TestParseError() - except _TestParseError: + except Exception as err: + raise _TestParseError(err) + except _TestParseError as err: if data["name"]: - error = "Could not parse test '{0}' in '{1}'" - print(error.format(data["name"], filename)) + error = "Could not parse test '{0}' in '{1}':\n\t{2}" + print(error.format(data["name"], filename, err)) else: - print("Could not parse a test in '{0}'".format(filename)) + error = "Could not parse a test in '{0}':\n\t{1}" + print(error.format(filename, err)) continue if not data["name"]: error = "A test in '{0}' was ignored because it lacked a name" diff --git a/tests/tokenizer/templates.test b/tests/tokenizer/templates.test new file mode 100644 index 0000000..23ac38f --- /dev/null +++ b/tests/tokenizer/templates.test @@ -0,0 +1,32 @@ +name: no_params +label: simplest type of template +input: "{{template}}" +output: [TemplateOpen(), Text(text="template"), TemplateClose()] + +--- + +name: one_param_unnamed +label: basic template with one unnamed parameter +input: "{{foo|bar}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateClose()] + +--- + +name: one_param_named +label: basic template with one named parameter +input: "{{foo|bar=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: multiple_unnamed_params +label: basic template with multiple unnamed parameters +input: "{{foo|bar|baz|biz|buzz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateClose()] + +--- + +name: multiple_named_params +label: basic template with multiple named parameters +input: "{{foo|bar=baz|biz=buzz|buff=baff|usr=bin}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), Text(text="buzz"), TemplateParamSeparator(), Text(text="buff"), TemplateParamEquals(), Text(text="baff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamEquals(), Text(text="bin"), TemplateClose()] From 713b83a4d94e05bf907158aa6a5d98f7132d998c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 3 Feb 2013 17:41:55 -0500 Subject: [PATCH 07/67] Added a metric ton of template tests; adjustments; docstrings. 
--- README.rst | 3 +- tests/_test_tokenizer.py | 22 +++- tests/test_ctokenizer.py | 1 + tests/test_docs.py | 6 + tests/test_pytokenizer.py | 1 + tests/tokenizer/templates.test | 285 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 314 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 3901103..90e896f 100644 --- a/README.rst +++ b/README.rst @@ -18,7 +18,8 @@ so you can install the latest release with ``pip install mwparserfromhell`` cd mwparserfromhell python setup.py install -You can run the comprehensive unit testing suite with ``python setup.py test``. +You can run the comprehensive unit testing suite with +``python setup.py test -q``. Usage ----- diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 2571692..bef7569 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -32,8 +32,20 @@ class _TestParseError(Exception): class TokenizerTestCase(object): + """A base test case for tokenizers, whose tests are loaded dynamically. + + Subclassed along with unittest.TestCase to form TestPyTokenizer and + TestCTokenizer. Tests are loaded dynamically from files in the 'tokenizer' + directory. + """ @classmethod def _build_test_method(cls, funcname, data): + """Create and return a method to be treated as a test case method. + + *data* is a dict containing multiple keys: the *input* text to be + tokenized, the expected list of tokens as *output*, and an optional + *label* for the method's docstring. + """ def inner(self): actual = self.tokenizer().tokenize(data["input"]) self.assertEqual(actual, data["output"]) @@ -44,8 +56,10 @@ class TokenizerTestCase(object): @classmethod def _load_tests(cls, filename, text): - counter = 1 + """Load all tests in *text* from the file *filename*.""" tests = text.split("\n---\n") + counter = 1 + digits = len(str(len(tests))) for test in tests: data = {"name": "", "label": "", "input": "", "output": []} try: @@ -79,16 +93,18 @@ class TokenizerTestCase(object): print(error.format(filename)) continue if not data["input"] or not data["output"]: - error = "Test '{0}'' in '{1}' was ignored because it lacked an input or an output" + error = "Test '{0}' in '{1}' was ignored because it lacked an input or an output" print(error.format(data["name"], filename)) continue - fname = "test_{0}{1}_{2}".format(filename, counter, data["name"]) + number = str(counter).zfill(digits) + fname = "test_{0}{1}_{2}".format(filename, number, data["name"]) meth = cls._build_test_method(fname, data) setattr(cls, fname, meth) counter += 1 @classmethod def build(cls): + """Load and install all tests from the 'tokenizer' directory.""" directory = path.join(path.dirname(__file__), "tokenizer") extension = ".test" for filename in listdir(directory): diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 7d3ffd7..86f4787 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -25,6 +25,7 @@ import unittest from _test_tokenizer import TokenizerTestCase class TestCTokenizer(TokenizerTestCase, unittest.TestCase): + """Test cases for the C tokenizer.""" @classmethod def setUpClass(cls): from mwparserfromhell.parser._tokenizer import CTokenizer diff --git a/tests/test_docs.py b/tests/test_docs.py index 5ec25e1..d99652f 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -29,6 +29,7 @@ import mwparserfromhell from mwparserfromhell.compat import py3k, str, StringIO class TestDocs(unittest.TestCase): + """Integration test cases for mwparserfromhell's documentation.""" def assertPrint(self, input, 
output): """Assertion check that *input*, when printed, produces *output*.""" buff = StringIO() @@ -37,6 +38,7 @@ class TestDocs(unittest.TestCase): self.assertEqual(buff.read(), output) def test_readme_1(self): + """test a block of example code in the README""" text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" wikicode = mwparserfromhell.parse(text) self.assertPrint(wikicode, @@ -56,6 +58,7 @@ class TestDocs(unittest.TestCase): self.assertPrint(template.get("eggs").value, "spam") def test_readme_2(self): + """test a block of example code in the README""" code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") if py3k: self.assertPrint(code.filter_templates(), @@ -71,6 +74,7 @@ class TestDocs(unittest.TestCase): "template") def test_readme_3(self): + """test a block of example code in the README""" text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" temps = mwparserfromhell.parse(text).filter_templates(recursive=True) if py3k: @@ -80,6 +84,7 @@ class TestDocs(unittest.TestCase): self.assertPrint(temps, res) def test_readme_4(self): + """test a block of example code in the README""" text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}" code = mwparserfromhell.parse(text) for template in code.filter_templates(): @@ -101,6 +106,7 @@ class TestDocs(unittest.TestCase): self.assertEqual(text, code) def test_readme_5(self): + """test a block of example code in the README; includes a web call""" url1 = "http://en.wikipedia.org/w/api.php" url2 = "http://en.wikipedia.org/w/index.php?title={0}&action=raw" title = "Test" diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index f739726..4254748 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -25,6 +25,7 @@ import unittest from _test_tokenizer import TokenizerTestCase class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): + """Test cases for the Python tokenizer.""" @classmethod def setUpClass(cls): from mwparserfromhell.parser.tokenizer import Tokenizer diff --git a/tests/tokenizer/templates.test b/tests/tokenizer/templates.test index 23ac38f..7399022 100644 --- a/tests/tokenizer/templates.test +++ b/tests/tokenizer/templates.test @@ -30,3 +30,288 @@ name: multiple_named_params label: basic template with multiple named parameters input: "{{foo|bar=baz|biz=buzz|buff=baff|usr=bin}}" output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), Text(text="buzz"), TemplateParamSeparator(), Text(text="buff"), TemplateParamEquals(), Text(text="baff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamEquals(), Text(text="bin"), TemplateClose()] + +--- + +name: multiple_mixed_params +label: basic template with multiple unnamed/named parameters +input: "{{foo|bar=baz|biz|buzz=buff|usr|bin}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateParamEquals(), Text(text="buff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamSeparator(), Text(text="bin"), TemplateClose()] + +--- + +name: multiple_mixed_params2 +label: basic template with multiple unnamed/named parameters in another order +input: "{{foo|bar|baz|biz=buzz|buff=baff|usr=bin}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), 
TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), Text(text="buzz"), TemplateParamSeparator(), Text(text="buff"), TemplateParamEquals(), Text(text="baff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamEquals(), Text(text="bin"), TemplateClose()] + +--- + +name: nested_unnamed_param +label: nested template as an unnamed parameter +input: "{{foo|{{bar}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()] + +--- + +name: nested_named_param_value +label: nested template as a parameter value with a named parameter +input: "{{foo|bar={{baz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_named_param_name_and_value +label: nested templates as a parameter name and value +input: "{{foo|{{bar}}={{baz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start +label: nested template at the beginning of a template name +input: "{{{{foo}}bar}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose()] + +--- + +name: nested_name_start_unnamed_param +label: nested template at the beginning of a template name and as an unnamed parameter +input: "{{{{foo}}bar|{{baz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_named_param_value +label: nested template at the beginning of a template name and as a parameter value with a named parameter +input: "{{{{foo}}bar|baz={{biz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_named_param_name_and_value +label: nested template at the beginning of a template name and as a parameter name and value +input: "{{{{foo}}bar|{{baz}}={{biz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_end +label: nested template at the end of a template name +input: "{{foo{{bar}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_end_unnamed_param +label: nested template at the end of a template name and as an unnamed parameter +input: "{{foo{{bar}}|{{baz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_end_named_param_value +label: nested template at the end of a template name and as a parameter value with a named parameter +input: "{{foo{{bar}}|baz={{biz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), 
Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_end_named_param_name_and_value +label: nested template at the end of a template name and as a parameter name and value +input: "{{foo{{bar}}|{{baz}}={{biz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_mid +label: nested template in the middle of a template name +input: "{{foo{{bar}}baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose()] + +--- + +name: nested_name_mid_unnamed_param +label: nested template in the middle of a template name and as an unnamed parameter +input: "{{foo{{bar}}baz|{{biz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_mid_named_param_value +label: nested template in the middle of a template name and as a parameter value with a named parameter +input: "{{foo{{bar}}baz|biz={{buzz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_mid_named_param_name_and_value +label: nested template in the middle of a template name and as a parameter name and value +input: "{{foo{{bar}}baz|{{biz}}={{buzz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_end +label: nested template at the beginning and end of a template name +input: "{{{{foo}}{{bar}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_end_unnamed_param +label: nested template at the beginning and end of a template name and as an unnamed parameter +input: "{{{{foo}}{{bar}}|{{baz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_end_named_param_value +label: nested template at the beginning and end of a template name and as a parameter value with a named parameter +input: "{{{{foo}}{{bar}}|baz={{biz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_end_named_param_name_and_value +label: nested template at the beginning and end of a template name and as a parameter name and value +input: "{{{{foo}}{{bar}}|{{baz}}={{biz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), 
TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_names_multiple +label: multiple nested templates within nested templates +input: "{{{{{{{{foo}}bar}}baz}}biz}}" +output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateClose()] + +--- + +name: nested_names_multiple_unnamed_param +label: multiple nested templates within nested templates with a nested unnamed parameter +input: "{{{{{{{{foo}}bar}}baz}}biz|{{buzz}}}}" +output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_names_multiple_named_param_value +label: multiple nested templates within nested templates with a nested parameter value in a named parameter +input: "{{{{{{{{foo}}bar}}baz}}biz|buzz={{bin}}}}" +output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateParamEquals(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] + +--- + +name: nested_names_multiple_named_param_name_and_value +label: multiple nested templates within nested templates with a nested parameter name and value +input: "{{{{{{{{foo}}bar}}baz}}biz|{{buzz}}={{bin}}}}" +output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] + +--- + +name: incomplete_tests + +"{{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}" + +"{{\nfoobar}}" +"{{foobar\n}}" +"{{\nfoobar\n}}" +"{{foo\nbar}}" +"{{\nfoo\nbar}}" +"{{foo\nbar\n}}" +"{{\nfoo\nbar\n}}" + +"{{foo|\nbar}}" +"{{foo|bar\n}}" +"{{foo|\nbar\n}}" +"{{foo|\nb\nar}}" +"{{foo|b\nar\n}}" +"{{foo|\nb\nar\n}}" +"{{\nfoo|\nbar}}" +"{{\nfoo|bar\n}}" +"{{\nfoo|\nbar\n}}" +"{{\nfoo|\nb\nar}}" +"{{\nfoo|b\nar\n}}" +"{{\nfoo|\nb\nar\n}}" +"{{foo\n|\nbar}}" +"{{foo\n|bar\n}}" +"{{foo\n|\nbar\n}}" +"{{foo\n|\nb\nar}}" +"{{foo\n|b\nar\n}}" +"{{foo\n|\nb\nar\n}}" +"{{\nfoo\n|\nbar}}" +"{{\nfoo\n|bar\n}}" +"{{\nfoo\n|\nbar\n}}" +"{{\nfoo\n|\nb\nar}}" +"{{\nfoo\n|b\nar\n}}" +"{{\nfoo\n|\nb\nar\n}}" +"{{f\noo|\nbar}}" +"{{\nf\noo|\nbar}}" +"{{f\noo\n|\nbar}}" +"{{\nf\noo\n|\nbar}}" + +"{{foo|1=\nbar}}" +"{{foo|1=bar\n}}" +"{{foo|1=\nbar\n}}" +"{{foo|1=\nb\nar}}" +"{{foo|1=b\nar\n}}" +"{{foo|1=\nb\nar\n}}" +"{{foo|\nbar=baz}}" +"{{foo|bar\n=baz}}" +"{{foo|\nbar\n=baz}}" +"{{foo|\nb\nar=baz}}" +"{{foo|b\nar\n=baz}}" +"{{foo|\nb\nar\n=baz}}" +"{{foo|\nbar=baz\n}}" +"{{foo|bar\n=baz\n}}" +"{{foo|\nbar\n=baz\n}}" +"{{foo|\nb\nar=baz\n}}" +"{{foo|b\nar\n=baz\n}}" +"{{foo|\nb\nar\n=baz\n}}" +"{{foo|\nbar=\nbaz}}" +"{{foo|bar\n=\nbaz}}" +"{{foo|\nbar\n=\nbaz}}" +"{{foo|\nb\nar=\nbaz}}" +"{{foo|b\nar\n=\nbaz}}" +"{{foo|\nb\nar\n=\nbaz}}" 
+"{{foo|\nbar=\nbaz\n}}" +"{{foo|bar\n=\nbaz\n}}" +"{{foo|\nbar\n=\nbaz\n}}" +"{{foo|\nb\nar=\nbaz\n}}" +"{{foo|b\nar\n=\nbaz\n}}" +"{{foo|\nb\nar\n=\nbaz\n}}" +"{{foo|\nbar=ba\nz}}" +"{{foo|bar\n=ba\nz}}" +"{{foo|\nbar\n=ba\nz}}" +"{{foo|\nb\nar=ba\nz}}" +"{{foo|b\nar\n=ba\nz}}" +"{{foo|\nb\nar\n=ba\nz}}" + +"{{\nfoo\n|\nbar\n=\nb\naz\n|\nb\nuz\n}}" +"{{\nfoo\n|\nb\nar\n|\nbaz\n=\nb\nuz\n}}" + +"{{\nfoo\n|\n{{\nbar\n|\nbaz\n=\nb\niz\n}}\n=\nb\nuzz\n}}" + +"{{foo{bar}}" +"{{foo}bar}}" +"{{{foobar}}" +"{{foo{b{ar}}" +"{{foo[bar}}" +"{{foo]bar}}" +"{{[foobar}}" +"{{foobar]}}" + +"{{foobar" +"{{foobar}" +"{{foobar|" +"{{foo|bar" +"{{foo|bar|" +"{{foo|bar=" +"{{foo|bar=|" +"{{foo|bar=baz" +"{{foo|bar=baz|" +"{{foo|bar|baz" +"{{foo|bar|baz=" +"{{foo|bar|baz=biz" +"{{foo|bar=baz|biz" +"{{foo|bar=baz|biz=" +"{{foo|bar=baz|biz=buzz" From d500f8972e8a3ae0bfee706d40b76b3bfa1fc00d Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 16 Feb 2013 13:01:41 -0500 Subject: [PATCH 08/67] Add a few more tests; use assert*(expected, actual) instead of opposite. --- tests/_test_tokenizer.py | 7 +++-- tests/test_docs.py | 6 ++-- tests/tokenizer/templates.test | 68 ++++++++++++++++++++++++++++++++++++------ 3 files changed, 66 insertions(+), 15 deletions(-) diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index bef7569..114b835 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -47,8 +47,9 @@ class TokenizerTestCase(object): *label* for the method's docstring. """ def inner(self): + expected = data["output"] actual = self.tokenizer().tokenize(data["input"]) - self.assertEqual(actual, data["output"]) + self.assertEqual(expected, actual) if not py3k: inner.__name__ = funcname.encode("utf8") inner.__doc__ = data["label"] @@ -61,7 +62,7 @@ class TokenizerTestCase(object): counter = 1 digits = len(str(len(tests))) for test in tests: - data = {"name": "", "label": "", "input": "", "output": []} + data = {"name": None, "label": None, "input": None, "output": None} try: for line in test.strip().splitlines(): if line.startswith("name:"): @@ -92,7 +93,7 @@ class TokenizerTestCase(object): error = "A test in '{0}' was ignored because it lacked a name" print(error.format(filename)) continue - if not data["input"] or not data["output"]: + if data["input"] is None or data["output"] is None: error = "Test '{0}' in '{1}' was ignored because it lacked an input or an output" print(error.format(data["name"], filename)) continue diff --git a/tests/test_docs.py b/tests/test_docs.py index d99652f..8673cb9 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -35,7 +35,7 @@ class TestDocs(unittest.TestCase): buff = StringIO() print(input, end="", file=buff) buff.seek(0) - self.assertEqual(buff.read(), output) + self.assertEqual(output, buff.read()) def test_readme_1(self): """test a block of example code in the README""" @@ -115,9 +115,9 @@ class TestDocs(unittest.TestCase): raw = urllib.urlopen(url1, urllib.urlencode(data)).read() res = json.loads(raw) text = res["query"]["pages"].values()[0]["revisions"][0]["*"] - actual = mwparserfromhell.parse(text) expected = urllib.urlopen(url2.format(title)).read().decode("utf8") - self.assertEqual(actual, expected) + actual = mwparserfromhell.parse(text) + self.assertEqual(expected, actual) if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/tokenizer/templates.test b/tests/tokenizer/templates.test index 7399022..348e1f5 100644 --- a/tests/tokenizer/templates.test +++ b/tests/tokenizer/templates.test @@ -208,17 +208,62 @@ 
output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(te --- -name: incomplete_tests +name: mixed_nested_templates +label: mixed assortment of nested templates within template names, parameter names, and values +input: "{{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}" +output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), Text(text="biz"), TemplateClose(), Text(text="buzz"), TemplateClose(), Text(text="usr"), TemplateParamSeparator(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] + +--- + +name: newline_start +label: a newline at the start of a template name +input: "{{\nfoobar}}" +output: [TemplateOpen(), Text(text="\nfoobar"), TemplateClose()] + +--- + +name: newline_end +label: a newline at the end of a template name +input: "{{foobar\n}}" +output: [TemplateOpen(), Text(text="foobar\n"), TemplateClose()] + +--- + +name: newline_start_end +label: a newline at the start and end of a template name +input: "{{\nfoobar\n}}" +output: [TemplateOpen(), Text(text="\nfoobar\n"), TemplateClose()] + +--- -"{{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}" +name: newline_mid +label: a newline at the middle of a template name +input: "{{foo\nbar}}" +output: [Text(text="{{foo\nbar}}")] -"{{\nfoobar}}" -"{{foobar\n}}" -"{{\nfoobar\n}}" -"{{foo\nbar}}" -"{{\nfoo\nbar}}" -"{{foo\nbar\n}}" -"{{\nfoo\nbar\n}}" +--- + +name: newline_start_mid +label: a newline at the start and middle of a template name +input: "{{\nfoo\nbar}}" +output: [Text(text="{{\nfoo\nbar}}")] + +--- + +name: newline_mid_end +label: a newline at the middle and end of a template name +input: "{{foo\nbar\n}}" +output: [Text(text="{{foo\nbar\n}}")] + +--- + +name: newline_start_mid_end +label: a newline at the start, middle, and end of a template name +input: "{{\nfoo\nbar\n}}" +output: [Text(text="{{\nfoo\nbar\n}}")] + +--- +name: incomplete_tests "{{foo|\nbar}}" "{{foo|bar\n}}" @@ -300,6 +345,11 @@ name: incomplete_tests "{{[foobar}}" "{{foobar]}}" +"{{foo|ba{r}}" +"{{foo|ba{r}}}" +"{{foo|ba{r}=baz}}" +"{{foo|ba[r]}}" + "{{foobar" "{{foobar}" "{{foobar|" From 660a0c31e60ddde8435fb3c8c743e5f1c9f7ea77 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 16 Feb 2013 15:30:12 -0500 Subject: [PATCH 09/67] Adding a bunch more tests. 
--- tests/tokenizer/templates.test | 226 +++++++++++++++++++++++++++++++++++------ 1 file changed, 197 insertions(+), 29 deletions(-) diff --git a/tests/tokenizer/templates.test b/tests/tokenizer/templates.test index 348e1f5..9223d61 100644 --- a/tests/tokenizer/templates.test +++ b/tests/tokenizer/templates.test @@ -263,36 +263,204 @@ input: "{{\nfoo\nbar\n}}" output: [Text(text="{{\nfoo\nbar\n}}")] --- -name: incomplete_tests -"{{foo|\nbar}}" -"{{foo|bar\n}}" -"{{foo|\nbar\n}}" -"{{foo|\nb\nar}}" -"{{foo|b\nar\n}}" -"{{foo|\nb\nar\n}}" -"{{\nfoo|\nbar}}" -"{{\nfoo|bar\n}}" -"{{\nfoo|\nbar\n}}" -"{{\nfoo|\nb\nar}}" -"{{\nfoo|b\nar\n}}" -"{{\nfoo|\nb\nar\n}}" -"{{foo\n|\nbar}}" -"{{foo\n|bar\n}}" -"{{foo\n|\nbar\n}}" -"{{foo\n|\nb\nar}}" -"{{foo\n|b\nar\n}}" -"{{foo\n|\nb\nar\n}}" -"{{\nfoo\n|\nbar}}" -"{{\nfoo\n|bar\n}}" -"{{\nfoo\n|\nbar\n}}" -"{{\nfoo\n|\nb\nar}}" -"{{\nfoo\n|b\nar\n}}" -"{{\nfoo\n|\nb\nar\n}}" -"{{f\noo|\nbar}}" -"{{\nf\noo|\nbar}}" -"{{f\noo\n|\nbar}}" -"{{\nf\noo\n|\nbar}}" +name: newline_unnamed_param_start +label: a newline at the start of an unnamed template parameter +input: "{{foo|\nbar}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateClose()] + +--- + +name: newline_unnamed_param_end +label: a newline at the end of an unnamed template parameter +input: "{{foo|bar\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateClose()] + +--- + +name: newline_unnamed_param_start_end +label: a newline at the start and end of an unnamed template parameter +input: "{{foo|\nbar\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateClose()] + +--- + +name: newline_unnamed_param_start_mid +label: a newline at the start and middle of an unnamed template parameter +input: "{{foo|\nb\nar}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar"), TemplateClose()] + +--- + +name: newline_unnamed_param_mid_end +label: a newline at the middle and end of an unnamed template parameter +input: "{{foo|b\nar\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="b\nar\n"), TemplateClose()] + +--- + +name: newline_unnamed_param_start_mid_end +label: a newline at the start, middle, and end of an unnamed template parameter +input: "{{foo|\nb\nar\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] + +--- + +name: newline_start_unnamed_param_start +label: a newline at the start of a template name and at the start of an unnamed template parameter +input: "{{\nfoo|\nbar}}" +output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateClose()] + +--- + +name: newline_start_unnamed_param_end +label: a newline at the start of a template name and at the end of an unnamed template parameter +input: "{{\nfoo|bar\n}}" +output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateClose()] + +--- + +name: newline_start_unnamed_param_start_end +label: a newline at the start of a template name and at the start and end of an unnamed template parameter +input: "{{\nfoo|\nbar\n}}" +output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateClose()] + +--- + +name: newline_start_unnamed_param_start_mid +label: a newline at the start of a template name and at the start and middle of an unnamed template parameter 
+input: "{{\nfoo|\nb\nar}}" +output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="\nb\nar"), TemplateClose()] + +--- + +name: newline_start_unnamed_param_mid_end +label: a newline at the start of a template name and at the middle and end of an unnamed template parameter +input: "{{\nfoo|b\nar\n}}" +output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="b\nar\n"), TemplateClose()] + +--- + +name: newline_start_unnamed_param_start_mid_end +label: a newline at the start of a template name and at the start, middle, and end of an unnamed template parameter +input: "{{\nfoo|\nb\nar\n}}" +output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] + +--- + +name: newline_end_unnamed_param_start +label: a newline at the end of a template name and at the start of an unnamed template parameter +input: "{{foo\n|\nbar}}" +output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="\nbar"), TemplateClose()] + +--- + +name: newline_end_unnamed_param_end +label: a newline at the end of a template name and at the end of an unnamed template parameter +input: "{{foo\n|bar\n}}" +output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="bar\n"), TemplateClose()] + +--- + +name: newline_end_unnamed_param_start_end +label: a newline at the end of a template name and at the start and end of an unnamed template parameter +input: "{{foo\n|\nbar\n}}" +output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateClose()] + +--- + +name: newline_end_unnamed_param_start_mid +label: a newline at the end of a template name and at the start and middle of an unnamed template parameter +input: "{{foo\n|\nb\nar}}" +output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="\nb\nar"), TemplateClose()] + +--- + +name: newline_end_unnamed_param_mid_end +label: a newline at the end of a template name and at the middle and end of an unnamed template parameter +input: "{{foo\n|b\nar\n}}" +output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="b\nar\n"), TemplateClose()] + +--- + +name: newline_end_unnamed_param_start_mid_end +label: a newline at the end of a template name and at the start, middle, and end of an unnamed template parameter +input: "{{foo\n|\nb\nar\n}}" +output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] + +--- + +name: newline_start_end_unnamed_param_end +label: a newline at the start and end of a template name and the start of an unnamed template parameter +input: "{{\nfoo\n|\nbar}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nbar"), TemplateClose()] + +--- + +name: newline_start_end_unnamed_param_end +label: a newline at the start and end of a template name and the end of an unnamed template parameter +input: "{{\nfoo\n|bar\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="bar\n"), TemplateClose()] + +--- + +name: newline_start_end_unnamed_param_start_end +label: a newline at the start and end of a template name and the start and end of an unnamed template parameter +input: "{{\nfoo\n|\nbar\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateClose()] + +--- + +name: newline_start_end_unnamed_param_start_mid +label: a newline at the start and end of a template name and the 
start and middle of an unnamed template parameter +input: "{{\nfoo\n|\nb\nar}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar"), TemplateClose()] + +--- + +name: newline_start_end_unnamed_param_mid_end +label: a newline at the start and end of a template name and the middle and end of an unnamed template parameter +input: "{{\nfoo\n|b\nar\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="b\nar\n"), TemplateClose()] + +--- + +name: newline_start_end_unnamed_param_start_mid_end +label: a newline at the start and end of a template name and the start, middle, and end of an unnamed template parameter +input: "{{\nfoo\n|\nb\nar\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] + +--- + +name: newline_mid_unnamed_param_start +label: a newline at the middle of a template name and at the start of an unnamed template parameter +input: "{{f\noo|\nbar}}" +output: [Text(text="{{f\noo|\nbar}}")] + +--- + +name: newline_start_mid_unnamed_param_start +label: a newline at the start and middle of a template name and at the start of an unnamed template parameter +input: "{{\nf\noo|\nbar}}" +output: [Text(text="{{\nf\noo|\nbar}}")] + +--- + +name: newline_mid_end_unnamed_param_start +label: a newline at the middle and end of a template name and at the start of an unnamed template parameter +input: "{{f\noo\n|\nbar}}" +output: [Text(text="{{f\noo\n|\nbar}}")] + +--- + +name: newline_start_mid_end_unnamed_param_start +label: a newline at the start, middle, and end of a template name and at the start of an unnamed template parameter +input: "{{\nf\noo\n|\nbar}}" +output: [Text(text="{{\nf\noo\n|\nbar}}")] + +--- + +name: incomplete_tests "{{foo|1=\nbar}}" "{{foo|1=bar\n}}" From 556477f8015bd987167e7e0beee0e78ae02b1a47 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 17 Feb 2013 15:04:19 -0500 Subject: [PATCH 10/67] Adding a bunch more tests.
--- tests/tokenizer/templates.test | 296 +++++++++++++++++++++++++++++++++++------ 1 file changed, 259 insertions(+), 37 deletions(-) diff --git a/tests/tokenizer/templates.test b/tests/tokenizer/templates.test index 9223d61..c3416ff 100644 --- a/tests/tokenizer/templates.test +++ b/tests/tokenizer/templates.test @@ -460,44 +460,266 @@ output: [Text(text="{{\nf\noo\n|\nbar}}")] --- -name: incomplete_tests +name: newline_named_param_value_start +label: a newline at the start of a named parameter value +input: "{{foo|1=\nbar}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nbar"), TemplateClose()] + +--- + +name: newline_named_param_value_end +label: a newline at the end of a named parameter value +input: "{{foo|1=bar\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="bar\n"), TemplateClose()] + +--- + +name: newline_named_param_value_start_end +label: a newline at the start and end of a named parameter value +input: "{{foo|1=\nbar\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nbar\n"), TemplateClose()] + +--- + +name: newline_named_param_value_start_mid +label: a newline at the start and middle of a named parameter value +input: "{{foo|1=\nb\nar}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nb\nar"), TemplateClose()] + +--- + +name: newline_named_param_value_mid_end +label: a newline at the middle and end of a named parameter value +input: "{{foo|1=b\nar\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="b\nar\n"), TemplateClose()] + +--- + +name: newline_named_param_value_start_mid_end +label: a newline at the start, middle, and end of a named parameter value +input: "{{foo|1=\nb\nar\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nb\nar\n"), TemplateClose()] + +--- + +name: newline_named_param_name_start +label: a newline at the start of a parameter name +input: "{{foo|\nbar=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: newline_named_param_name_end +label: a newline at the end of a parameter name +input: "{{foo|bar\n=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: newline_named_param_name_start_end +label: a newline at the start and end of a parameter name +input: "{{foo|\nbar\n=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: newline_named_param_name_mid +label: a newline at the middle of a parameter name +input: "{{foo|b\nar=baz}}" +output: [Text(text="{{foo|b\nar=baz}}")] + +--- + +name: newline_named_param_name_start_mid +label: a newline at the start and middle of a parameter name +input: "{{foo|\nb\nar=baz}}" +output: [Text(text="{{foo|\nb\nar=baz}}")] + +--- + +name: newline_named_param_name_mid_end +label: a newline at the middle and end of a parameter name +input: "{{foo|b\nar\n=baz}}" +output: [Text(text="{{foo|b\nar\n=baz}}")] + +--- + +name: 
newline_named_param_name_start_mid_end +label: a newline at the start, middle, and end of a parameter name +input: "{{foo|\nb\nar\n=baz}}" +output: [Text(text="{{foo|\nb\nar\n=baz}}")] + +--- + +name: newline_named_param_name_start_param_value_end +label: a newline at the start of a parameter name and the end of a parameter value +input: "{{foo|\nbar=baz\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateParamEquals(), Text(text="baz\n"), TemplateClose()] + +--- + +name: newline_named_param_name_end_param_value_end +label: a newline at the end of a parameter name and the end of a parameter value +input: "{{foo|bar\n=baz\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateParamEquals(), Text(text="baz\n"), TemplateClose()] + +--- + +name: newline_named_param_name_start_end_param_value_end +label: a newline at the start and end of a parameter name and the end of a parameter value +input: "{{foo|\nbar\n=baz\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateParamEquals(), Text(text="baz\n"), TemplateClose()] + +--- + +name: newline_named_param_name_start_mid_param_value_end +label: a newline at the start and middle of a parameter name and the end of a parameter value +input: "{{foo|\nb\nar=baz\n}}" +output: [Text(text="{{foo|\nb\nar=baz\n}}")] + +--- + +name: newline_named_param_name_mid_end_param_value_end +label: a newline at the middle and end of a parameter name and the end of a parameter value +input: "{{foo|b\nar\n=baz\n}}" +output: [Text(text="{{foo|b\nar\n=baz\n}}")] + +--- + +name: newline_named_param_name_start_mid_end_param_value_end +label: a newline at the start, middle, and end of a parameter name and at the end of a parameter value +input: "{{foo|\nb\nar\n=baz\n}}" +output: [Text(text="{{foo|\nb\nar\n=baz\n}}")] + +--- + +name: newline_named_param_name_start_param_value_start +label: a newline at the start of a parameter name and at the start of a parameter value +input: "{{foo|\nbar=\nbaz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateParamEquals(), Text(text="\nbaz"), TemplateClose()] + +--- + +name: newline_named_param_name_end_param_value_start +label: a newline at the end of a parameter name and at the start of a parameter value +input: "{{foo|bar\n=\nbaz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateParamEquals(), Text(text="\nbaz"), TemplateClose()] + +--- + +name: newline_named_param_name_start_end_param_value_start +label: a newline at the start and end of a parameter name and at the start of a parameter value +input: "{{foo|\nbar\n=\nbaz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateParamEquals(), Text(text="\nbaz"), TemplateClose()] + +--- + +name: newline_named_param_name_start_mid_param_value_start +label: a newline at the start and middle of a parameter name and at the start of a parameter value +input: "{{foo|\nb\nar=\nbaz}}" +output: [Text(text="{{foo|\nb\nar=\nbaz}}")] + +--- + +name: newline_named_param_name_mid_end_param_value_start +label: a newline at the middle and end of a parameter name and at the start of a parameter value +input: "{{foo|b\nar\n=\nbaz}}" +output: [Text(text="{{foo|b\nar\n=\nbaz}}")] + +--- + +name: newline_named_param_name_start_mid_end_param_value_start +label: a newline at the start, middle, and end of a parameter 
name and at the start of a parameter value +input: "{{foo|\nb\nar\n=\nbaz}}" +output: [Text(text="{{foo|\nb\nar\n=\nbaz}}")] + +--- + +name: newline_named_param_name_start_param_value_start_end +label: a newline at the start of a parameter name and at the start and end of a parameter value +input: "{{foo|\nbar=\nbaz\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateParamEquals(), Text(text="\nbaz\n"), TemplateClose()] + +--- + +name: newline_named_param_name_end_param_value_start_end +label: a newline at the end of a parameter name and at the start and end of a parameter value +input: "{{foo|bar\n=\nbaz\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateParamEquals(), Text(text="\nbaz\n"), TemplateClose()] + +--- + +name: newline_named_param_name_start_end_param_value_start_end +label: a newline at the start and end of a parameter name and at the start and end of a parameter value +input: "{{foo|\nbar\n=\nbaz\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateParamEquals(), Text(text="\nbaz\n"), TemplateClose()] + +--- -"{{foo|1=\nbar}}" -"{{foo|1=bar\n}}" -"{{foo|1=\nbar\n}}" -"{{foo|1=\nb\nar}}" -"{{foo|1=b\nar\n}}" -"{{foo|1=\nb\nar\n}}" -"{{foo|\nbar=baz}}" -"{{foo|bar\n=baz}}" -"{{foo|\nbar\n=baz}}" -"{{foo|\nb\nar=baz}}" -"{{foo|b\nar\n=baz}}" -"{{foo|\nb\nar\n=baz}}" -"{{foo|\nbar=baz\n}}" -"{{foo|bar\n=baz\n}}" -"{{foo|\nbar\n=baz\n}}" -"{{foo|\nb\nar=baz\n}}" -"{{foo|b\nar\n=baz\n}}" -"{{foo|\nb\nar\n=baz\n}}" -"{{foo|\nbar=\nbaz}}" -"{{foo|bar\n=\nbaz}}" -"{{foo|\nbar\n=\nbaz}}" -"{{foo|\nb\nar=\nbaz}}" -"{{foo|b\nar\n=\nbaz}}" -"{{foo|\nb\nar\n=\nbaz}}" -"{{foo|\nbar=\nbaz\n}}" -"{{foo|bar\n=\nbaz\n}}" -"{{foo|\nbar\n=\nbaz\n}}" -"{{foo|\nb\nar=\nbaz\n}}" -"{{foo|b\nar\n=\nbaz\n}}" -"{{foo|\nb\nar\n=\nbaz\n}}" -"{{foo|\nbar=ba\nz}}" -"{{foo|bar\n=ba\nz}}" -"{{foo|\nbar\n=ba\nz}}" -"{{foo|\nb\nar=ba\nz}}" -"{{foo|b\nar\n=ba\nz}}" -"{{foo|\nb\nar\n=ba\nz}}" +name: newline_named_param_name_start_mid_param_value_start_end +label: a newline at the start and middle of a parameter name and at the start and end of a parameter value +input: "{{foo|\nb\nar=\nbaz\n}}" +output: [Text(text="{{foo|\nb\nar=\nbaz\n}}")] + +--- + +name: newline_named_param_name_mid_end_param_value_start_end +label: a newline at the middle and end of a parameter name and at the start and end of a parameter value +input: "{{foo|b\nar\n=\nbaz\n}}" +output: [Text(text="{{foo|b\nar\n=\nbaz\n}}")] + +--- + +name: newline_named_param_name_start_mid_end_param_value_start_end +label: a newline at the start, middle, and end of a parameter name and at the start and end of a parameter value +input: "{{foo|\nb\nar\n=\nbaz\n}}" +output: [Text(text="{{foo|\nb\nar\n=\nbaz\n}}")] + +--- + +name: newline_named_param_name_start_param_value_mid +label: a newline at the start of a parameter name and at the middle of a parameter value +input: "{{foo|\nbar=ba\nz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateParamEquals(), Text(text="ba\nz"), TemplateClose()] + +--- + +name: newline_named_param_name_end_param_value_mid +label: a newline at the end of a parameter name and at the middle of a parameter value +input: "{{foo|bar\n=ba\nz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateParamEquals(), Text(text="ba\nz"), TemplateClose()] + +--- + +name: 
newline_named_param_name_start_end_param_value_mid +label: a newline at the start and end of a parameter name and at the middle of a parameter value +input: "{{foo|\nbar\n=ba\nz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateParamEquals(), Text(text="ba\nz"), TemplateClose()] + +--- + +name: newline_named_param_name_start_mid_param_value_mid +label: a newline at the start and middle of a parameter name and at the middle of a parameter value +input: "{{foo|\nb\nar=ba\nz}}" +output: [Text(text="{{foo|\nb\nar=ba\nz}}")] + +--- + +name: newline_named_param_name_mid_end_param_value_mid +label: a newline at the middle and end of a parameter name and at the middle of a parameter value +input: "{{foo|b\nar\n=ba\nz}}" +output: [Text(text="{{foo|b\nar\n=ba\nz}}")] + +--- + +name: newline_named_param_start_mid_end_param_value_mid +label: a newline at the start, middle, and end of a parameter name and at the middle of a parameter value +input: "{{foo|\nb\nar\n=ba\nz}}" +output: [Text(text="{{foo|\nb\nar\n=ba\nz}}")] + +--- + +name: incomplete_tests "{{\nfoo\n|\nbar\n=\nb\naz\n|\nb\nuz\n}}" "{{\nfoo\n|\nb\nar\n|\nbaz\n=\nb\nuz\n}}" From 24c55aeeb183f4b7643e521e3125a8610a74674e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 17 Feb 2013 21:52:08 -0500 Subject: [PATCH 11/67] Adding a syntax highlighter for the test-case format. --- tests/MWPFHTestCase.tmlanguage | 130 +++++++++++++++++++++ tests/_test_tokenizer.py | 2 +- .../tokenizer/{templates.test => templates.mwtest} | 0 tests/tokenizer/{text.test => text.mwtest} | 0 4 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 tests/MWPFHTestCase.tmlanguage rename tests/tokenizer/{templates.test => templates.mwtest} (100%) rename tests/tokenizer/{text.test => text.mwtest} (100%) diff --git a/tests/MWPFHTestCase.tmlanguage b/tests/MWPFHTestCase.tmlanguage new file mode 100644 index 0000000..e6ea7f0 --- /dev/null +++ b/tests/MWPFHTestCase.tmlanguage @@ -0,0 +1,130 @@ + + + + + fileTypes + + mwtest + + name + MWParserFromHell Test Case + patterns + + + match + --- + name + markup.heading.divider.mwpfh + + + captures + + 1 + + name + keyword.other.name.mwpfh + + 2 + + name + variable.other.name.mwpfh + + + match + (name:)\s*(\w*) + name + meta.name.mwpfh + + + captures + + 1 + + name + keyword.other.label.mwpfh + + 2 + + name + comment.line.other.label.mwpfh + + + match + (label:)\s*(.*) + name + meta.label.mwpfh + + + captures + + 1 + + name + keyword.other.input.mwpfh + + 2 + + name + string.quoted.double.input.mwpfh + + + match + (input:)\s*(.*) + name + meta.input.mwpfh + + + captures + + 1 + + name + keyword.other.output.mwpfh + + + match + (output:) + name + meta.output.mwpfh + + + captures + + 1 + + name + support.language.token.mwpfh + + + match + (\w+)\s*\( + name + meta.name.token.mwpfh + + + captures + + 1 + + name + variable.parameter.token.mwpfh + + + match + (\w+)\s*(=) + name + meta.name.parameter.token.mwpfh + + + match + ".*?" 
+ name + string.quoted.double.mwpfh + + + scopeName + text.mwpfh + uuid + cd3e2ffa-a57d-4c40-954f-1a2e87ffd638 + + diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 114b835..4d12dc9 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -107,7 +107,7 @@ class TokenizerTestCase(object): def build(cls): """Load and install all tests from the 'tokenizer' directory.""" directory = path.join(path.dirname(__file__), "tokenizer") - extension = ".test" + extension = ".mwtest" for filename in listdir(directory): if not filename.endswith(extension): continue diff --git a/tests/tokenizer/templates.test b/tests/tokenizer/templates.mwtest similarity index 100% rename from tests/tokenizer/templates.test rename to tests/tokenizer/templates.mwtest diff --git a/tests/tokenizer/text.test b/tests/tokenizer/text.mwtest similarity index 100% rename from tests/tokenizer/text.test rename to tests/tokenizer/text.mwtest From 31a977bdfe2d12487417d4ef1c343fc12209b148 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 17 Feb 2013 22:39:53 -0500 Subject: [PATCH 12/67] Finish all incomplete template tests. --- tests/tokenizer/templates.mwtest | 236 +++++++++++++++++++++++++++++++++------ 1 file changed, 200 insertions(+), 36 deletions(-) diff --git a/tests/tokenizer/templates.mwtest b/tests/tokenizer/templates.mwtest index c3416ff..d699ef2 100644 --- a/tests/tokenizer/templates.mwtest +++ b/tests/tokenizer/templates.mwtest @@ -719,39 +719,203 @@ output: [Text(text="{{foo|\nb\nar\n=ba\nz}}")] --- -name: incomplete_tests - -"{{\nfoo\n|\nbar\n=\nb\naz\n|\nb\nuz\n}}" -"{{\nfoo\n|\nb\nar\n|\nbaz\n=\nb\nuz\n}}" - -"{{\nfoo\n|\n{{\nbar\n|\nbaz\n=\nb\niz\n}}\n=\nb\nuzz\n}}" - -"{{foo{bar}}" -"{{foo}bar}}" -"{{{foobar}}" -"{{foo{b{ar}}" -"{{foo[bar}}" -"{{foo]bar}}" -"{{[foobar}}" -"{{foobar]}}" - -"{{foo|ba{r}}" -"{{foo|ba{r}}}" -"{{foo|ba{r}=baz}}" -"{{foo|ba[r]}}" - -"{{foobar" -"{{foobar}" -"{{foobar|" -"{{foo|bar" -"{{foo|bar|" -"{{foo|bar=" -"{{foo|bar=|" -"{{foo|bar=baz" -"{{foo|bar=baz|" -"{{foo|bar|baz" -"{{foo|bar|baz=" -"{{foo|bar|baz=biz" -"{{foo|bar=baz|biz" -"{{foo|bar=baz|biz=" -"{{foo|bar=baz|biz=buzz" +name: newline_wildcard +label: a random, complex assortment of templates and newlines +input: "{{\nfoo\n|\nbar\n=\nb\naz\n|\nb\nuz\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateParamEquals(), Text(text="\nb\naz\n"), TemplateParamSeparator(), Text(text="\nb\nuz\n"), TemplateClose()] + +--- + +name: newline_wildcard_redux +label: an even more random and complex assortment of templates and newlines +input: "{{\nfoo\n|\n{{\nbar\n|\nbaz\n=\nb\niz\n}}\n=\nb\nuzz\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\n"), TemplateOpen(), Text(text="\nbar\n"), TemplateParamSeparator(), Text(text="\nbaz\n"), TemplateParamEquals(), Text(text="\nb\niz\n"), TemplateClose(), Text(text="\n"), TemplateParamEquals(), Text(text="\nb\nuzz\n"), TemplateClose()] + +--- + +name: invalid_name_left_brace_middle +label: invalid characters in template name: left brace in middle +input: "{{foo{bar}}" +output: [Text(text="{{foo{bar}}")] + +--- + +name: invalid_name_right_brace_middle +label: invalid characters in template name: right brace in middle +input: "{{foo}bar}}" +output: [Text(text="{{foo}bar}}")] + +--- + +name: invalid_name_left_braces +label: invalid characters in template name: two left braces in middle +input: "{{foo{b{ar}}" +output: [Text(text="{{foo{b{ar}}")] + +--- + +name: 
invalid_name_left_bracket_middle +label: invalid characters in template name: left bracket in middle +input: "{{foo[bar}}" +output: [Text(text="{{foo[bar}}")] + +--- + +name: invalid_name_right_bracket_middle +label: invalid characters in template name: right bracket in middle +input: "{{foo]bar}}" +output: [Text(text="{{foo]bar}}")] + +--- + +name: invalid_name_left_bracket_start +label: invalid characters in template name: left bracket at start +input: "{{[foobar}}" +output: [Text(text="{{[foobar}}")] + +--- + +name: invalid_name_right_bracket_start +label: invalid characters in template name: right bracket at end +input: "{{foobar]}}" +output: [Text(text="{{foobar]}}")] + +--- + +name: valid_name_left_brace_start +label: valid characters in template name: left brace at start +input: "{{{foobar}}" +output: [Text(text="{"), TemplateOpen(), Text(text="foobar"), TemplateClose()] + +--- + +name: valid_unnamed_param_left_brace +label: valid characters in unnamed template parameter: left brace +input: "{{foo|ba{r}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r"), TemplateClose()] + +--- + +name: valid_unnamed_param_braces +label: valid characters in unnamed template parameter: left and right braces +input: "{{foo|ba{r}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r"), TemplateClose(), Text(text="}")] + +--- + +name: valid_param_name_braces +label: valid characters in template parameter name: left and right braces +input: "{{foo|ba{r}=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r}"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: valid_param_name_brackets +label: valid characters in unnamed template parameter: left and right brackets +input: "{{foo|ba[r]}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba[r]"), TemplateClose()] + +--- + +name: incomplete_plain +label: incomplete templates that should fail gracefully: no close whatsoever +input: "{{stuff}} {{foobar" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar")] + +--- + +name: incomplete_right_brace +label: incomplete templates that should fail gracefully: only one right brace +input: "{{stuff}} {{foobar}" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar}")] + +--- + +name: incomplete_pipe +label: incomplete templates that should fail gracefully: a pipe +input: "{{stuff}} {{foobar|" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar|")] + +--- + +name: incomplete_unnamed_param +label: incomplete templates that should fail gracefully: an unnamed parameter +input: "{{stuff}} {{foo|bar" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar")] + +--- + +name: incomplete_unnamed_param_pipe +label: incomplete templates that should fail gracefully: an unnamed parameter, then a pipe +input: "{{stuff}} {{foo|bar|" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|")] + +--- + +name: incomplete_valueless_param +label: incomplete templates that should fail gracefully: a named parameter with no value +input: "{{stuff}} {{foo|bar=" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=")] + +--- + +name: incomplete_valueless_param_pipe +label: incomplete templates that should fail gracefully: a named parameter with no value, then a pipe +input: "{{stuff}} 
{{foo|bar=|" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=|")] + +--- + +name: incomplete_named_param +label: incomplete templates that should fail gracefully: a named parameter with a value +input: "{{stuff}} {{foo|bar=baz" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz")] + +--- + +name: incomplete_named_param_pipe +label: incomplete templates that should fail gracefully: a named parameter with a value, then a paipe +input: "{{stuff}} {{foo|bar=baz|" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|")] + +--- + +name: incomplete_two_unnamed_params +label: incomplete templates that should fail gracefully: two unnamed parameters +input: "{{stuff}} {{foo|bar|baz" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz")] + +--- + +name: incomplete_unnamed_param_valueless_param +label: incomplete templates that should fail gracefully: an unnamed parameter, then a named parameter with no value +input: "{{stuff}} {{foo|bar|baz=" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz=")] + +--- + +name: incomplete_unnamed_param_named_param +label: incomplete templates that should fail gracefully: an unnamed parameter, then a named parameter with a value +input: "{{stuff}} {{foo|bar|baz=biz" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz=biz")] + +--- + +name: incomplete_named_param_unnamed_param +label: incomplete templates that should fail gracefully: a named parameter with a value, then an unnamed parameter +input: "{{stuff}} {{foo|bar=baz|biz" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz")] + +--- + +name: incomplete_named_param_valueless_param +label: incomplete templates that should fail gracefully: a named parameter with a value, then a named parameter with no value +input: "{{stuff}} {{foo|bar=baz|biz=" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz=")] + +--- + +name: incomplete_two_named_params +label: incomplete templates that should fail gracefully: two named parameters with values +input: "{{stuff}} {{foo|bar=baz|biz=buzz" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz=buzz")] From 0803417901d09d7df830e65300355507715e67cb Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 23 Feb 2013 13:12:16 -0500 Subject: [PATCH 13/67] Port CTokenizer's verify_safe method to Python to solve a failing test. 
--- mwparserfromhell/parser/contexts.py | 62 +++++++++++------- mwparserfromhell/parser/tokenizer.c | 12 ++-- mwparserfromhell/parser/tokenizer.h | 1 + mwparserfromhell/parser/tokenizer.py | 122 +++++++++++++++++++++++++---------- 4 files changed, 137 insertions(+), 60 deletions(-) diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index b65946c..896d137 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -62,6 +62,15 @@ Local (stack-specific) contexts: * :py:const:`COMMENT` +* :py:const:`SAFETY_CHECK` + + * :py:const:`HAS_TEXT` + * :py:const:`FAIL_ON_TEXT` + * :py:const:`FAIL_NEXT` + * :py:const:`FAIL_ON_LBRACE` + * :py:const:`FAIL_ON_RBRACE` + * :py:const:`FAIL_ON_EQUALS` + Global contexts: * :py:const:`GL_HEADING` @@ -69,29 +78,36 @@ Global contexts: # Local contexts: -TEMPLATE = 0b00000000000111 -TEMPLATE_NAME = 0b00000000000001 -TEMPLATE_PARAM_KEY = 0b00000000000010 -TEMPLATE_PARAM_VALUE = 0b00000000000100 - -ARGUMENT = 0b00000000011000 -ARGUMENT_NAME = 0b00000000001000 -ARGUMENT_DEFAULT = 0b00000000010000 - -WIKILINK = 0b00000001100000 -WIKILINK_TITLE = 0b00000000100000 -WIKILINK_TEXT = 0b00000001000000 - -HEADING = 0b01111110000000 -HEADING_LEVEL_1 = 0b00000010000000 -HEADING_LEVEL_2 = 0b00000100000000 -HEADING_LEVEL_3 = 0b00001000000000 -HEADING_LEVEL_4 = 0b00010000000000 -HEADING_LEVEL_5 = 0b00100000000000 -HEADING_LEVEL_6 = 0b01000000000000 - -COMMENT = 0b10000000000000 - +TEMPLATE = 0b00000000000000000111 +TEMPLATE_NAME = 0b00000000000000000001 +TEMPLATE_PARAM_KEY = 0b00000000000000000010 +TEMPLATE_PARAM_VALUE = 0b00000000000000000100 + +ARGUMENT = 0b00000000000000011000 +ARGUMENT_NAME = 0b00000000000000001000 +ARGUMENT_DEFAULT = 0b00000000000000010000 + +WIKILINK = 0b00000000000001100000 +WIKILINK_TITLE = 0b00000000000000100000 +WIKILINK_TEXT = 0b00000000000001000000 + +HEADING = 0b00000001111110000000 +HEADING_LEVEL_1 = 0b00000000000010000000 +HEADING_LEVEL_2 = 0b00000000000100000000 +HEADING_LEVEL_3 = 0b00000000001000000000 +HEADING_LEVEL_4 = 0b00000000010000000000 +HEADING_LEVEL_5 = 0b00000000100000000000 +HEADING_LEVEL_6 = 0b00000001000000000000 + +COMMENT = 0b00000010000000000000 + +SAFETY_CHECK = 0b11111100000000000000 +HAS_TEXT = 0b00000100000000000000 +FAIL_ON_TEXT = 0b00001000000000000000 +FAIL_NEXT = 0b00010000000000000000 +FAIL_ON_LBRACE = 0b00100000000000000000 +FAIL_ON_RBRACE = 0b01000000000000000000 +FAIL_ON_EQUALS = 0b10000000000000000000 # Global contexts: diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 09649a7..d82b080 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1324,10 +1324,14 @@ Tokenizer_parse(Tokenizer* self, int context) Tokenizer_write_text(self, this); } else if (this == next && next == *"[") { - if (Tokenizer_parse_wikilink(self)) - return NULL; - if (self->topstack->context & LC_FAIL_NEXT) - self->topstack->context ^= LC_FAIL_NEXT; + if (!(this_context & LC_WIKILINK_TITLE)) { + if (Tokenizer_parse_wikilink(self)) + return NULL; + if (self->topstack->context & LC_FAIL_NEXT) + self->topstack->context ^= LC_FAIL_NEXT; + } + else + Tokenizer_write_text(self, this); } else if (this == *"|" && this_context & LC_WIKILINK_TITLE) { if (Tokenizer_handle_wikilink_separator(self)) diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index 3293a8f..af86321 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -118,6 
+118,7 @@ static PyObject* TagCloseClose; #define LC_COMMENT 0x02000 +#define LC_SAFETY_CHECK 0xFC000 #define LC_HAS_TEXT 0x04000 #define LC_FAIL_ON_TEXT 0x08000 #define LC_FAIL_NEXT 0x10000 diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index eead131..a365db8 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -213,28 +213,9 @@ class Tokenizer(object): self._write_all(argument) self._write(tokens.ArgumentClose()) - def _verify_safe(self, unsafes, strip=True): - """Verify that there are no unsafe characters in the current stack. - - The route will be failed if the name contains any element of *unsafes* - in it. This is used when parsing template names, parameter keys, and so - on, which cannot contain newlines and some other characters. If *strip* - is ``True``, the text will be stripped of whitespace, since this is - allowed at the ends of certain elements but not between text. - """ - self._push_textbuffer() - if self._stack: - text = [tok for tok in self._stack if isinstance(tok, tokens.Text)] - text = "".join([token.text for token in text]) - if strip: - text = text.strip() - if text and any([unsafe in text for unsafe in unsafes]): - self._fail_route() - def _handle_template_param(self): """Handle a template parameter at the head of the string.""" if self._context & contexts.TEMPLATE_NAME: - self._verify_safe(["\n", "{", "}", "[", "]"]) self._context ^= contexts.TEMPLATE_NAME elif self._context & contexts.TEMPLATE_PARAM_VALUE: self._context ^= contexts.TEMPLATE_PARAM_VALUE @@ -246,11 +227,6 @@ class Tokenizer(object): def _handle_template_param_value(self): """Handle a template parameter's value at the head of the string.""" - try: - self._verify_safe(["\n", "{{", "}}"]) - except BadRoute: - self._pop() - raise self._write_all(self._pop(keep_context=True)) self._context ^= contexts.TEMPLATE_PARAM_KEY self._context |= contexts.TEMPLATE_PARAM_VALUE @@ -258,24 +234,19 @@ class Tokenizer(object): def _handle_template_end(self): """Handle the end of a template at the head of the string.""" - if self._context & contexts.TEMPLATE_NAME: - self._verify_safe(["\n", "{", "}", "[", "]"]) - elif self._context & contexts.TEMPLATE_PARAM_KEY: + if self._context & contexts.TEMPLATE_PARAM_KEY: self._write_all(self._pop(keep_context=True)) self._head += 1 return self._pop() def _handle_argument_separator(self): """Handle the separator between an argument's name and default.""" - self._verify_safe(["\n", "{{", "}}"]) self._context ^= contexts.ARGUMENT_NAME self._context |= contexts.ARGUMENT_DEFAULT self._write(tokens.ArgumentSeparator()) def _handle_argument_end(self): """Handle the end of an argument at the head of the string.""" - if self._context & contexts.ARGUMENT_NAME: - self._verify_safe(["\n", "{{", "}}"]) self._head += 2 return self._pop() @@ -295,15 +266,12 @@ class Tokenizer(object): def _handle_wikilink_separator(self): """Handle the separator between a wikilink's title and its text.""" - self._verify_safe(["\n", "{", "}", "[", "]"], strip=False) self._context ^= contexts.WIKILINK_TITLE self._context |= contexts.WIKILINK_TEXT self._write(tokens.WikilinkSeparator()) def _handle_wikilink_end(self): """Handle the end of a wikilink at the head of the string.""" - if self._context & contexts.WIKILINK_TITLE: - self._verify_safe(["\n", "{", "}", "[", "]"], strip=False) self._head += 1 return self._pop() @@ -424,11 +392,94 @@ class Tokenizer(object): self._write(tokens.CommentEnd()) self._head += 2 + def 
_verify_safe(self, this): + """Make sure we are not trying to write an invalid character.""" + context = self._context + if context & contexts.FAIL_NEXT: + self._fail_route() + if context & contexts.WIKILINK_TITLE: + if this == "]" or this == "{": + self._context |= contexts.FAIL_NEXT + elif this == "\n" or this == "[" or this == "}": + self._fail_route() + return + if context & contexts.TEMPLATE_NAME: + if this == "{" or this == "}" or this == "[": + self._context |= contexts.FAIL_NEXT + return + if this == "]": + self._fail_route() + return + if this == "|": + return + elif context & (contexts.TEMPLATE_PARAM_KEY | contexts.ARGUMENT_NAME): + if context & contexts.FAIL_ON_EQUALS: + if this == "=": + self._fail_route() + return + elif context & contexts.FAIL_ON_LBRACE: + if this == "{": + if context & contexts.TEMPLATE: + self._context |= contexts.FAIL_ON_EQUALS + else: + self._context |= contexts.FAIL_NEXT + return + self._context ^= contexts.FAIL_ON_LBRACE + elif context & contexts.FAIL_ON_RBRACE: + if this == "}": + if context & contexts.TEMPLATE: + self._context |= contexts.FAIL_ON_EQUALS + else: + self._context |= contexts.FAIL_NEXT + return + self._context ^= contexts.FAIL_ON_RBRACE + elif this == "{": + self._context |= contexts.FAIL_ON_LBRACE + elif this == "}": + self._context |= contexts.FAIL_ON_RBRACE + if context & contexts.HAS_TEXT: + if context & contexts.FAIL_ON_TEXT: + if this is self.END or not this.isspace(): + if context & contexts.TEMPLATE_PARAM_KEY: + self._context ^= contexts.FAIL_ON_TEXT + self._context |= contexts.FAIL_ON_EQUALS + else: + self._fail_route() + return + else: + if this == "\n": + self._context |= contexts.FAIL_ON_TEXT + elif this is self.END or not this.isspace(): + self._context |= contexts.HAS_TEXT + + def _reset_safety_checks(self): + """Unset any safety-checking contexts set by Tokenizer_verify_safe(). + + Used when we preserve a context but previous data becomes invalid, like + when moving between template parameters. 
+ """ + context = self._context + checks = (contexts.HAS_TEXT, contexts.FAIL_ON_TEXT, contexts.FAIL_NEXT, + contexts.FAIL_ON_LBRACE, contexts.FAIL_ON_RBRACE, + contexts.FAIL_ON_EQUALS) + for check in checks: + if context & check: + self._context ^= check; + def _parse(self, context=0): """Parse the wikicode string, using *context* for when to stop.""" self._push(context) while True: this = self._read() + unsafe = (contexts.TEMPLATE_NAME | contexts.WIKILINK_TITLE | + contexts.TEMPLATE_PARAM_KEY | contexts.ARGUMENT_NAME) + if self._context & unsafe: + try: + self._verify_safe(this) + except BadRoute: + if self._context & contexts.TEMPLATE_PARAM_KEY: + self._pop() + raise if this not in self.MARKERS: self._write_text(this) self._head += 1 @@ -450,7 +501,10 @@ class Tokenizer(object): self._write_text(this) elif this == next == "{": self._parse_template_or_argument() + if self._context & contexts.FAIL_NEXT: + self._context ^= contexts.FAIL_NEXT elif this == "|" and self._context & contexts.TEMPLATE: + self._reset_safety_checks() self._handle_template_param() elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY: self._handle_template_param_value() @@ -466,6 +520,8 @@ class Tokenizer(object): elif this == next == "[": if not self._context & contexts.WIKILINK_TITLE: self._parse_wikilink() + if self._context & contexts.FAIL_NEXT: + self._context ^= contexts.FAIL_NEXT else: self._write_text("[") elif this == "|" and self._context & contexts.WIKILINK_TITLE: From 111a71f0c242b6827b2f5a02731f2e198ba7b70e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 25 Feb 2013 00:18:03 -0500 Subject: [PATCH 14/67] Committing an empty file to work on later. --- tests/test_string_mixin.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 tests/test_string_mixin.py diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py new file mode 100644 index 0000000..b9413ec --- /dev/null +++ b/tests/test_string_mixin.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals + +import mwparserfromhell + +class TestStringMixIn(unittest.TestCase): + """Test cases for the StringMixIn class.""" + def test_(self): + pass + +if __name__ == "__main__": + unittest.main(verbosity=2) From 221af8a9d7100d69d03e1af8ad6b4e020e2cceb4 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 26 Feb 2013 10:55:49 -0500 Subject: [PATCH 15/67] Adding some tests to TestStringMixIn --- mwparserfromhell/string_mixin.py | 1 - tests/test_string_mixin.py | 80 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 78 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index d7a0749..ac47251 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -50,7 +50,6 @@ class StringMixIn(object): :py:meth:`__unicode__` instead of the immutable ``self`` like the regular ``str`` type. """ - if py3k: def __str__(self): return self.__unicode__() diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index b9413ec..0d2ca43 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -21,12 +21,88 @@ # SOFTWARE. from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import py3k, str +from mwparserfromhell.string_mixin import StringMixIn + +class _FakeString(StringMixIn): + def __init__(self, data): + self._data = data + + def __unicode__(self): + return self._data -import mwparserfromhell class TestStringMixIn(unittest.TestCase): """Test cases for the StringMixIn class.""" - def test_(self): + def test_docs(self): + """make sure the various functions of StringMixIn have docstrings""" + methods = [ + "capitalize", "center", "count", "encode", "endswith", + "expandtabs", "find", "format", "index", "isalnum", "isalpha", + "isdecimal", "isdigit", "islower", "isnumeric", "isspace", + "istitle", "isupper", "join", "ljust", "lstrip", "partition", + "replace", "rfind", "rindex", "rjust", "rpartition", "rsplit", + "rstrip", "split", "splitlines", "startswith", "strip", "swapcase", + "title", "translate", "upper", "zfill"] + if not py3k: + methods.append("decode") + for meth in methods: + expected = getattr(str, meth).__doc__ + actual = getattr(StringMixIn, meth).__doc__ + self.assertEquals(expected, actual) + + def test_types(self): + """make sure StringMixIns convert to different types correctly""" + pass + + def test_comparisons(self): + """make sure comparison operators work""" + str1 = _FakeString("this is a fake string") + str2 = _FakeString("this is a fake string") + str3 = _FakeString("fake string, this is") + str4 = "this is a fake string" + str5 = "fake string, this is" + + self.assertFalse(str1 > str2) + self.assertTrue(str1 >= str2) + self.assertTrue(str1 == str2) + self.assertFalse(str1 != str2) + self.assertFalse(str1 < str2) + self.assertTrue(str1 <= str2) + + self.assertTrue(str1 > str3) + self.assertTrue(str1 >= str3) + self.assertFalse(str1 == str3) + self.assertTrue(str1 != str3) + self.assertFalse(str1 < str3) + self.assertFalse(str1 <= str3) + + self.assertFalse(str1 > str4) + self.assertTrue(str1 >= str4) + self.assertTrue(str1 == str4) + self.assertFalse(str1 != str4) + self.assertFalse(str1 < str4) + self.assertTrue(str1 <= str4) + + self.assertTrue(str1 > str5) + self.assertTrue(str1 >= str5) + self.assertFalse(str1 == str5) + self.assertTrue(str1 != str5) + self.assertFalse(str1 < str5) + self.assertFalse(str1 <= str5) + + def test_operators(self): + """make sure string addition and 
multiplication work""" + pass + + def test_other_magics(self): + """test other magically implemented features, like len() and iter()""" + pass + + def test_other_methods(self): + """test the remaining non-magic methods of StringMixIn""" pass if __name__ == "__main__": From 6e748004d1fa16ec812a527644f2f24515d0ff00 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Feb 2013 10:41:21 -0500 Subject: [PATCH 16/67] test_types(), test_other_magics(); add range to compat --- mwparserfromhell/compat.py | 2 ++ tests/test_string_mixin.py | 68 +++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 63 insertions(+), 7 deletions(-) diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index 576c2c5..48b9807 100755 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -16,6 +16,7 @@ if py3k: bytes = bytes str = str basestring = str + range = range maxsize = sys.maxsize import html.entities as htmlentities from io import StringIO @@ -24,6 +25,7 @@ else: bytes = str str = unicode basestring = basestring + range = xrange maxsize = sys.maxint import htmlentitydefs as htmlentities from StringIO import StringIO diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 0d2ca43..0e60309 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -21,9 +21,10 @@ # SOFTWARE. from __future__ import unicode_literals +from types import GeneratorType import unittest -from mwparserfromhell.compat import py3k, str +from mwparserfromhell.compat import bytes, py3k, range, str from mwparserfromhell.string_mixin import StringMixIn class _FakeString(StringMixIn): @@ -55,7 +56,20 @@ class TestStringMixIn(unittest.TestCase): def test_types(self): """make sure StringMixIns convert to different types correctly""" - pass + fstr = _FakeString("fake string") + self.assertEquals(str(fstr), "fake string") + self.assertEquals(bytes(fstr), b"fake string") + if py3k: + self.assertEquals(repr(fstr), "'fake string'") + else: + self.assertEquals(repr(fstr), b"u'fake string'") + + self.assertIsInstance(str(fstr), str) + self.assertIsInstance(bytes(fstr), bytes) + if py3k: + self.assertIsInstance(repr(fstr), str) + else: + self.assertIsInstance(repr(fstr), bytes) def test_comparisons(self): """make sure comparison operators work""" @@ -93,13 +107,53 @@ class TestStringMixIn(unittest.TestCase): self.assertFalse(str1 < str5) self.assertFalse(str1 <= str5) - def test_operators(self): - """make sure string addition and multiplication work""" - pass - def test_other_magics(self): """test other magically implemented features, like len() and iter()""" - pass + str1 = _FakeString("fake string") + str2 = _FakeString("") + expected = ["f", "a", "k", "e", " ", "s", "t", "r", "i", "n", "g"] + + self.assertTrue(str1) + self.assertFalse(str2) + self.assertEquals(11, len(str1)) + self.assertEquals(0, len(str2)) + + out = [] + for ch in str1: + out.append(ch) + self.assertEquals(expected, out) + + out = [] + for ch in str2: + out.append(ch) + self.assertEquals([], out) + + gen1 = iter(str1) + gen2 = iter(str2) + self.assertIsInstance(gen1, GeneratorType) + self.assertIsInstance(gen2, GeneratorType) + + out = [] + for i in range(len(str1)): + out.append(gen1.next()) + self.assertRaises(StopIteration, gen1.next) + self.assertEquals(expected, out) + self.assertRaises(StopIteration, gen2.next) + + self.assertEquals("f", str1[0]) + self.assertEquals(" ", str1[4]) + self.assertEquals("g", str1[10]) + self.assertEquals("n", str1[-2]) + self.assertRaises(IndexError, lambda: str1[11]) + 
self.assertRaises(IndexError, lambda: str2[0]) + + self.assertTrue("k" in str1) + self.assertTrue("fake" in str1) + self.assertTrue("str" in str1) + self.assertTrue("" in str1) + self.assertTrue("" in str2) + self.assertFalse("real" in str1) + self.assertFalse("s" in str2) def test_other_methods(self): """test the remaining non-magic methods of StringMixIn""" From e2fe0120ea128ac6df646e09b25468507e3f2aec Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Feb 2013 10:56:57 -0500 Subject: [PATCH 17/67] Some tests for test_other_methods() --- tests/test_string_mixin.py | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 0e60309..74da9ff 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -157,7 +157,43 @@ class TestStringMixIn(unittest.TestCase): def test_other_methods(self): """test the remaining non-magic methods of StringMixIn""" - pass + fstr = _FakeString("fake string") + + self.assertEquals("Fake string", fstr.capitalize()) + + self.assertEquals(" fake string ", fstr.center(15)) + self.assertEquals(" fake string ", fstr.center(16)) + self.assertEquals("qqfake stringqq", fstr.center(15, "q")) + + self.assertEquals(1, fstr.count("e")) + self.assertEquals(0, fstr.count("z")) + self.assertEquals(1, fstr.count("r", 7)) + self.assertEquals(0, fstr.count("r", 8)) + self.assertEquals(1, fstr.count("r", 5, 9)) + self.assertEquals(0, fstr.count("r", 5, 7)) + + if not py3k: + self.assertEquals(fstr, fstr.decode()) + self.assertEquals("𐌲𐌿𐍄", '\\U00010332\\U0001033f\\U00010344'.decode("unicode_escape")) + + self.assertEquals(b"fake string", fstr.encode()) + self.assertEquals(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", + "𐌲𐌿𐍄".encode("utf8")) + self.assertRaises(UnicodeEncodeError, "𐌲𐌿𐍄".encode) + self.assertRaises(UnicodeEncodeError, "𐌲𐌿𐍄".encode, "ascii") + self.assertRaises(UnicodeEncodeError, "𐌲𐌿𐍄".encode, "ascii", "strict") + self.assertEquals("", "𐌲𐌿𐍄".encode("ascii", "ignore")) + + self.assertTrue(fstr.endswith("ing")) + self.assertFalse(fstr.endswith("ingh")) + + methods = [ + "expandtabs", "find", "format", "index", "isalnum", "isalpha", + "isdecimal", "isdigit", "islower", "isnumeric", "isspace", + "istitle", "isupper", "join", "ljust", "lstrip", "partition", + "replace", "rfind", "rindex", "rjust", "rpartition", "rsplit", + "rstrip", "split", "splitlines", "startswith", "strip", "swapcase", + "title", "translate", "upper", "zfill"] if __name__ == "__main__": unittest.main(verbosity=2) From 9a87329d690db98bd3594fb122f43de849e3c8b1 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 28 Feb 2013 10:58:19 -0500 Subject: [PATCH 18/67] More tests for test_other_methods() --- mwparserfromhell/string_mixin.py | 7 ++++--- tests/test_string_mixin.py | 40 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index ac47251..6490051 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -179,9 +179,10 @@ class StringMixIn(object): def isalpha(self): return self.__unicode__().isalpha() - @inheritdoc - def isdecimal(self): - return self.__unicode__().isdecimal() + if py3k: + @inheritdoc + def isdecimal(self): + return self.__unicode__().isdecimal() @inheritdoc def isdigit(self): diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 74da9ff..4e4fa68 100644 --- 
a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -174,7 +174,11 @@ class TestStringMixIn(unittest.TestCase): if not py3k: self.assertEquals(fstr, fstr.decode()) - self.assertEquals("𐌲𐌿𐍄", '\\U00010332\\U0001033f\\U00010344'.decode("unicode_escape")) + actual = '\\U00010332\\U0001033f\\U00010344' + self.assertEquals("𐌲𐌿𐍄", actual.decode("unicode_escape")) + self.assertEquals("𐌲", '\\U00010332'.decode("unicode_escape")) + self.assertRaises(UnicodeError, "fo".decode, "punycode") + self.assertEquals("", "fo".decode("punycode", "ignore")) self.assertEquals(b"fake string", fstr.encode()) self.assertEquals(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", @@ -187,8 +191,40 @@ class TestStringMixIn(unittest.TestCase): self.assertTrue(fstr.endswith("ing")) self.assertFalse(fstr.endswith("ingh")) + self.assertEquals("fake string", fstr) + self.assertEquals(" foobar", "\tfoobar".expandtabs()) + self.assertEquals(" foobar", "\tfoobar".expandtabs(4)) + + self.assertEquals(3, fstr.find("e")) + self.assertEquals(-1, fstr.find("z")) + self.assertEquals(7, fstr.find("r", 7)) + self.assertEquals(-1, fstr.find("r", 8)) + self.assertEquals(7, fstr.find("r", 5, 9)) + self.assertEquals(-1, fstr.find("r", 5, 7)) + + self.assertEquals("fake string", fstr.format()) + self.assertEquals("foobarbaz", "foo{0}baz".format("bar")) + self.assertEquals("foobarbaz", "foo{abc}baz".format(abc="bar")) + self.assertEquals("foobarbazbuzz", + "foo{0}{abc}buzz".format("bar", abc="baz")) + self.assertRaises(IndexError, "{0}{1}".format, "abc") + + self.assertEquals(3, fstr.index("e")) + self.assertRaises(ValueError, fstr.index, "z") + self.assertEquals(7, fstr.index("r", 7)) + self.assertRaises(ValueError, fstr.index, "r", 8) + self.assertEquals(7, fstr.index("r", 5, 9)) + self.assertRaises(ValueError, fstr.index, "r", 5, 7) + + self.assertTrue("foobar".isalnum()) + self.assertTrue("foobar123".isalnum()) + self.assertFalse("foo bar".isalnum()) + + self.assertTrue("foobar".isalpha()) + self.assertFalse("foobar123".isalpha()) + self.assertFalse("foo bar".isalpha()) + methods = [ - "expandtabs", "find", "format", "index", "isalnum", "isalpha", "isdecimal", "isdigit", "islower", "isnumeric", "isspace", "istitle", "isupper", "join", "ljust", "lstrip", "partition", "replace", "rfind", "rindex", "rjust", "rpartition", "rsplit", From 5a0a00ba98f0edde985239cc4717e70c0d37c618 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 3 Mar 2013 20:29:34 -0500 Subject: [PATCH 19/67] Change the way verify_safe() handles template params (#25). - Newlines are now allowed in template param names. - Changes also affect handling of arguments like {{{foo}}}. - Update unit tests: remove some unnecessary ones, and add some to cover the changes. - Update StringMixIn tests to actually work for some of the methods. - Update copyright notices for the C extensions. 
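For illustration of the first two points, a newline inside a parameter name no
longer invalidates the surrounding template, while a newline inside the
template name itself still does. A rough sketch of the intended behaviour (not
part of this patch; the exact token streams are given by the updated
tests/tokenizer/templates.mwtest cases below):

    import mwparserfromhell

    # Newlines within a parameter *name* are now accepted...
    code = mwparserfromhell.parse("{{foo|\nb\nar\n=baz}}")
    assert len(code.filter_templates()) == 1

    # ...but newlines within the template *name* still leave plain text.
    code = mwparserfromhell.parse("{{\nfo\no\n|\nb\nar\n=baz}}")
    assert len(code.filter_templates()) == 0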
--- mwparserfromhell/parser/tokenizer.c | 2 +- mwparserfromhell/parser/tokenizer.h | 2 +- mwparserfromhell/parser/tokenizer.py | 70 ++--- tests/test_string_mixin.py | 166 ++++++----- tests/tokenizer/templates.mwtest | 540 +++++++---------------------------- 5 files changed, 235 insertions(+), 545 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index d82b080..6716698 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1,6 +1,6 @@ /* Tokenizer for MWParserFromHell -Copyright (C) 2012 Ben Kurtovic +Copyright (C) 2012-2013 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index af86321..8d51013 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -1,6 +1,6 @@ /* Tokenizer Header File for MWParserFromHell -Copyright (C) 2012 Ben Kurtovic +Copyright (C) 2012-2013 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index a365db8..67638ca 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -396,34 +396,42 @@ class Tokenizer(object): """Make sure we are not trying to write an invalid character.""" context = self._context if context & contexts.FAIL_NEXT: - self._fail_route() + return False if context & contexts.WIKILINK_TITLE: if this == "]" or this == "{": self._context |= contexts.FAIL_NEXT elif this == "\n" or this == "[" or this == "}": - self._fail_route() - return + return False + return True if context & contexts.TEMPLATE_NAME: if this == "{" or this == "}" or this == "[": self._context |= contexts.FAIL_NEXT - return + return True if this == "]": - self._fail_route() - return + return False if this == "|": - return - elif context & (contexts.TEMPLATE_PARAM_KEY | contexts.ARGUMENT_NAME): + return True + if context & contexts.HAS_TEXT: + if context & contexts.FAIL_ON_TEXT: + if this is self.END or not this.isspace(): + return False + else: + if this == "\n": + self._context |= contexts.FAIL_ON_TEXT + elif this is not self.END or not this.isspace(): + self._context |= contexts.HAS_TEXT + return True + else: if context & contexts.FAIL_ON_EQUALS: if this == "=": - self._fail_route() - return + return False elif context & contexts.FAIL_ON_LBRACE: - if this == "{": + if this == "{" or (self._read(-1) == self._read(-2) == "{"): if context & contexts.TEMPLATE: self._context |= contexts.FAIL_ON_EQUALS else: self._context |= contexts.FAIL_NEXT - return + return True self._context ^= contexts.FAIL_ON_LBRACE elif context & contexts.FAIL_ON_RBRACE: if this == "}": @@ -431,40 +439,13 @@ class Tokenizer(object): self._context |= contexts.FAIL_ON_EQUALS else: self._context |= contexts.FAIL_NEXT - return + return True self._context ^= contexts.FAIL_ON_RBRACE elif this == "{": self._context |= contexts.FAIL_ON_LBRACE elif this == "}": self._context |= contexts.FAIL_ON_RBRACE - if context & contexts.HAS_TEXT: - if context & contexts.FAIL_ON_TEXT: - if this is self.END or not this.isspace(): - if context & contexts.TEMPLATE_PARAM_KEY: - self._context ^= contexts.FAIL_ON_TEXT - self._context |= contexts.FAIL_ON_EQUALS - else: - 
self._fail_route() - return - else: - if this == "\n": - self._context |= contexts.FAIL_ON_TEXT - elif this is self.END or not this.isspace(): - self._context |= contexts.HAS_TEXT - - def _reset_safety_checks(self): - """Unset any safety-checking contexts set by Tokenizer_verify_safe(). - - Used when we preserve a context but previous data becomes invalid, like - when moving between template parameters. - """ - context = self._context - checks = (contexts.HAS_TEXT, contexts.FAIL_ON_TEXT, contexts.FAIL_NEXT, - contexts.FAIL_ON_LBRACE, contexts.FAIL_ON_RBRACE, - contexts.FAIL_ON_EQUALS) - for check in checks: - if context & check: - self._context ^= check; + return True def _parse(self, context=0): """Parse the wikicode string, using *context* for when to stop.""" @@ -474,12 +455,10 @@ class Tokenizer(object): unsafe = (contexts.TEMPLATE_NAME | contexts.WIKILINK_TITLE | contexts.TEMPLATE_PARAM_KEY | contexts.ARGUMENT_NAME) if self._context & unsafe: - try: - self._verify_safe(this) - except BadRoute: + if not self._verify_safe(this): if self._context & contexts.TEMPLATE_PARAM_KEY: self._pop() - raise + self._fail_route() if this not in self.MARKERS: self._write_text(this) self._head += 1 @@ -504,7 +483,6 @@ class Tokenizer(object): if self._context & contexts.FAIL_NEXT: self._context ^= contexts.FAIL_NEXT elif this == "|" and self._context & contexts.TEMPLATE: - self._reset_safety_checks() self._handle_template_param() elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY: self._handle_template_param_value() diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 4e4fa68..43a9e9a 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -42,12 +42,14 @@ class TestStringMixIn(unittest.TestCase): methods = [ "capitalize", "center", "count", "encode", "endswith", "expandtabs", "find", "format", "index", "isalnum", "isalpha", - "isdecimal", "isdigit", "islower", "isnumeric", "isspace", - "istitle", "isupper", "join", "ljust", "lstrip", "partition", - "replace", "rfind", "rindex", "rjust", "rpartition", "rsplit", - "rstrip", "split", "splitlines", "startswith", "strip", "swapcase", - "title", "translate", "upper", "zfill"] - if not py3k: + "isdigit", "islower", "isnumeric", "isspace", "istitle", "isupper", + "join", "ljust", "lstrip", "partition", "replace", "rfind", + "rindex", "rjust", "rpartition", "rsplit", "rstrip", "split", + "splitlines", "startswith", "strip", "swapcase", "title", + "translate", "upper", "zfill"] + if py3k: + methods.append("isdecimal") + else: methods.append("decode") for meth in methods: expected = getattr(str, meth).__doc__ @@ -157,75 +159,107 @@ class TestStringMixIn(unittest.TestCase): def test_other_methods(self): """test the remaining non-magic methods of StringMixIn""" - fstr = _FakeString("fake string") - - self.assertEquals("Fake string", fstr.capitalize()) + str1 = _FakeString("fake string") + self.assertEquals("Fake string", str1.capitalize()) - self.assertEquals(" fake string ", fstr.center(15)) - self.assertEquals(" fake string ", fstr.center(16)) - self.assertEquals("qqfake stringqq", fstr.center(15, "q")) + self.assertEquals(" fake string ", str1.center(15)) + self.assertEquals(" fake string ", str1.center(16)) + self.assertEquals("qqfake stringqq", str1.center(15, "q")) - self.assertEquals(1, fstr.count("e")) - self.assertEquals(0, fstr.count("z")) - self.assertEquals(1, fstr.count("r", 7)) - self.assertEquals(0, fstr.count("r", 8)) - self.assertEquals(1, fstr.count("r", 5, 9)) - self.assertEquals(0, 
fstr.count("r", 5, 7)) + self.assertEquals(1, str1.count("e")) + self.assertEquals(0, str1.count("z")) + self.assertEquals(1, str1.count("r", 7)) + self.assertEquals(0, str1.count("r", 8)) + self.assertEquals(1, str1.count("r", 5, 9)) + self.assertEquals(0, str1.count("r", 5, 7)) if not py3k: - self.assertEquals(fstr, fstr.decode()) - actual = '\\U00010332\\U0001033f\\U00010344' + str2 = _FakeString("fo") + self.assertEquals(str1, str1.decode()) + actual = _FakeString("\\U00010332\\U0001033f\\U00010344") self.assertEquals("𐌲𐌿𐍄", actual.decode("unicode_escape")) - self.assertEquals("𐌲", '\\U00010332'.decode("unicode_escape")) - self.assertRaises(UnicodeError, "fo".decode, "punycode") - self.assertEquals("", "fo".decode("punycode", "ignore")) + self.assertRaises(UnicodeError, str2.decode, "punycode") + self.assertEquals("", str2.decode("punycode", "ignore")) - self.assertEquals(b"fake string", fstr.encode()) + str3 = _FakeString("𐌲𐌿𐍄") + self.assertEquals(b"fake string", str1.encode()) self.assertEquals(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", - "𐌲𐌿𐍄".encode("utf8")) - self.assertRaises(UnicodeEncodeError, "𐌲𐌿𐍄".encode) - self.assertRaises(UnicodeEncodeError, "𐌲𐌿𐍄".encode, "ascii") - self.assertRaises(UnicodeEncodeError, "𐌲𐌿𐍄".encode, "ascii", "strict") - self.assertEquals("", "𐌲𐌿𐍄".encode("ascii", "ignore")) - - self.assertTrue(fstr.endswith("ing")) - self.assertFalse(fstr.endswith("ingh")) - - self.assertEquals("fake string", fstr) - self.assertEquals(" foobar", "\tfoobar".expandtabs()) - self.assertEquals(" foobar", "\tfoobar".expandtabs(4)) - - self.assertEquals(3, fstr.find("e")) - self.assertEquals(-1, fstr.find("z")) - self.assertEquals(7, fstr.find("r", 7)) - self.assertEquals(-1, fstr.find("r", 8)) - self.assertEquals(7, fstr.find("r", 5, 9)) - self.assertEquals(-1, fstr.find("r", 5, 7)) - - self.assertEquals("fake string", fstr.format()) - self.assertEquals("foobarbaz", "foo{0}baz".format("bar")) - self.assertEquals("foobarbaz", "foo{abc}baz".format(abc="bar")) - self.assertEquals("foobarbazbuzz", - "foo{0}{abc}buzz".format("bar", abc="baz")) - self.assertRaises(IndexError, "{0}{1}".format, "abc") - - self.assertEquals(3, fstr.index("e")) - self.assertRaises(ValueError, fstr.index, "z") - self.assertEquals(7, fstr.index("r", 7)) - self.assertRaises(ValueError, fstr.index, "r", 8) - self.assertEquals(7, fstr.index("r", 5, 9)) - self.assertRaises(ValueError, fstr.index, "r", 5, 7) - - self.assertTrue("foobar".isalnum()) - self.assertTrue("foobar123".isalnum()) - self.assertFalse("foo bar".isalnum()) - - self.assertTrue("foobar".isalpha()) - self.assertFalse("foobar123".isalpha()) - self.assertFalse("foo bar".isalpha()) + str3.encode("utf8")) + self.assertRaises(UnicodeEncodeError, str3.encode) + self.assertRaises(UnicodeEncodeError, str3.encode, "ascii") + self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict") + self.assertEquals("", str3.encode("ascii", "ignore")) + + self.assertTrue(str1.endswith("ing")) + self.assertFalse(str1.endswith("ingh")) + + str4 = _FakeString("\tfoobar") + self.assertEquals("fake string", str1) + self.assertEquals(" foobar", str4.expandtabs()) + self.assertEquals(" foobar", str4.expandtabs(4)) + + self.assertEquals(3, str1.find("e")) + self.assertEquals(-1, str1.find("z")) + self.assertEquals(7, str1.find("r", 7)) + self.assertEquals(-1, str1.find("r", 8)) + self.assertEquals(7, str1.find("r", 5, 9)) + self.assertEquals(-1, str1.find("r", 5, 7)) + + str5 = _FakeString("foo{0}baz") + str6 = _FakeString("foo{abc}baz") + str7 = 
_FakeString("foo{0}{abc}buzz") + str8 = _FakeString("{0}{1}") + self.assertEquals("fake string", str1.format()) + self.assertEquals("foobarbaz", str5.format("bar")) + self.assertEquals("foobarbaz", str6.format(abc="bar")) + self.assertEquals("foobarbazbuzz", str7.format("bar", abc="baz")) + self.assertRaises(IndexError, str8.format, "abc") + + self.assertEquals(3, str1.index("e")) + self.assertRaises(ValueError, str1.index, "z") + self.assertEquals(7, str1.index("r", 7)) + self.assertRaises(ValueError, str1.index, "r", 8) + self.assertEquals(7, str1.index("r", 5, 9)) + self.assertRaises(ValueError, str1.index, "r", 5, 7) + + str9 = _FakeString("foobar") + str10 = _FakeString("foobar123") + str11 = _FakeString("foo bar") + self.assertTrue(str9.isalnum()) + self.assertTrue(str10.isalnum()) + self.assertFalse(str11.isalnum()) + + self.assertTrue(str9.isalpha()) + self.assertFalse(str10.isalpha()) + self.assertFalse(str11.isalpha()) + + str12 = _FakeString("123") + str13 = _FakeString("\u2155") + str14 = _FakeString("\u00B2") + if py3k: + self.assertFalse(str9.isdecimal()) + self.assertTrue(str12.isdecimal()) + self.assertFalse(str13.isdecimal()) + self.assertFalse(str14.isdecimal()) + + self.assertFalse(str9.isdigit()) + self.assertTrue(str12.isdigit()) + self.assertFalse(str13.isdigit()) + self.assertTrue(str14.isdigit()) + + str15 = _FakeString("") + str16 = _FakeString("FooBar") + self.assertTrue(str9.islower()) + self.assertFalse(str15.islower()) + self.assertFalse(str16.islower()) + + self.assertFalse(str9.isnumeric()) + self.assertTrue(str12.isnumeric()) + self.assertTrue(str13.isnumeric()) + self.assertTrue(str14.isnumeric()) methods = [ - "isdecimal", "isdigit", "islower", "isnumeric", "isspace", + "isspace", "istitle", "isupper", "join", "ljust", "lstrip", "partition", "replace", "rfind", "rindex", "rjust", "rpartition", "rsplit", "rstrip", "split", "splitlines", "startswith", "strip", "swapcase", diff --git a/tests/tokenizer/templates.mwtest b/tests/tokenizer/templates.mwtest index d699ef2..fa3c0a4 100644 --- a/tests/tokenizer/templates.mwtest +++ b/tests/tokenizer/templates.mwtest @@ -215,521 +215,150 @@ output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(te --- -name: newline_start +name: newlines_start label: a newline at the start of a template name input: "{{\nfoobar}}" output: [TemplateOpen(), Text(text="\nfoobar"), TemplateClose()] --- -name: newline_end +name: newlines_end label: a newline at the end of a template name input: "{{foobar\n}}" output: [TemplateOpen(), Text(text="foobar\n"), TemplateClose()] --- -name: newline_start_end +name: newlines_start_end label: a newline at the start and end of a template name input: "{{\nfoobar\n}}" output: [TemplateOpen(), Text(text="\nfoobar\n"), TemplateClose()] --- -name: newline_mid +name: newlines_mid label: a newline at the middle of a template name input: "{{foo\nbar}}" output: [Text(text="{{foo\nbar}}")] --- -name: newline_start_mid +name: newlines_start_mid label: a newline at the start and middle of a template name input: "{{\nfoo\nbar}}" output: [Text(text="{{\nfoo\nbar}}")] --- -name: newline_mid_end +name: newlines_mid_end label: a newline at the middle and end of a template name input: "{{foo\nbar\n}}" output: [Text(text="{{foo\nbar\n}}")] --- -name: newline_start_mid_end +name: newlines_start_mid_end label: a newline at the start, middle, and end of a template name input: "{{\nfoo\nbar\n}}" output: [Text(text="{{\nfoo\nbar\n}}")] --- -name: newline_unnamed_param_start -label: a newline at the 
start of an unnamed template parameter -input: "{{foo|\nbar}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateClose()] - ---- - -name: newline_unnamed_param_end -label: a newline at the end of an unnamed template parameter -input: "{{foo|bar\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateClose()] - ---- - -name: newline_unnamed_param_start_end -label: a newline at the start and end of an unnamed template parameter -input: "{{foo|\nbar\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateClose()] - ---- - -name: newline_unnamed_param_start_mid -label: a newline at the start and middle of an unnamed template parameter -input: "{{foo|\nb\nar}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar"), TemplateClose()] - ---- - -name: newline_unnamed_param_mid_end -label: a newline at the middle and end of an unnamed template parameter -input: "{{foo|b\nar\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="b\nar\n"), TemplateClose()] - ---- - -name: newline_unnamed_param_start_mid_end -label: a newline at the start, middle, and end of an unnamed template parameter +name: newlines_unnamed_param +label: newlines within an unnamed template parameter input: "{{foo|\nb\nar\n}}" output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] --- -name: newline_start_unnamed_param_start -label: a newline at the start of a template name and at the start of an unnamed template parameter -input: "{{\nfoo|\nbar}}" -output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateClose()] - ---- - -name: newline_start_unnamed_param_end -label: a newline at the start of a template name and at the end of an unnamed template parameter -input: "{{\nfoo|bar\n}}" -output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateClose()] - ---- - -name: newline_start_unnamed_param_start_end -label: a newline at the start of a template name and at the start and end of an unnamed template parameter -input: "{{\nfoo|\nbar\n}}" -output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateClose()] - ---- - -name: newline_start_unnamed_param_start_mid -label: a newline at the start of a template name and at the start and middle of an unnamed template parameter -input: "{{\nfoo|\nb\nar}}" -output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="\nb\nar"), TemplateClose()] - ---- - -name: newline_start_unnamed_param_mid_end -label: a newline at the start of a template name and at the middle and end of an unnamed template parameter -input: "{{\nfoo|b\nar\n}}" -output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="b\nar\n"), TemplateClose()] - ---- - -name: newline_start_unnamed_param_start_mid_end -label: a newline at the start of a template name and at the start, middle, and end of an unnamed template parameter -input: "{{\nfoo|\nb\nar\n}}" -output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] - ---- - -name: newline_end_unnamed_param_start -label: a newline at the end of a template name and at the start of an unnamed template parameter -input: "{{foo\n|\nbar}}" -output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), 
Text(text="\nbar"), TemplateClose()] - ---- - -name: newline_end_unnamed_param_end -label: a newline at the end of a template name and at the end of an unnamed template parameter -input: "{{foo\n|bar\n}}" -output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="bar\n"), TemplateClose()] - ---- - -name: newline_end_unnamed_param_start_end -label: a newline at the end of a template name and at the start and end of an unnamed template parameter -input: "{{foo\n|\nbar\n}}" -output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateClose()] - ---- - -name: newline_end_unnamed_param_start_mid -label: a newline at the end of a template name and at the start and middle of an unnamed template parameter -input: "{{foo\n|\nb\nar}}" -output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="\nb\nar"), TemplateClose()] - ---- - -name: newline_end_unnamed_param_mid_end -label: a newline at the end of a template name and at the middle and end of an unnamed template parameter -input: "{{foo\n|b\nar\n}}" -output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="b\nar\n"), TemplateClose()] - ---- - -name: newline_end_unnamed_param_start_mid_end -label: a newline at the end of a template name and at the start, middle, and end of an unnamed template parameter -input: "{{foo\n|\nb\nar\n}}" -output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] - ---- - -name: newline_start_end_unnamed_param_end -label: a newline at the start and end of a template name and the start of an unnamed template parameter -input: "{{\nfoo\n|\nbar}}" -output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nbar"), TemplateClose()] - ---- - -name: newline_start_end_unnamed_param_end -label: a newline at the start and end of a template name and the end of an unnamed template parameter -input: "{{\nfoo\n|bar\n}}" -output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="bar\n"), TemplateClose()] - ---- - -name: newline_start_end_unnamed_param_start_end -label: a newline at the start and end of a template name and the start and end of an unnamed template parameter -input: "{{\nfoo\n|\nbar\n}}" -output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateClose()] - ---- - -name: newline_start_end_unnamed_param_start_mid -label: a newline at the start and end of a template name and the start and middle of an unnamed template parameter -input: "{{\nfoo\n|\nb\nar}}" -output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar"), TemplateClose()] - ---- - -name: newline_start_end_unnamed_param_mid_end -label: a newline at the start and end of a template name and the middle and end of an unnamed template parameter -input: "{{\nfoo\n|b\nar\n}}" -output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="b\nar\n"), TemplateClose()] - ---- - -name: newline_start_end_unnamed_param_start_mid_end -label: a newline at the start and end of a template name and the start, middle, and end of an unnamed template parameter +name: newlines_enclose_template_name_unnamed_param +label: newlines enclosing a template name and within an unnamed template parameter input: "{{\nfoo\n|\nb\nar\n}}" output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] --- -name: 
newline_mid_unnamed_param_start -label: a newline at the middle of a template name and at the start of an unnamed template parameter -input: "{{f\noo|\nbar}}" -output: [Text(text="{{f\noo|\nbar}}")] - ---- - -name: newline_start_mid_unnamed_param_start -label: a newline at the start and middle of a template name and at the start of an unnamed template parameter -input: "{{\nf\noo|\nbar}}" -output: [Text(text="{{\nf\noo|\nbar}}")] - ---- - -name: newline_start_end_unnamed_param_start -label: a newline at the middle and of a template name and at the start of an unnamed template parameter -input: "{{f\noo\n|\nbar}}" -output: [Text(text="{{f\noo\n|\nbar}}")] - ---- - -name: newline_start_mid_end_unnamed_param_start -label: a newline at the start, middle, and end of a template name and at the start of an unnamed template parameter -input: "{{\nf\noo\n|\nbar}}" -output: [Text(text="{{\nf\noo\n|\nbar}}")] - ---- - -name: newline_named_param_value_start -label: a newline at the start of a named parameter value -input: "{{foo|1=\nbar}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nbar"), TemplateClose()] - ---- - -name: newline_named_param_value_end -label: a newline at the end of a named parameter value -input: "{{foo|1=bar\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="bar\n"), TemplateClose()] - ---- - -name: newline_named_param_value_start_end -label: a newline at the start and end of a named parameter value -input: "{{foo|1=\nbar\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nbar\n"), TemplateClose()] - ---- - -name: newline_named_param_value_start_mid -label: a newline at the start and middle of a named parameter value -input: "{{foo|1=\nb\nar}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nb\nar"), TemplateClose()] - ---- - -name: newline_named_param_value_mid_end -label: a newline at the middle and end of a named parameter value -input: "{{foo|1=b\nar\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="b\nar\n"), TemplateClose()] - ---- - -name: newline_named_param_value_start_mid_end -label: a newline at the start, middle, and end of a named parameter value -input: "{{foo|1=\nb\nar\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nb\nar\n"), TemplateClose()] - ---- - -name: newline_named_param_name_start -label: a newline at the start of a parameter name -input: "{{foo|\nbar=baz}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] - ---- - -name: newline_named_param_name_end -label: a newline at the end of a parameter name -input: "{{foo|bar\n=baz}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] - ---- - -name: newline_named_param_name_start_end -label: a newline at the start and end of a parameter name -input: "{{foo|\nbar\n=baz}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] +name: newlines_within_template_name_unnamed_param +label: 
newlines within a template name and within an unnamed template parameter +input: "{{\nfo\no\n|\nb\nar\n}}" +output: [Text(text="{{\nfo\no\n|\nb\nar\n}}")] --- -name: newline_named_param_name_mid -label: a newline at the middle of a parameter name -input: "{{foo|b\nar=baz}}" -output: [Text(text="{{foo|b\nar=baz}}")] +name: newlines_enclose_template_name_named_param_value +label: newlines enclosing a template name and within a named parameter value +input: "{{\nfoo\n|1=\nb\nar\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nb\nar\n"), TemplateClose()] --- -name: newline_named_param_name_start_mid -label: a newline at the start and middle of a parameter name -input: "{{foo|\nb\nar=baz}}" -output: [Text(text="{{foo|\nb\nar=baz}}")] +name: newlines_within_template_name_named_param_value +label: newlines within a template name and within a named parameter value +input: "{{\nf\noo\n|1=\nb\nar\n}}" +output: [Text(text="{{\nf\noo\n|1=\nb\nar\n}}")] --- -name: newline_named_param_name_mid_end -label: a newline at the middle and end of a parameter name -input: "{{foo|b\nar\n=baz}}" -output: [Text(text="{{foo|b\nar\n=baz}}")] - ---- - -name: newline_named_param_name_start_mid_end -label: a newline at the start, middle, and end of a parameter name +name: newlines_named_param_name +label: newlines within a parameter name input: "{{foo|\nb\nar\n=baz}}" -output: [Text(text="{{foo|\nb\nar\n=baz}}")] - ---- - -name: newline_named_param_name_start_param_value_end -label: a newline at the start of a parameter name and the end of a parameter value -input: "{{foo|\nbar=baz\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateParamEquals(), Text(text="baz\n"), TemplateClose()] - ---- - -name: newline_named_param_name_end_param_value_end -label: a newline at the end of a parameter name and the end of a parameter value -input: "{{foo|bar\n=baz\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateParamEquals(), Text(text="baz\n"), TemplateClose()] - ---- - -name: newline_named_param_name_start_end_param_value_end -label: a newline at the start and end of a parameter name and the end of a parameter value -input: "{{foo|\nbar\n=baz\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateParamEquals(), Text(text="baz\n"), TemplateClose()] - ---- - -name: newline_named_param_name_start_mid_param_value_end -label: a newline at the start and middle of a parameter name and the end of a parameter value -input: "{{foo|\nb\nar=baz\n}}" -output: [Text(text="{{foo|\nb\nar=baz\n}}")] - ---- - -name: newline_named_param_name_mid_end_param_value_end -label: a newline at the middle and end of a parameter name and the end of a parameter value -input: "{{foo|b\nar\n=baz\n}}" -output: [Text(text="{{foo|b\nar\n=baz\n}}")] - ---- - -name: newline_named_param_name_start_mid_end_param_value_end -label: a newline at the start, middle, and end of a parameter name and at the end of a parameter value -input: "{{foo|\nb\nar\n=baz\n}}" -output: [Text(text="{{foo|\nb\nar\n=baz\n}}")] - ---- - -name: newline_named_param_name_start_param_value_start -label: a newline at the start of a parameter name and at the start of a parameter value -input: "{{foo|\nbar=\nbaz}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateParamEquals(), Text(text="\nbaz"), TemplateClose()] - 
---- - -name: newline_named_param_name_end_param_value_start -label: a newline at the end of a parameter name and at the start of a parameter value -input: "{{foo|bar\n=\nbaz}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateParamEquals(), Text(text="\nbaz"), TemplateClose()] - ---- - -name: newline_named_param_name_start_end_param_value_start -label: a newline at the start and end of a parameter name and at the start of a parameter value -input: "{{foo|\nbar\n=\nbaz}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateParamEquals(), Text(text="\nbaz"), TemplateClose()] - ---- - -name: newline_named_param_name_start_mid_param_value_start -label: a newline at the start and middle of a parameter name and at the start of a parameter value -input: "{{foo|\nb\nar=\nbaz}}" -output: [Text(text="{{foo|\nb\nar=\nbaz}}")] - ---- - -name: newline_named_param_name_mid_end_param_value_start -label: a newline at the middle and end of a parameter name and at the start of a parameter value -input: "{{foo|b\nar\n=\nbaz}}" -output: [Text(text="{{foo|b\nar\n=\nbaz}}")] +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] --- -name: newline_named_param_name_start_mid_end_param_value_start -label: a newline at the start, middle, and end of a parameter name and at the start of a parameter value -input: "{{foo|\nb\nar\n=\nbaz}}" -output: [Text(text="{{foo|\nb\nar\n=\nbaz}}")] +name: newlines_named_param_name_param_value +label: newlines within a parameter name and within a parameter value +input: "{{foo|\nb\nar\n=\nba\nz\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nba\nz\n"), TemplateClose()] --- -name: newline_named_param_name_start_param_value_start_end -label: a newline at the start of a parameter name and at the start and end of a parameter value -input: "{{foo|\nbar=\nbaz\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateParamEquals(), Text(text="\nbaz\n"), TemplateClose()] +name: newlines_enclose_template_name_named_param_name +label: newlines enclosing a template name and within a parameter name +input: "{{\nfoo\n|\nb\nar\n=baz}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] --- -name: newline_named_param_name_end_param_value_start_end -label: a newline at the end of a parameter name and at the start and end of a parameter value -input: "{{foo|bar\n=\nbaz\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateParamEquals(), Text(text="\nbaz\n"), TemplateClose()] +name: newlines_enclose_template_name_named_param_name_param_value +label: newlines enclosing a template name and within a parameter name and within a parameter value +input: "{{\nfoo\n|\nb\nar\n=\nba\nz\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nba\nz\n"), TemplateClose()] --- -name: newline_named_param_name_start_end_param_value_start_end -label: a newline at the start and end of a parameter name and at the start and end of a parameter value -input: "{{foo|\nbar\n=\nbaz\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), 
Text(text="\nbar\n"), TemplateParamEquals(), Text(text="\nbaz\n"), TemplateClose()] +name: newlines_within_template_name_named_param_name +label: newlines within a template name and within a parameter name +input: "{{\nfo\no\n|\nb\nar\n=baz}}" +output: [Text(text="{{\nfo\no\n|\nb\nar\n=baz}}")] --- -name: newline_named_param_name_start_mid_param_value_start_end -label: a newline at the start and middle of a parameter name and at the start and end of a parameter value -input: "{{foo|\nb\nar=\nbaz\n}}" -output: [Text(text="{{foo|\nb\nar=\nbaz\n}}")] +name: newlines_within_template_name_named_param_name_param_value +label: newlines within a template name and within a parameter name and within a parameter value +input: "{{\nf\noo\n|\nb\nar\n=\nba\nz\n}}" +output: [Text(text="{{\nf\noo\n|\nb\nar\n=\nba\nz\n}}")] --- -name: newline_named_param_name_mid_end_param_value_start_end -label: a newline at the middle and end of a parameter name and at the start and end of a parameter value -input: "{{foo|b\nar\n=\nbaz\n}}" -output: [Text(text="{{foo|b\nar\n=\nbaz\n}}")] - ---- - -name: newline_named_param_name_start_mid_end_param_value_start_end -label: a newline at the start, middle, and end of a parameter name and at the start and end of a parameter value -input: "{{foo|\nb\nar\n=\nbaz\n}}" -output: [Text(text="{{foo|\nb\nar\n=\nbaz\n}}")] - ---- - -name: newline_named_param_name_start_param_value_mid -label: a newline at the start of a parameter name and at the middle of a parameter value -input: "{{foo|\nbar=ba\nz}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateParamEquals(), Text(text="ba\nz"), TemplateClose()] - ---- - -name: newline_named_param_name_end_param_value_mid -label: a newline at the end of a parameter name and at the middle of a parameter value -input: "{{foo|bar\n=ba\nz}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateParamEquals(), Text(text="ba\nz"), TemplateClose()] - ---- - -name: newline_named_param_name_start_end_param_value_mid -label: a newline at the start and end of a parameter name and at the middle of a parameter value -input: "{{foo|\nbar\n=ba\nz}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateParamEquals(), Text(text="ba\nz"), TemplateClose()] - ---- - -name: newline_named_param_name_start_mid_param_value_mid -label: a newline at the start and middle of a parameter name and at the middle of a parameter value -input: "{{foo|\nb\nar=ba\nz}}" -output: [Text(text="{{foo|\nb\nar=ba\nz}}")] - ---- - -name: newline_named_param_name_mid_end_param_value_mid -label: a newline at the middle and end of a parameter name and at the middle of a parameter value -input: "{{foo|b\nar\n=ba\nz}}" -output: [Text(text="{{foo|b\nar\n=ba\nz}}")] - ---- - -name: newline_named_param_start_mid_end_param_value_mid -label: a newline at the start, middle, and end of a parameter name and at the middle of a parameter value -input: "{{foo|\nb\nar\n=ba\nz}}" -output: [Text(text="{{foo|\nb\nar\n=ba\nz}}")] +name: newlines_wildcard +label: a random, complex assortment of templates and newlines +input: "{{\nfoo\n|\nb\nar\n=\nb\naz\n|\nb\nuz\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nb\naz\n"), TemplateParamSeparator(), Text(text="\nb\nuz\n"), TemplateClose()] --- -name: newline_wildcard -label: a random, complex assortment of templates and newlines -input: 
"{{\nfoo\n|\nbar\n=\nb\naz\n|\nb\nuz\n}}" -output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateParamEquals(), Text(text="\nb\naz\n"), TemplateParamSeparator(), Text(text="\nb\nuz\n"), TemplateClose()] +name: newlines_wildcard_redux +label: an even more random and complex assortment of templates and newlines +input: "{{\nfoo\n|\n{{\nbar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\n"), TemplateOpen(), Text(text="\nbar\n"), TemplateParamSeparator(), Text(text="\nb\naz\n"), TemplateParamEquals(), Text(text="\nb\niz\n"), TemplateClose(), Text(text="\n"), TemplateParamEquals(), Text(text="\nb\nuzz\n"), TemplateClose()] --- -name: newline_wildcard_redux -label: an even more random and complex assortment of templates and newlines -input: "{{\nfoo\n|\n{{\nbar\n|\nbaz\n=\nb\niz\n}}\n=\nb\nuzz\n}}" -output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\n"), TemplateOpen(), Text(text="\nbar\n"), TemplateParamSeparator(), Text(text="\nbaz\n"), TemplateParamEquals(), Text(text="\nb\niz\n"), TemplateClose(), Text(text="\n"), TemplateParamEquals(), Text(text="\nb\nuzz\n"), TemplateClose()] +name: newlines_wildcard_redux_invalid +label: a variation of the newlines_wildcard_redux test that is invalid +input: "{{\nfoo\n|\n{{\nb\nar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}" +output: [Text(text="{{\nfoo\n|\n{{\nb\nar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}")] --- @@ -812,8 +441,43 @@ output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text=" name: valid_param_name_brackets label: valid characters in unnamed template parameter: left and right brackets -input: "{{foo|ba[r]}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba[r]"), TemplateClose()] +input: "{{foo|ba[r]=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba[r]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: valid_param_name_double_left_brackets +label: valid characters in unnamed template parameter: double left brackets +input: "{{foo|bar[[in\nvalid=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar[[in\nvalid"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: valid_param_name_double_right_brackets +label: valid characters in unnamed template parameter: double right brackets +input: "{{foo|bar]]=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar]]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: valid_param_name_double_brackets +label: valid characters in unnamed template parameter: double left and right brackets +input: "{{foo|bar[[in\nvalid]]=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar[[in\nvalid]]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: invalid_param_name_double_left_braces +label: invalid characters in template parameter name: double left braces +input: "{{foo|bar{{in\nvalid=baz}}" +output: [Text(text="{{foo|bar{{in\nvalid=baz}}")] + +--- + +name: invalid_param_name_double_braces +label: invalid characters in template parameter name: double left and right braces +input: "{{foo|bar{{in\nvalid}}=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar{{in\nvalid"), TemplateClose(), Text(text="=baz}}")] --- @@ -919,3 
+583,17 @@ name: incomplete_two_named_params label: incomplete templates that should fail gracefully: two named parameters with values input: "{{stuff}} {{foo|bar=baz|biz=buzz" output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz=buzz")] + +--- + +name: incomplete_nested_template_as_unnamed_param +label: incomplete templates that should fail gracefully: a valid nested template as an unnamed parameter +input: "{{stuff}} {{foo|{{bar}}" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|"), TemplateOpen(), Text(text="bar"), TemplateClose()] + +--- + +name: incomplete_nested_template_as_param_value +label: incomplete templates that should fail gracefully: a valid nested template as a parameter value +input: "{{stuff}} {{foo|bar={{baz}}" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar="), TemplateOpen(), Text(text="baz"), TemplateClose()] From 718fcb24c86415a5ec4f597d63dbe71ce3a49fea Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 8 Mar 2013 23:08:49 -0500 Subject: [PATCH 20/67] Fix eight failing tests; all template parsing tests now passing (#25). --- mwparserfromhell/parser/tokenizer.c | 90 +++++++++++++------------------------ mwparserfromhell/parser/tokenizer.h | 3 +- 2 files changed, 31 insertions(+), 62 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 6716698..3e6527a 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1135,48 +1135,59 @@ Tokenizer_parse_comment(Tokenizer* self) } /* - Make sure we are not trying to write an invalid character. + Make sure we are not trying to write an invalid character. Return 0 if + everything is safe, or -1 if the route must be failed. 
*/ -static void +static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) { if (context & LC_FAIL_NEXT) { - Tokenizer_fail_route(self); - return; + return -1; } if (context & LC_WIKILINK_TITLE) { if (data == *"]" || data == *"{") self->topstack->context |= LC_FAIL_NEXT; else if (data == *"\n" || data == *"[" || data == *"}") - Tokenizer_fail_route(self); - return; + return -1; + return 0; } if (context & LC_TEMPLATE_NAME) { if (data == *"{" || data == *"}" || data == *"[") { self->topstack->context |= LC_FAIL_NEXT; - return; + return 0; } if (data == *"]") { - Tokenizer_fail_route(self); - return; + return -1; } if (data == *"|") - return; + return 0; + + if (context & LC_HAS_TEXT) { + if (context & LC_FAIL_ON_TEXT) { + if (!Py_UNICODE_ISSPACE(data)) + return -1; + } + else { + if (data == *"\n") + self->topstack->context |= LC_FAIL_ON_TEXT; + } + } + else if (!Py_UNICODE_ISSPACE(data)) + self->topstack->context |= LC_HAS_TEXT; } - else if (context & (LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)) { + else { if (context & LC_FAIL_ON_EQUALS) { if (data == *"=") { - Tokenizer_fail_route(self); - return; + return -1; } } else if (context & LC_FAIL_ON_LBRACE) { - if (data == *"{") { + if (data == *"{" || (Tokenizer_READ(self, -1) == *"{" && Tokenizer_READ(self, -2) == *"{")) { if (context & LC_TEMPLATE) self->topstack->context |= LC_FAIL_ON_EQUALS; else self->topstack->context |= LC_FAIL_NEXT; - return; + return 0; } self->topstack->context ^= LC_FAIL_ON_LBRACE; } @@ -1186,7 +1197,7 @@ Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) self->topstack->context |= LC_FAIL_ON_EQUALS; else self->topstack->context |= LC_FAIL_NEXT; - return; + return 0; } self->topstack->context ^= LC_FAIL_ON_RBRACE; } @@ -1195,47 +1206,7 @@ Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) else if (data == *"}") self->topstack->context |= LC_FAIL_ON_RBRACE; } - if (context & LC_HAS_TEXT) { - if (context & LC_FAIL_ON_TEXT) { - if (!Py_UNICODE_ISSPACE(data)) { - if (context & LC_TEMPLATE_PARAM_KEY) { - self->topstack->context ^= LC_FAIL_ON_TEXT; - self->topstack->context |= LC_FAIL_ON_EQUALS; - } - else - Tokenizer_fail_route(self); - return; - } - } - else { - if (data == *"\n") - self->topstack->context |= LC_FAIL_ON_TEXT; - } - } - else if (!Py_UNICODE_ISSPACE(data)) - self->topstack->context |= LC_HAS_TEXT; -} - -/* - Unset any safety-checking contexts set by Tokenizer_verify_safe(). Used - when we preserve a context but previous data becomes invalid, like when - moving between template parameters. 
-*/ -static void -Tokenizer_reset_safety_checks(Tokenizer* self) -{ - static int checks[] = { - LC_HAS_TEXT, LC_FAIL_ON_TEXT, LC_FAIL_NEXT, LC_FAIL_ON_LBRACE, - LC_FAIL_ON_RBRACE, LC_FAIL_ON_EQUALS, 0}; - int context = self->topstack->context, i = 0, this; - while (1) { - this = checks[i]; - if (!this) - return; - if (context & this) - self->topstack->context ^= this; - i++; - } + return 0; } /* @@ -1258,12 +1229,12 @@ Tokenizer_parse(Tokenizer* self, int context) this = Tokenizer_READ(self, 0); this_context = self->topstack->context; if (this_context & unsafe_contexts) { - Tokenizer_verify_safe(self, this_context, this); - if (BAD_ROUTE) { + if (Tokenizer_verify_safe(self, this_context, this) < 0) { if (this_context & LC_TEMPLATE_PARAM_KEY) { trash = Tokenizer_pop(self); Py_XDECREF(trash); } + Tokenizer_fail_route(self); return NULL; } } @@ -1303,7 +1274,6 @@ Tokenizer_parse(Tokenizer* self, int context) self->topstack->context ^= LC_FAIL_NEXT; } else if (this == *"|" && this_context & LC_TEMPLATE) { - Tokenizer_reset_safety_checks(self); if (Tokenizer_handle_template_param(self)) return NULL; } diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index 8d51013..d4b42f5 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -206,8 +206,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer*); static int Tokenizer_really_parse_entity(Tokenizer*); static int Tokenizer_parse_entity(Tokenizer*); static int Tokenizer_parse_comment(Tokenizer*); -static void Tokenizer_verify_safe(Tokenizer*, int, Py_UNICODE); -static void Tokenizer_reset_safety_checks(Tokenizer*); +static int Tokenizer_verify_safe(Tokenizer*, int, Py_UNICODE); static PyObject* Tokenizer_parse(Tokenizer*, int); static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); From 054a84afe09681974d8438c70c619b3a40be59ee Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 8 Mar 2013 23:17:08 -0500 Subject: [PATCH 21/67] A bit of misc cleanup. --- mwparserfromhell/parser/tokenizer.c | 40 ++++++++++++++++++++----------------- mwparserfromhell/parser/tokenizer.h | 4 ++++ 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 3e6527a..8c96500 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -843,7 +843,8 @@ Tokenizer_handle_heading_end(Tokenizer* self) self->head++; } current = log2(self->topstack->context / LC_HEADING_LEVEL_1) + 1; - level = current > best ? (best > 6 ? 6 : best) : (current > 6 ? 6 : current); + level = current > best ? (best > 6 ? 6 : best) : + (current > 6 ? 
6 : current); after = (HeadingData*) Tokenizer_parse(self, self->topstack->context); if (BAD_ROUTE) { RESET_ROUTE(); @@ -956,11 +957,11 @@ Tokenizer_really_parse_entity(Tokenizer* self) else numeric = hexadecimal = 0; if (hexadecimal) - valid = "0123456789abcdefABCDEF"; + valid = HEXDIGITS; else if (numeric) - valid = "0123456789"; + valid = DIGITS; else - valid = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + valid = ALPHANUM; text = calloc(MAX_ENTITY_SIZE, sizeof(char)); if (!text) { PyErr_NoMemory(); @@ -1005,7 +1006,7 @@ Tokenizer_really_parse_entity(Tokenizer* self) i = 0; while (1) { def = entitydefs[i]; - if (!def) // We've reached the end of the def list without finding it + if (!def) // We've reached the end of the defs without finding it FAIL_ROUTE_AND_EXIT() if (strcmp(text, def) == 0) break; @@ -1161,7 +1162,6 @@ Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) } if (data == *"|") return 0; - if (context & LC_HAS_TEXT) { if (context & LC_FAIL_ON_TEXT) { if (!Py_UNICODE_ISSPACE(data)) @@ -1182,7 +1182,8 @@ Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) } } else if (context & LC_FAIL_ON_LBRACE) { - if (data == *"{" || (Tokenizer_READ(self, -1) == *"{" && Tokenizer_READ(self, -2) == *"{")) { + if (data == *"{" || (Tokenizer_READ(self, -1) == *"{" && + Tokenizer_READ(self, -2) == *"{")) { if (context & LC_TEMPLATE) self->topstack->context |= LC_FAIL_ON_EQUALS; else @@ -1375,7 +1376,8 @@ Tokenizer_tokenize(Tokenizer* self, PyObject* args) PyMODINIT_FUNC init_tokenizer(void) { - PyObject *module, *tempmodule, *defmap, *deflist, *globals, *locals, *fromlist, *modname; + PyObject *module, *tempmod, *defmap, *deflist, *globals, *locals, + *fromlist, *modname; unsigned numdefs, i; char *name; @@ -1386,13 +1388,13 @@ init_tokenizer(void) Py_INCREF(&TokenizerType); PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); - tempmodule = PyImport_ImportModule("htmlentitydefs"); - if (!tempmodule) + tempmod = PyImport_ImportModule("htmlentitydefs"); + if (!tempmod) return; - defmap = PyObject_GetAttrString(tempmodule, "entitydefs"); + defmap = PyObject_GetAttrString(tempmod, "entitydefs"); if (!defmap) return; - Py_DECREF(tempmodule); + Py_DECREF(tempmod); deflist = PyDict_Keys(defmap); if (!deflist) return; @@ -1416,18 +1418,20 @@ init_tokenizer(void) if (!modname) return; PyList_SET_ITEM(fromlist, 0, modname); - tempmodule = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); + tempmod = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); Py_DECREF(fromlist); - if (!tempmodule) + if (!tempmod) return; - tokens = PyObject_GetAttrString(tempmodule, "tokens"); - Py_DECREF(tempmodule); + tokens = PyObject_GetAttrString(tempmod, "tokens"); + Py_DECREF(tempmod); Text = PyObject_GetAttrString(tokens, "Text"); TemplateOpen = PyObject_GetAttrString(tokens, "TemplateOpen"); - TemplateParamSeparator = PyObject_GetAttrString(tokens, "TemplateParamSeparator"); - TemplateParamEquals = PyObject_GetAttrString(tokens, "TemplateParamEquals"); + TemplateParamSeparator = PyObject_GetAttrString(tokens, + "TemplateParamSeparator"); + TemplateParamEquals = PyObject_GetAttrString(tokens, + "TemplateParamEquals"); TemplateClose = PyObject_GetAttrString(tokens, "TemplateClose"); ArgumentOpen = PyObject_GetAttrString(tokens, "ArgumentOpen"); diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index d4b42f5..693538c 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ 
b/mwparserfromhell/parser/tokenizer.h @@ -36,6 +36,10 @@ SOFTWARE. #define malloc PyObject_Malloc #define free PyObject_Free +#define DIGITS "0123456789" +#define HEXDIGITS "0123456789abcdefABCDEF" +#define ALPHANUM "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + static const char* MARKERS[] = { "{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", "/", "-", "!", "\n", ""}; From 57d48f1adb8969e6a210fff5c613831d5e70b5e7 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 16 Mar 2013 18:15:00 -0400 Subject: [PATCH 22/67] More tests for StringMixIns. --- tests/test_string_mixin.py | 78 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 72 insertions(+), 6 deletions(-) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 43a9e9a..c0fe99d 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -258,12 +258,78 @@ class TestStringMixIn(unittest.TestCase): self.assertTrue(str13.isnumeric()) self.assertTrue(str14.isnumeric()) - methods = [ - "isspace", - "istitle", "isupper", "join", "ljust", "lstrip", "partition", - "replace", "rfind", "rindex", "rjust", "rpartition", "rsplit", - "rstrip", "split", "splitlines", "startswith", "strip", "swapcase", - "title", "translate", "upper", "zfill"] + str17 = _FakeString(" ") + str18 = _FakeString("\t \t \r\n") + self.assertFalse(str1.isspace()) + self.assertFalse(str9.isspace()) + self.assertTrue(str17.isspace()) + self.assertTrue(str18.isspace()) + + str19 = _FakeString("This Sentence Looks Like A Title") + str20 = _FakeString("This sentence doesn't LookLikeATitle") + self.assertFalse(str15.istitle()) + self.assertTrue(str19.istitle()) + self.assertFalse(str20.istitle()) + + str21 = _FakeString("FOOBAR") + self.assertFalse(str9.isupper()) + self.assertFalse(str15.isupper()) + self.assertTrue(str21.isupper()) + + self.assertEquals("foobar", str15.join(["foo", "bar"])) + self.assertEquals("foo123bar123baz", str12.join(("foo", "bar", "baz"))) + + self.assertEquals("fake string ", str1.ljust(15)) + self.assertEquals("fake string ", str1.ljust(16)) + self.assertEquals("fake stringqqqq", str1.ljust(15, "q")) + + # lstrip + + # partition + + # replace + + self.assertEquals(3, str1.rfind("e")) + self.assertEquals(-1, str1.rfind("z")) + self.assertEquals(7, str1.rfind("r", 7)) + self.assertEquals(-1, str1.rfind("r", 8)) + self.assertEquals(7, str1.rfind("r", 5, 9)) + self.assertEquals(-1, str1.rfind("r", 5, 7)) + + self.assertEquals(3, str1.rindex("e")) + self.assertRaises(ValueError, str1.rindex, "z") + self.assertEquals(7, str1.rindex("r", 7)) + self.assertRaises(ValueError, str1.rindex, "r", 8) + self.assertEquals(7, str1.rindex("r", 5, 9)) + self.assertRaises(ValueError, str1.rindex, "r", 5, 7) + + self.assertEquals(" fake string", str1.rjust(15)) + self.assertEquals(" fake string", str1.rjust(16)) + self.assertEquals("qqqqfake string", str1.rjust(15, "q")) + + # rpartition + + # rsplit + + # rstrip + + # split + + # splitlines + + # startswith + + # strip + + # swapcase + + # title + + # translate + + # upper + + # zfill if __name__ == "__main__": unittest.main(verbosity=2) From 6704bcd6621e73352b1a138c24d65e6c2ca09b56 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 16 Mar 2013 19:39:56 -0400 Subject: [PATCH 23/67] Document the remaining universal methods. Still need to do some py3k-only ones. 
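One behavioural tweak worth illustrating: replace() now defaults its count
argument, so it can be called with two arguments just like str.replace(). A
minimal sketch using the _FakeString helper from tests/test_string_mixin.py
(not part of this patch):

    from mwparserfromhell.string_mixin import StringMixIn

    class _FakeString(StringMixIn):
        def __init__(self, data):
            self._data = data

        def __unicode__(self):
            return self._data

    s = _FakeString("fake string")
    assert s.replace("fake", "real") == "real string"  # count omitted
    assert s.replace("n", "N", 1) == "fake striNg"     # explicit count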
--- mwparserfromhell/string_mixin.py | 11 ++-- tests/test_string_mixin.py | 117 +++++++++++++++++++++++++++------------ 2 files changed, 89 insertions(+), 39 deletions(-) diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 6490051..2539f74 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -179,10 +179,9 @@ class StringMixIn(object): def isalpha(self): return self.__unicode__().isalpha() - if py3k: - @inheritdoc - def isdecimal(self): - return self.__unicode__().isdecimal() + @inheritdoc + def isdecimal(self): + return self.__unicode__().isdecimal() @inheritdoc def isdigit(self): @@ -231,7 +230,9 @@ class StringMixIn(object): return self.__unicode__().partition(sep) @inheritdoc - def replace(self, old, new, count): + def replace(self, old, new, count=None): + if count is None: + return self.__unicode__().replace(old, new) return self.__unicode__().replace(old, new, count) @inheritdoc diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index c0fe99d..455f2e6 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -42,13 +42,13 @@ class TestStringMixIn(unittest.TestCase): methods = [ "capitalize", "center", "count", "encode", "endswith", "expandtabs", "find", "format", "index", "isalnum", "isalpha", - "isdigit", "islower", "isnumeric", "isspace", "istitle", "isupper", - "join", "ljust", "lstrip", "partition", "replace", "rfind", - "rindex", "rjust", "rpartition", "rsplit", "rstrip", "split", - "splitlines", "startswith", "strip", "swapcase", "title", - "translate", "upper", "zfill"] + "isdecimal", "isdigit", "islower", "isnumeric", "isspace", + "istitle", "isupper", "join", "ljust", "lower", "lstrip", + "partition", "replace", "rfind", "rindex", "rjust", "rpartition", + "rsplit", "rstrip", "split", "splitlines", "startswith", "strip", + "swapcase", "title", "translate", "upper", "zfill"] if py3k: - methods.append("isdecimal") + methods.extend(["casefold", "format_map", "isidentifier", "isprintable", "maketrans"]) else: methods.append("decode") for meth in methods: @@ -236,11 +236,10 @@ class TestStringMixIn(unittest.TestCase): str12 = _FakeString("123") str13 = _FakeString("\u2155") str14 = _FakeString("\u00B2") - if py3k: - self.assertFalse(str9.isdecimal()) - self.assertTrue(str12.isdecimal()) - self.assertFalse(str13.isdecimal()) - self.assertFalse(str14.isdecimal()) + self.assertFalse(str9.isdecimal()) + self.assertTrue(str12.isdecimal()) + self.assertFalse(str13.isdecimal()) + self.assertFalse(str14.isdecimal()) self.assertFalse(str9.isdigit()) self.assertTrue(str12.isdigit()) @@ -283,11 +282,20 @@ class TestStringMixIn(unittest.TestCase): self.assertEquals("fake string ", str1.ljust(16)) self.assertEquals("fake stringqqqq", str1.ljust(15, "q")) - # lstrip + self.assertEquals("", str15.lower()) + self.assertEquals("foobar", str16.lower()) + + str22 = _FakeString(" fake string ") + self.assertEquals("fake string", str1.lstrip()) + self.assertEquals("fake string ", str22.lstrip()) + self.assertEquals("ke string", str1.lstrip("abcdef")) - # partition + self.assertEquals(("fa", "ke", " string"), str1.partition("ke")) + self.assertEquals(("fake string", "", ""), str1.partition("asdf")) - # replace + str23 = _FakeString("boo foo moo") + self.assertEquals("real string", str1.replace("fake", "real")) + self.assertEquals("bu fu moo", str23.replace("oo", "u", 2)) self.assertEquals(3, str1.rfind("e")) self.assertEquals(-1, str1.rfind("z")) @@ -307,29 +315,70 @@ class 
TestStringMixIn(unittest.TestCase): self.assertEquals(" fake string", str1.rjust(16)) self.assertEquals("qqqqfake string", str1.rjust(15, "q")) - # rpartition - - # rsplit - - # rstrip - - # split - - # splitlines + self.assertEquals(("fa", "ke", " string"), str1.rpartition("ke")) + self.assertEquals(("", "", "fake string"), str1.rpartition("asdf")) + + str24 = _FakeString(" this is a sentence with whitespace ") + actual = ["this", "is", "a", "sentence", "with", "whitespace"] + self.assertEquals(actual, str24.rsplit()) + self.assertEquals(actual, str24.rsplit(None)) + actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", + "", "whitespace", ""] + self.assertEquals(actual, str24.rsplit(" ")) + actual = [" this is a", "sentence", "with", "whitespace"] + self.assertEquals(actual, str24.rsplit(None, 3)) + actual = [" this is a sentence with", "", "whitespace", ""] + self.assertEquals(actual, str24.rsplit(" ", 3)) + + self.assertEquals("fake string", str1.rstrip()) + self.assertEquals(" fake string", str22.rstrip()) + self.assertEquals("fake stri", str1.rstrip("ngr")) + + actual = ["this", "is", "a", "sentence", "with", "whitespace"] + self.assertEquals(actual, str24.split()) + self.assertEquals(actual, str24.split(None)) + actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", + "", "whitespace", ""] + self.assertEquals(actual, str24.split(" ")) + actual = ["this", "is", "a", "sentence with whitespace "] + self.assertEquals(actual, str24.split(None, 3)) + actual = ["", "", "", "this is a sentence with whitespace "] + self.assertEquals(actual, str24.split(" ", 3)) + + str25 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") + self.assertEquals(["lines", "of", "text", "are", "presented", "here"], + str25.splitlines()) + self.assertEquals(["lines\n", "of\n", "text\r\n", "are\r\n", + "presented\n", "here"], str25.splitlines(True)) + + self.assertTrue(str1.startswith("fake")) + self.assertFalse(str1.startswith("faker")) + + self.assertEquals("fake string", str1.strip()) + self.assertEquals("fake string", str22.strip()) + self.assertEquals("ke stri", str1.strip("abcdefngr")) + + self.assertEquals("fOObAR", str16.swapcase()) + + self.assertEquals("Fake String", str1.title()) - # startswith - - # strip - - # swapcase - - # title - - # translate + if py3k: + table1 = str.maketrans({97: "1", 101: "2", 105: "3", 111: "4", + 117: "5"}) + table2 = str.maketrans("aeiou", "12345") + table3 = str.maketrans("aeiou", "12345", "rts") + self.assertEquals("f1k2 str3ng", str1.translate(table1)) + self.assertEquals("f1k2 str3ng", str1.translate(table2)) + self.assertEquals("f1k2 3ng", str1.translate(table3)) + else: + table = {97: "1", 101: "2", 105: "3", 111: "4", 117: "5"} + self.assertEquals("f1k2 str3ng", str1.translate(table)) - # upper + self.assertEquals("", str15.upper()) + self.assertEquals("FOOBAR", str16.upper()) - # zfill + self.assertEquals("123", str12.zfill(3)) + self.assertEquals("000123", str12.zfill(6)) if __name__ == "__main__": unittest.main(verbosity=2) From cf14b5ef4e02dadcba08083e62ceb800ec9edb6d Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 16 Mar 2013 19:55:25 -0400 Subject: [PATCH 24/67] Add some missing methods to StringMixIn. 
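A rough sketch (not from the patch) of the guard used for the Python 3-only
additions: methods defined inside an "if py3k:" block simply do not exist on
Python 2 instances, which is what the updated test_docs list reflects.

    from mwparserfromhell.compat import py3k

    class Demo(object):
        def lower(self):               # available on both Pythons
            return "demo"

        if py3k:
            def casefold(self):        # defined only when running Python 3
                return "demo"

    assert hasattr(Demo(), "lower")
    assert hasattr(Demo(), "casefold") == py3k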
--- mwparserfromhell/string_mixin.py | 30 ++++++++++++++++++++++++++++++ tests/test_string_mixin.py | 3 ++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 2539f74..9e6d551 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -122,6 +122,11 @@ class StringMixIn(object): def capitalize(self): return self.__unicode__().capitalize() + if py3k: + @inheritdoc + def casefold(self): + return self.__unicode__().casefold() + @inheritdoc def center(self, width, fillchar=None): if fillchar is None: @@ -167,6 +172,11 @@ class StringMixIn(object): def format(self, *args, **kwargs): return self.__unicode__().format(*args, **kwargs) + if py3k: + @inheritdoc + def format_map(self, mapping): + return self.__unicode__().format_map(mapping) + @inheritdoc def index(self, sub, start=None, end=None): return self.__unicode__().index(sub, start, end) @@ -187,6 +197,11 @@ class StringMixIn(object): def isdigit(self): return self.__unicode__().isdigit() + if py3k: + @inheritdoc + def isidentifier(self): + return self.__unicode__().isidentifier() + @inheritdoc def islower(self): return self.__unicode__().islower() @@ -195,6 +210,11 @@ class StringMixIn(object): def isnumeric(self): return self.__unicode__().isnumeric() + if py3k: + @inheritdoc + def isprintable(self): + return self.__unicode__().isprintable() + @inheritdoc def isspace(self): return self.__unicode__().isspace() @@ -225,6 +245,16 @@ class StringMixIn(object): def lstrip(self, chars=None): return self.__unicode__().lstrip(chars) + if py3k: + @inheritdoc + @staticmethod + def maketrans(self, x, y=None, z=None): + if z is None: + if y is None: + return self.__unicode__.maketrans(x) + return self.__unicode__.maketrans(x, y) + return self.__unicode__.maketrans(x, y, z) + @inheritdoc def partition(self, sep): return self.__unicode__().partition(sep) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 455f2e6..cff3848 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -48,7 +48,8 @@ class TestStringMixIn(unittest.TestCase): "rsplit", "rstrip", "split", "splitlines", "startswith", "strip", "swapcase", "title", "translate", "upper", "zfill"] if py3k: - methods.extend(["casefold", "format_map", "isidentifier", "isprintable", "maketrans"]) + methods.extend(["casefold", "format_map", "isidentifier", + "isprintable", "maketrans"]) else: methods.append("decode") for meth in methods: From 0af5894647c96d3d7cd2273aa2ddf30c864cdbca Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 18 Mar 2013 02:44:25 -0400 Subject: [PATCH 25/67] Finish tests for py3k-only methods in TestStringMixIn. 
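For reference (not part of the patch), the behavior the new Python 3-only
assertions rely on, shown with plain str objects on Python 3:

    # casefold() is an aggressive lower(): it folds the German sharp s
    assert "ß".lower() == "ß" and "ß".casefold() == "ss"

    # format_map() is format() taking an explicit mapping
    assert "foo{abc}baz".format_map({"abc": "bar"}) == "foobarbaz"

    # maketrans() builds the table that translate() consumes
    table = str.maketrans("aeiou", "12345")
    assert "fake string".translate(table) == "f1k2 str3ng"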
--- tests/test_string_mixin.py | 65 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 20 deletions(-) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index cff3848..bf49629 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -216,6 +216,11 @@ class TestStringMixIn(unittest.TestCase): self.assertEquals("foobarbazbuzz", str7.format("bar", abc="baz")) self.assertRaises(IndexError, str8.format, "abc") + if py3k: + self.assertEquals("fake string", str1.format_map({})) + self.assertEquals("foobarbaz", str6.format_map({"abc": "bar"})) + self.assertRaises(ValueError, str5.format_map, {0: "abc"}) + self.assertEquals(3, str1.index("e")) self.assertRaises(ValueError, str1.index, "z") self.assertEquals(7, str1.index("r", 7)) @@ -247,6 +252,12 @@ class TestStringMixIn(unittest.TestCase): self.assertFalse(str13.isdigit()) self.assertTrue(str14.isdigit()) + if py3k: + self.assertTrue(str9.isidentifier()) + self.assertTrue(str10.isidentifier()) + self.assertFalse(str11.isidentifier()) + self.assertFalse(str12.isidentifier()) + str15 = _FakeString("") str16 = _FakeString("FooBar") self.assertTrue(str9.islower()) @@ -258,6 +269,14 @@ class TestStringMixIn(unittest.TestCase): self.assertTrue(str13.isnumeric()) self.assertTrue(str14.isnumeric()) + if py3k: + str16B = _FakeString("\x01\x02") + self.assertTrue(str9.isprintable()) + self.assertTrue(str13.isprintable()) + self.assertTrue(str14.isprintable()) + self.assertTrue(str15.isprintable()) + self.assertFalse(str16B.isprintable()) + str17 = _FakeString(" ") str18 = _FakeString("\t \t \r\n") self.assertFalse(str1.isspace()) @@ -283,20 +302,26 @@ class TestStringMixIn(unittest.TestCase): self.assertEquals("fake string ", str1.ljust(16)) self.assertEquals("fake stringqqqq", str1.ljust(15, "q")) + str22 = _FakeString("ß") self.assertEquals("", str15.lower()) self.assertEquals("foobar", str16.lower()) + self.assertEquals("ß", str22.lower()) + if py3k: + self.assertEquals("", str15.casefold()) + self.assertEquals("foobar", str16.casefold()) + self.assertEquals("ss", str22.casefold()) - str22 = _FakeString(" fake string ") + str23 = _FakeString(" fake string ") self.assertEquals("fake string", str1.lstrip()) - self.assertEquals("fake string ", str22.lstrip()) + self.assertEquals("fake string ", str23.lstrip()) self.assertEquals("ke string", str1.lstrip("abcdef")) self.assertEquals(("fa", "ke", " string"), str1.partition("ke")) self.assertEquals(("fake string", "", ""), str1.partition("asdf")) - str23 = _FakeString("boo foo moo") + str24 = _FakeString("boo foo moo") self.assertEquals("real string", str1.replace("fake", "real")) - self.assertEquals("bu fu moo", str23.replace("oo", "u", 2)) + self.assertEquals("bu fu moo", str24.replace("oo", "u", 2)) self.assertEquals(3, str1.rfind("e")) self.assertEquals(-1, str1.rfind("z")) @@ -319,44 +344,44 @@ class TestStringMixIn(unittest.TestCase): self.assertEquals(("fa", "ke", " string"), str1.rpartition("ke")) self.assertEquals(("", "", "fake string"), str1.rpartition("asdf")) - str24 = _FakeString(" this is a sentence with whitespace ") + str25 = _FakeString(" this is a sentence with whitespace ") actual = ["this", "is", "a", "sentence", "with", "whitespace"] - self.assertEquals(actual, str24.rsplit()) - self.assertEquals(actual, str24.rsplit(None)) + self.assertEquals(actual, str25.rsplit()) + self.assertEquals(actual, str25.rsplit(None)) actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", "", "whitespace", ""] - 
self.assertEquals(actual, str24.rsplit(" ")) + self.assertEquals(actual, str25.rsplit(" ")) actual = [" this is a", "sentence", "with", "whitespace"] - self.assertEquals(actual, str24.rsplit(None, 3)) + self.assertEquals(actual, str25.rsplit(None, 3)) actual = [" this is a sentence with", "", "whitespace", ""] - self.assertEquals(actual, str24.rsplit(" ", 3)) + self.assertEquals(actual, str25.rsplit(" ", 3)) self.assertEquals("fake string", str1.rstrip()) - self.assertEquals(" fake string", str22.rstrip()) + self.assertEquals(" fake string", str23.rstrip()) self.assertEquals("fake stri", str1.rstrip("ngr")) actual = ["this", "is", "a", "sentence", "with", "whitespace"] - self.assertEquals(actual, str24.split()) - self.assertEquals(actual, str24.split(None)) + self.assertEquals(actual, str25.split()) + self.assertEquals(actual, str25.split(None)) actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", "", "whitespace", ""] - self.assertEquals(actual, str24.split(" ")) + self.assertEquals(actual, str25.split(" ")) actual = ["this", "is", "a", "sentence with whitespace "] - self.assertEquals(actual, str24.split(None, 3)) + self.assertEquals(actual, str25.split(None, 3)) actual = ["", "", "", "this is a sentence with whitespace "] - self.assertEquals(actual, str24.split(" ", 3)) + self.assertEquals(actual, str25.split(" ", 3)) - str25 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") + str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") self.assertEquals(["lines", "of", "text", "are", "presented", "here"], - str25.splitlines()) + str26.splitlines()) self.assertEquals(["lines\n", "of\n", "text\r\n", "are\r\n", - "presented\n", "here"], str25.splitlines(True)) + "presented\n", "here"], str26.splitlines(True)) self.assertTrue(str1.startswith("fake")) self.assertFalse(str1.startswith("faker")) self.assertEquals("fake string", str1.strip()) - self.assertEquals("fake string", str22.strip()) + self.assertEquals("fake string", str23.strip()) self.assertEquals("ke stri", str1.strip("abcdefngr")) self.assertEquals("fOObAR", str16.swapcase()) From 88201ecb5425689fae53343899b3ee1cc89d77c4 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 18 Mar 2013 03:21:36 -0400 Subject: [PATCH 26/67] Adding TestTokens. Add from __future__ import unicode_literals to a few files. --- tests/test_builder.py | 1 + tests/test_ctokenizer.py | 1 + tests/test_parser.py | 1 + tests/test_pytokenizer.py | 1 + tests/test_tokens.py | 78 ++++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 81 insertions(+), 1 deletion(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index e38e683..a3518fd 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -20,6 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +from __future__ import unicode_literals import unittest class TestBuilder(unittest.TestCase): diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 86f4787..07b5290 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -20,6 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +from __future__ import unicode_literals import unittest from _test_tokenizer import TokenizerTestCase diff --git a/tests/test_parser.py b/tests/test_parser.py index 3f9b2e6..5ea2b49 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -20,6 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
+from __future__ import unicode_literals import unittest class TestParser(unittest.TestCase): diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 4254748..a2f2482 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -20,6 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +from __future__ import unicode_literals import unittest from _test_tokenizer import TokenizerTestCase diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 0e7f87b..5a18b8e 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -20,10 +20,86 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +from __future__ import unicode_literals import unittest +from mwparserfromhell.compat import py3k +from mwparserfromhell.parser import tokens + class TestTokens(unittest.TestCase): - pass + """Test cases for the Token class and its subclasses.""" + + def test_issubclass(self): + """check that all classes within the tokens module are really Tokens""" + for name in tokens.__all__: + klass = getattr(tokens, name) + self.assertTrue(issubclass(klass, tokens.Token)) + self.assertIsInstance(klass(), klass) + self.assertIsInstance(klass(), tokens.Token) + + def test_attributes(self): + """check that Token attributes can be managed properly""" + token1 = tokens.Token() + token2 = tokens.Token(foo="bar", baz=123) + + self.assertEquals("bar", token2.foo) + self.assertEquals(123, token2.baz) + self.assertRaises(KeyError, lambda: token1.foo) + self.assertRaises(KeyError, lambda: token2.bar) + + token1.spam = "eggs" + token2.foo = "ham" + del token2.baz + + self.assertEquals("eggs", token1.spam) + self.assertEquals("ham", token2.foo) + self.assertRaises(KeyError, lambda: token2.baz) + self.assertRaises(KeyError, delattr, token2, "baz") + + def test_repr(self): + """check that repr() on a Token works as expected""" + token1 = tokens.Token() + token2 = tokens.Token(foo="bar", baz=123) + token3 = tokens.Text(text="earwig" * 100) + hundredchars = ("earwig" * 100)[:97] + "..." + + self.assertEquals("Token()", repr(token1)) + if py3k: + token2repr = "Token(foo='bar', baz=123)" + token3repr = "Text(text='" + hundredchars + "')" + else: + token2repr = "Token(foo=u'bar', baz=123)" + token3repr = "Text(text=u'" + hundredchars + "')" + self.assertEquals(token2repr, repr(token2)) + self.assertEquals(token3repr, repr(token3)) + + def test_equality(self): + """check that equivalent tokens are considered equal""" + token1 = tokens.Token() + token2 = tokens.Token() + token3 = tokens.Token(foo="bar", baz=123) + token4 = tokens.Text(text="asdf") + token5 = tokens.Text(text="asdf") + token6 = tokens.TemplateOpen(text="asdf") + + self.assertEquals(token1, token2) + self.assertEquals(token2, token1) + self.assertEquals(token4, token5) + self.assertEquals(token5, token4) + self.assertNotEquals(token1, token3) + self.assertNotEquals(token2, token3) + self.assertNotEquals(token4, token6) + self.assertNotEquals(token5, token6) + + def test_repr_equality(self): + "check that eval(repr(token)) == token" + tests = [ + tokens.Token(), + tokens.Token(foo="bar", baz=123), + tokens.Text(text="earwig") + ] + for token in tests: + self.assertEquals(token, eval(repr(token), vars(tokens))) if __name__ == "__main__": unittest.main(verbosity=2) From b9d2a83b8a7d187be92772af7510a15fdbd414cd Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 19 Mar 2013 10:50:41 -0400 Subject: [PATCH 27/67] Starting TestSmartList. 
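The behavior under test (this snippet is not from the patch, but mirrors the
SmartList docstring example that the later test_doctest re-checks): slicing a
SmartList returns a live view, not a copy.

    from mwparserfromhell.smart_list import SmartList

    parent = SmartList([0, 1, 2, 3])
    child = parent[2:]                  # a _ListProxy view onto the parent
    child.append(4)                     # mutating the child...
    assert child == [2, 3, 4]
    assert parent == [0, 1, 2, 3, 4]    # ...updates the parent as well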
--- mwparserfromhell/smart_list.py | 3 +++ tests/test_smart_list.py | 42 ++++++++++++++++++++++++++++++++++++++++++ tests/test_string_mixin.py | 2 +- 3 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 tests/test_smart_list.py diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index 625307f..67d96be 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -361,3 +361,6 @@ class _ListProxy(list): else: item.sort() self._parent[self._start:self._stop:self._step] = item + + +del inheritdoc \ No newline at end of file diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py new file mode 100644 index 0000000..e22ad27 --- /dev/null +++ b/tests/test_smart_list.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.smart_list import SmartList, _ListProxy + +class TestSmartList(unittest.TestCase): + """Test cases for the SmartList class and its child, _ListProxy.""" + def test_docs(self): + """make sure the methods of SmartList/_ListProxy have docstrings""" + methods = ["append", "count", "extend", "index", "insert", "pop", + "remove", "reverse", "sort"] + for meth in methods: + expected = getattr(list, meth).__doc__ + smartlist_doc = getattr(SmartList, meth).__doc__ + listproxy_doc = getattr(_ListProxy, meth).__doc__ + self.assertEquals(expected, smartlist_doc) + self.assertEquals(expected, listproxy_doc) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index bf49629..28b30dd 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -38,7 +38,7 @@ class _FakeString(StringMixIn): class TestStringMixIn(unittest.TestCase): """Test cases for the StringMixIn class.""" def test_docs(self): - """make sure the various functions of StringMixIn have docstrings""" + """make sure the various methods of StringMixIn have docstrings""" methods = [ "capitalize", "center", "count", "encode", "endswith", "expandtabs", "find", "format", "index", "isalnum", "isalpha", From fe3328aa386c9212d19cebeb3a0c5e626c53b7fc Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 22 Mar 2013 08:38:29 -0400 Subject: [PATCH 28/67] test_doctest() --- mwparserfromhell/smart_list.py | 2 +- tests/test_smart_list.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index 67d96be..7c29c60 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -363,4 +363,4 @@ class _ListProxy(list): self._parent[self._start:self._stop:self._step] = item -del inheritdoc \ No newline at end of file +del inheritdoc diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index e22ad27..b83f4d3 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -38,5 +38,15 @@ class TestSmartList(unittest.TestCase): self.assertEquals(expected, smartlist_doc) self.assertEquals(expected, listproxy_doc) + def test_doctest(self): + """make sure a test embedded in SmartList's docstring passes""" + parent = SmartList([0, 1, 2, 3]) + self.assertEquals([0, 1, 2, 3], parent) + child = parent[2:] + self.assertEquals([2, 3], child) + child.append(4) + self.assertEquals([2, 3, 4], child) + self.assertEquals([0, 1, 2, 3, 4], parent) + if __name__ == "__main__": unittest.main(verbosity=2) From a3a35b1e73e7f4cfa84c449d7dfcc191105154f0 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 23 Mar 2013 11:29:20 -0400 Subject: [PATCH 29/67] Only compile Tokenizer on Python 2 for now. 
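A hypothetical way (not in the patch) to confirm which tokenizer a given
install ended up with, given that the extension module is only built on
Python 2 for now:

    try:
        from mwparserfromhell.parser import _tokenizer   # the C extension
        have_ctokenizer = True
    except ImportError:                                  # pure Python only
        have_ctokenizer = False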
--- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 445473e..8b4ae86 100644 --- a/setup.py +++ b/setup.py @@ -24,6 +24,7 @@ from setuptools import setup, find_packages, Extension from mwparserfromhell import __version__ +from mwparserfromhell.compat import py3k with open("README.rst") as fp: long_docs = fp.read() @@ -37,7 +38,7 @@ tokenizer = Extension("mwparserfromhell.parser._tokenizer", setup( name = "mwparserfromhell", packages = find_packages(exclude=("tests",)), - ext_modules = [tokenizer], + ext_modules = [] if py3k else [tokenizer], test_suite = "tests", version = __version__, author = "Ben Kurtovic", From ff51d7f5e59577fb99d03d7848b7091be1b82d80 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 23 Mar 2013 15:12:36 -0400 Subject: [PATCH 30/67] Some tests for SmartLists; __reversed__ in StringMixIn --- mwparserfromhell/string_mixin.py | 3 + tests/test_smart_list.py | 164 ++++++++++++++++++++++++++++++++++++++- tests/test_string_mixin.py | 3 + 3 files changed, 169 insertions(+), 1 deletion(-) diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 9e6d551..7d269f5 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -113,6 +113,9 @@ class StringMixIn(object): def __getitem__(self, key): return self.__unicode__()[key] + def __reversed__(self): + return reversed(self.__unicode__()) + def __contains__(self, item): if isinstance(item, StringMixIn): return str(item) in self.__unicode__() diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index b83f4d3..5fc26b3 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -23,6 +23,7 @@ from __future__ import unicode_literals import unittest +from mwparserfromhell.compat import py3k from mwparserfromhell.smart_list import SmartList, _ListProxy class TestSmartList(unittest.TestCase): @@ -39,7 +40,7 @@ class TestSmartList(unittest.TestCase): self.assertEquals(expected, listproxy_doc) def test_doctest(self): - """make sure a test embedded in SmartList's docstring passes""" + """make sure the test embedded in SmartList's docstring passes""" parent = SmartList([0, 1, 2, 3]) self.assertEquals([0, 1, 2, 3], parent) child = parent[2:] @@ -48,5 +49,166 @@ class TestSmartList(unittest.TestCase): self.assertEquals([2, 3, 4], child) self.assertEquals([0, 1, 2, 3, 4], parent) + def test_parent_magics(self): + """make sure magically implemented SmartList features work""" + # __getitem__ + # __setitem__ + # __delitem__ + # if not py3k: + # __getslice__ + # __setslice__ + # __delslice__ + # __add__ + # __radd__ + # __iadd__ + + def test_parent_unaffected_magics(self): + """sanity checks against SmartList features that were not modified""" + list1 = SmartList([0, 1, 2, 3, "one", "two"]) + list2 = SmartList([]) + list3 = SmartList([0, 2, 3, 4]) + list4 = SmartList([0, 1, 2]) + + if py3k: + self.assertEquals("[0, 1, 2, 3, 'one', 'two']", str(list1)) + self.assertEquals(b"[0, 1, 2, 3, 'one', 'two']", bytes(list1)) + self.assertEquals("[0, 1, 2, 3, 'one', 'two']", repr(list1)) + else: + self.assertEquals("[0, 1, 2, 3, u'one', u'two']", unicode(list1)) + self.assertEquals(b"[0, 1, 2, 3, u'one', u'two']", str(list1)) + self.assertEquals(b"[0, 1, 2, 3, u'one', u'two']", repr(list1)) + + self.assertTrue(list1 < list3) + self.assertTrue(list1 <= list3) + self.assertFalse(list1 == list3) + self.assertTrue(list1 != list3) + self.assertFalse(list1 > list3) + self.assertFalse(list1 >= list3) + + other1 = [0, 2, 3, 4] + 
self.assertTrue(list1 < other1) + self.assertTrue(list1 <= other1) + self.assertFalse(list1 == other1) + self.assertTrue(list1 != other1) + self.assertFalse(list1 > other1) + self.assertFalse(list1 >= other1) + + other2 = [0, 0, 1, 2] + self.assertFalse(list1 < other2) + self.assertFalse(list1 <= other2) + self.assertFalse(list1 == other2) + self.assertTrue(list1 != other2) + self.assertTrue(list1 > other2) + self.assertTrue(list1 >= other2) + + other3 = [0, 1, 2, 3, "one", "two"] + self.assertFalse(list1 < other3) + self.assertTrue(list1 <= other3) + self.assertTrue(list1 == other3) + self.assertFalse(list1 != other3) + self.assertFalse(list1 > other3) + self.assertTrue(list1 >= other3) + + self.assertTrue(bool(list1)) + self.assertFalse(bool(list2)) + + self.assertEquals(6, len(list1)) + self.assertEquals(0, len(list2)) + + out = [] + for obj in list1: + out.append(obj) + self.assertEquals([0, 1, 2, 3, "one", "two"], out) + + out = [] + for ch in list2: + out.append(ch) + self.assertEquals([], out) + + gen1 = iter(list1) + out = [] + for i in range(len(list1)): + out.append(gen1.next()) + self.assertRaises(StopIteration, gen1.next) + self.assertEquals([0, 1, 2, 3, "one", "two"], out) + gen2 = iter(list2) + self.assertRaises(StopIteration, gen2.next) + + self.assertEquals(["two", "one", 3, 2, 1, 0], list(reversed(list1))) + self.assertEquals([], list(reversed(list2))) + + self.assertTrue("one" in list1) + self.assertTrue(3 in list1) + self.assertFalse(10 in list1) + self.assertFalse(0 in list2) + + self.assertEquals([], list2 * 5) + self.assertEquals([], 5 * list2) + self.assertEquals([0, 1, 2, 0, 1, 2, 0, 1, 2], list4 * 3) + self.assertEquals([0, 1, 2, 0, 1, 2, 0, 1, 2], 3 * list4) + list4 *= 2 + self.assertEquals([0, 1, 2, 0, 1, 2], list4) + + def test_parent_methods(self): + # append + # count + # extend + # index + # insert + # pop + # remove + # reverse + # sort + + def test_child_magics(self): + # if py3k: + # __str__ + # __bytes__ + # else: + # __unicode__ + # __str__ + # __repr__ + # __lt__ + # __le__ + # __eq__ + # __ne__ + # __gt__ + # __ge__ + # if py3k: + # __bool__ + # else: + # __nonzero__ + # __len__ + # __getitem__ + # __setitem__ + # __delitem__ + # __iter__ + # __reversed__ + # __contains__ + # if not py3k: + # __getslice__ + # __setslice__ + # __delslice__ + # __add__ + # __radd__ + # __iadd__ + # __mul__ + # __rmul__ + # __imul__ + + def test_child_methods(self): + # append + # count + # extend + # index + # insert + # pop + # remove + # reverse + # sort + + def test_influence(self): + pass + if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 28b30dd..0d95311 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -143,6 +143,9 @@ class TestStringMixIn(unittest.TestCase): self.assertEquals(expected, out) self.assertRaises(StopIteration, gen2.next) + self.assertEquals("gnirts ekaf", "".join(list(reversed(str1)))) + self.assertEquals([], list(reversed(str2))) + self.assertEquals("f", str1[0]) self.assertEquals(" ", str1[4]) self.assertEquals("g", str1[10]) From 65c3950e89bd3d5f60590707fc0aa4269c2b9612 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 24 Mar 2013 12:01:43 -0400 Subject: [PATCH 31/67] Fix bug when an extended slice's stop argument is missing. 
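A sketch of the bug (not part of the diff below): when the stop argument of
an extended slice is omitted, slice.stop is None, and "key.stop or 0"
collapsed it to zero instead of meaning "the end of the list":

    import sys
    maxsize = sys.maxsize        # stand-in for the value compat provides

    key = slice(2, None)         # what parent[2:] hands to __getitem__
    assert (key.stop or 0) == 0              # old code: child ends at index 0
    assert (key.stop or maxsize) == maxsize  # fixed: child runs to the end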
--- mwparserfromhell/smart_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index 7c29c60..47f7232 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -76,7 +76,7 @@ class SmartList(list): def __getitem__(self, key): if not isinstance(key, slice): return super(SmartList, self).__getitem__(key) - sliceinfo = [key.start or 0, key.stop or 0, key.step or 1] + sliceinfo = [key.start or 0, key.stop or maxsize, key.step or 1] child = _ListProxy(self, sliceinfo) self._children[id(child)] = (child, sliceinfo) return child From 10a7e5d2418e9d7afc652b03ef0686434bde8683 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 24 Mar 2013 12:17:52 -0400 Subject: [PATCH 32/67] Fix the same bug in __setitem__ and __delitem__ --- mwparserfromhell/smart_list.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index 47f7232..e42dbae 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -86,6 +86,7 @@ class SmartList(list): return super(SmartList, self).__setitem__(key, item) item = list(item) super(SmartList, self).__setitem__(key, item) + key = slice(key.start or 0, key.stop or maxsize) diff = len(item) - key.stop + key.start values = self._children.values if py3k else self._children.itervalues if diff: @@ -97,7 +98,9 @@ class SmartList(list): def __delitem__(self, key): super(SmartList, self).__delitem__(key) - if not isinstance(key, slice): + if isinstance(key, slice): + key = slice(key.start or 0, key.stop or maxsize) + else: key = slice(key, key + 1) diff = key.stop - key.start values = self._children.values if py3k else self._children.itervalues From ce6929107edf88065a7dd96082c41ab59732984b Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 24 Mar 2013 12:50:10 -0400 Subject: [PATCH 33/67] Implementing test_parent_get_set_del(). 
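For context (not part of the patch): the reason __setitem__ and __delitem__
track slice bounds at all is that child views must keep pointing at the same
elements when the parent changes length. A sketch of that invariant:

    from mwparserfromhell.smart_list import SmartList

    parent = SmartList([0, 1, 2, 3, 4, 5])
    child = parent[3:]              # bounds recorded roughly as (3, end)
    assert child == [3, 4, 5]

    del parent[0:2]                 # two items vanish before the child
    assert parent == [2, 3, 4, 5]
    assert child == [3, 4, 5]       # start slid from 3 to 1, view intact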
--- tests/test_smart_list.py | 87 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 78 insertions(+), 9 deletions(-) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 5fc26b3..71f428b 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -49,15 +49,79 @@ class TestSmartList(unittest.TestCase): self.assertEquals([2, 3, 4], child) self.assertEquals([0, 1, 2, 3, 4], parent) - def test_parent_magics(self): - """make sure magically implemented SmartList features work""" - # __getitem__ - # __setitem__ - # __delitem__ - # if not py3k: - # __getslice__ - # __setslice__ - # __delslice__ + def test_parent_get_set_del(self): + """make sure SmartList's getitem/setitem/delitem work""" + def assign(L, s1, s2, s3, val): + L[s1:s2:s3] = val + def delete(L, s1): + del L[s1] + + list1 = SmartList([0, 1, 2, 3, "one", "two"]) + list2 = SmartList(list(range(10))) + + self.assertEquals(1, list1[1]) + self.assertEquals("one", list1[-2]) + self.assertEquals([2, 3], list1[2:4]) + self.assertRaises(IndexError, lambda: list1[6]) + self.assertRaises(IndexError, lambda: list1[-7]) + + self.assertEquals([0, 1, 2], list1[:3]) + self.assertEquals([0, 1, 2, 3, "one", "two"], list1[:]) + self.assertEquals([3, "one", "two"], list1[3:]) + self.assertEquals(["one", "two"], list1[-2:]) + self.assertEquals([0, 1], list1[:-4]) + self.assertEquals([], list1[6:]) + self.assertEquals([], list1[4:2]) + + self.assertEquals([0, 2, "one"], list1[0:5:2]) + self.assertEquals([0, 2], list1[0:-3:2]) + self.assertEquals([0, 1, 2, 3, "one", "two"], list1[::]) + self.assertEquals([2, 3, "one", "two"], list1[2::]) + self.assertEquals([0, 1, 2, 3], list1[:4:]) + self.assertEquals([2, 3], list1[2:4:]) + self.assertEquals([0, 2, 4, 6, 8], list2[::2]) + self.assertEquals([2, 5, 8], list2[2::3]) + self.assertEquals([0, 3], list2[:6:3]) + self.assertEquals([2, 5, 8], list2[-8:9:3]) + self.assertEquals([], list2[100000:1000:-100]) + + list1[3] = 100 + self.assertEquals(100, list1[3]) + list1[5:] = [6, 7, 8] + self.assertEquals([6, 7, 8], list1[5:]) + self.assertEquals([0, 1, 2, 100, "one", 6, 7, 8], list1) + list1[2:4] = [-1, -2, -3, -4, -5] + self.assertEquals([0, 1, -1, -2, -3, -4, -5, "one", 6, 7, 8], list1) + list1[0:-3] = [99] + self.assertEquals([99, 6, 7, 8], list1) + list2[0:6:2] = [100, 102, 104] + self.assertEquals([100, 1, 102, 3, 104, 5, 6, 7, 8, 9], list2) + list2[::3] = [200, 203, 206, 209] + self.assertEquals([200, 1, 102, 203, 104, 5, 206, 7, 8, 209], list2) + list2[::] = range(7) + self.assertEquals([0, 1, 2, 3, 4, 5, 6], list2) + self.assertRaises(ValueError, + lambda: assign(list2, 0, 5, 2, [100, 102, 104, 106])) + + del list2[2] + self.assertEquals([0, 1, 3, 4, 5, 6], list2) + del list2[-3] + self.assertEquals([0, 1, 3, 5, 6], list2) + self.assertRaises(IndexError, lambda: delete(list2, 100)) + self.assertRaises(IndexError, lambda: delete(list2, -6)) + list2[:] = range(10) + del list2[3:6] + self.assertEquals([0, 1, 2, 6, 7, 8, 9], list2) + del list2[-2:] + self.assertEquals([0, 1, 2, 6, 7], list2) + del list2[:2] + self.assertEquals([2, 6, 7], list2) + list2[:] = range(10) + del list2[2:8:2] + self.assertEquals([0, 1, 3, 5, 7, 8, 9], list2) + + def test_parent_add(self): + """make sure SmartList's add/radd/iadd work""" # __add__ # __radd__ # __iadd__ @@ -150,6 +214,7 @@ class TestSmartList(unittest.TestCase): self.assertEquals([0, 1, 2, 0, 1, 2], list4) def test_parent_methods(self): + pass # append # count # extend @@ -161,6 +226,7 @@ class TestSmartList(unittest.TestCase): 
# sort def test_child_magics(self): + pass # if py3k: # __str__ # __bytes__ @@ -197,6 +263,7 @@ class TestSmartList(unittest.TestCase): # __imul__ def test_child_methods(self): + pass # append # count # extend @@ -209,6 +276,8 @@ class TestSmartList(unittest.TestCase): def test_influence(self): pass + # test whether changes are propogated correctly + # also test whether children that exit scope are removed from parent's map if __name__ == "__main__": unittest.main(verbosity=2) From 67611bfb5bdbc2c445b264d48fea710d99ad56f7 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 24 Mar 2013 13:43:15 -0400 Subject: [PATCH 34/67] Implement test_parent_add(). --- tests/test_smart_list.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 71f428b..1b1c267 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -23,7 +23,7 @@ from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import py3k +from mwparserfromhell.compat import py3k, range from mwparserfromhell.smart_list import SmartList, _ListProxy class TestSmartList(unittest.TestCase): @@ -122,9 +122,15 @@ class TestSmartList(unittest.TestCase): def test_parent_add(self): """make sure SmartList's add/radd/iadd work""" - # __add__ - # __radd__ - # __iadd__ + list1 = SmartList(range(5)) + list2 = SmartList(range(5, 10)) + self.assertEquals([0, 1, 2, 3, 4, 5, 6], list1 + [5, 6]) + self.assertEquals([0, 1, 2, 3, 4], list1) + self.assertEquals(list(range(10)), list1 + list2) + self.assertEquals([-2, -1, 0, 1, 2, 3, 4], [-2, -1] + list1) + self.assertEquals([0, 1, 2, 3, 4], list1) + list1 += ["foo", "bar", "baz"] + self.assertEquals([0, 1, 2, 3, 4, "foo", "bar", "baz"], list1) def test_parent_unaffected_magics(self): """sanity checks against SmartList features that were not modified""" From fb92349909f302833ebcfe905578d1d6e75fd891 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 24 Mar 2013 14:09:47 -0400 Subject: [PATCH 35/67] Fix parsing of arguments in SmartList.sort() --- mwparserfromhell/smart_list.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index e42dbae..b8d02d5 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -168,16 +168,14 @@ class SmartList(list): copy = list(self) for child in self._children: child._parent = copy + kwargs = {} if cmp is not None: - if key is not None: - if reverse is not None: - super(SmartList, self).sort(cmp, key, reverse) - else: - super(SmartList, self).sort(cmp, key) - else: - super(SmartList, self).sort(cmp) - else: - super(SmartList, self).sort() + kwargs["cmp"] = cmp + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + super(SmartList, self).sort(**kwargs) class _ListProxy(list): From 986e3ed855971593d2ea6f68962fed4d1ca8d2ca Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 24 Mar 2013 14:41:31 -0400 Subject: [PATCH 36/67] Implement test_parent_methods(). 
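For context (not part of the patch): the sort() assertions below exercise the
argument-forwarding fix from the previous commit. A sketch of the problem and
of the kwargs-based fix, runnable as plain Python:

    # The old logic only forwarded key/reverse when cmp was also given, so
    # SmartList([3, 1, 2]).sort(reverse=True) quietly sorted ascending.
    def fixed_sort(items, cmp=None, key=None, reverse=None):
        kwargs = {}
        if cmp is not None:
            kwargs["cmp"] = cmp       # Python 2 only
        if key is not None:
            kwargs["key"] = key
        if reverse is not None:
            kwargs["reverse"] = reverse
        items.sort(**kwargs)

    data = [3, 1, 2]
    fixed_sort(data, reverse=True)
    assert data == [3, 2, 1]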
--- tests/test_smart_list.py | 79 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 65 insertions(+), 14 deletions(-) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 1b1c267..2fdfeff 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -100,15 +100,15 @@ class TestSmartList(unittest.TestCase): self.assertEquals([200, 1, 102, 203, 104, 5, 206, 7, 8, 209], list2) list2[::] = range(7) self.assertEquals([0, 1, 2, 3, 4, 5, 6], list2) - self.assertRaises(ValueError, - lambda: assign(list2, 0, 5, 2, [100, 102, 104, 106])) + self.assertRaises(ValueError, assign, list2, 0, 5, 2, + [100, 102, 104, 106]) del list2[2] self.assertEquals([0, 1, 3, 4, 5, 6], list2) del list2[-3] self.assertEquals([0, 1, 3, 5, 6], list2) - self.assertRaises(IndexError, lambda: delete(list2, 100)) - self.assertRaises(IndexError, lambda: delete(list2, -6)) + self.assertRaises(IndexError, delete, list2, 100) + self.assertRaises(IndexError, delete, list2, -6) list2[:] = range(10) del list2[3:6] self.assertEquals([0, 1, 2, 6, 7, 8, 9], list2) @@ -220,16 +220,67 @@ class TestSmartList(unittest.TestCase): self.assertEquals([0, 1, 2, 0, 1, 2], list4) def test_parent_methods(self): - pass - # append - # count - # extend - # index - # insert - # pop - # remove - # reverse - # sort + """make sure SmartList's non-magic methods work, like append()""" + list1 = SmartList(range(5)) + list2 = SmartList(["foo"]) + list3 = SmartList([("a", 5), ("d", 2), ("b", 8), ("c", 3)]) + + list1.append(5) + list1.append(1) + list1.append(2) + self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2], list1) + + self.assertEquals(0, list1.count(6)) + self.assertEquals(2, list1.count(1)) + + list1.extend(range(5, 8)) + self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + + self.assertEquals(1, list1.index(1)) + self.assertEquals(6, list1.index(1, 3)) + self.assertEquals(6, list1.index(1, 3, 7)) + self.assertRaises(ValueError, list1.index, 1, 3, 5) + + list1.insert(0, -1) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + list1.insert(-1, 6.5) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) + list1.insert(100, 8) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1) + + self.assertEquals(8, list1.pop()) + self.assertEquals(7, list1.pop()) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) + self.assertEquals(-1, list1.pop(0)) + self.assertEquals(5, list1.pop(5)) + self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5, 6, 6.5], list1) + self.assertEquals("foo", list2.pop()) + self.assertRaises(IndexError, list2.pop) + self.assertEquals([], list2) + + list1.remove(6.5) + self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) + list1.remove(1) + self.assertEquals([0, 2, 3, 4, 1, 2, 5, 6], list1) + list1.remove(1) + self.assertEquals([0, 2, 3, 4, 2, 5, 6], list1) + self.assertRaises(ValueError, list1.remove, 1) + + list1.reverse() + self.assertEquals([6, 5, 2, 4, 3, 2, 0], list1) + + list1.sort() + self.assertEquals([0, 2, 2, 3, 4, 5, 6], list1) + list1.sort(reverse=True) + self.assertEquals([6, 5, 4, 3, 2, 2, 0], list1) + list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y)) # Distance from 3 + self.assertEquals([3, 4, 2, 2, 5, 6, 0], list1) + list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y), reverse=True) + self.assertEquals([6, 0, 5, 4, 2, 2, 3], list1) + list3.sort(key=lambda i: i[1]) + self.assertEquals([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) + list3.sort(key=lambda i: i[1], reverse=True) + self.assertEquals([("b", 8), ("a", 5), 
("c", 3), ("d", 2)], list3) def test_child_magics(self): pass From d85ff73c19e026fa209c252b4d96699bbeb75121 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 24 Mar 2013 15:46:30 -0400 Subject: [PATCH 37/67] Squashing some sneaky bugs in SmartLists's children. --- mwparserfromhell/smart_list.py | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index b8d02d5..229500c 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -76,7 +76,8 @@ class SmartList(list): def __getitem__(self, key): if not isinstance(key, slice): return super(SmartList, self).__getitem__(key) - sliceinfo = [key.start or 0, key.stop or maxsize, key.step or 1] + sliceinfo = [key.start or 0, maxsize if key.stop is None else key.stop, + key.step or 1] child = _ListProxy(self, sliceinfo) self._children[id(child)] = (child, sliceinfo) return child @@ -86,12 +87,12 @@ class SmartList(list): return super(SmartList, self).__setitem__(key, item) item = list(item) super(SmartList, self).__setitem__(key, item) - key = slice(key.start or 0, key.stop or maxsize) + key = slice(key.start or 0, maxsize if key.stop is None else key.stop) diff = len(item) - key.stop + key.start values = self._children.values if py3k else self._children.itervalues if diff: for child, (start, stop, step) in values(): - if start >= key.stop: + if start > key.stop: self._children[id(child)][1][0] += diff if stop >= key.stop and stop != maxsize: self._children[id(child)][1][1] += diff @@ -99,7 +100,8 @@ class SmartList(list): def __delitem__(self, key): super(SmartList, self).__delitem__(key) if isinstance(key, slice): - key = slice(key.start or 0, key.stop or maxsize) + key = slice(key.start or 0, + maxsize if key.stop is None else key.stop) else: key = slice(key, key + 1) diff = key.stop - key.start @@ -107,7 +109,7 @@ class SmartList(list): for child, (start, stop, step) in values(): if start > key.start: self._children[id(child)][1][0] -= diff - if stop >= key.stop: + if stop >= key.stop and stop != maxsize: self._children[id(child)][1][1] -= diff if not py3k: @@ -296,6 +298,8 @@ class _ListProxy(list): @property def _stop(self): """The ending index of this list, exclusive.""" + if self._sliceinfo[1] == maxsize: + return len(self._parent) return self._sliceinfo[1] @property @@ -329,18 +333,25 @@ class _ListProxy(list): @inheritdoc def insert(self, index, item): + if index < 0: + index = len(self) + index self._parent.insert(self._start + index, item) @inheritdoc def pop(self, index=None): + length = len(self) if index is None: - index = len(self) - 1 + index = length - 1 + elif index < 0: + index = length + index + if index < 0 or index >= length: + raise IndexError("pop index out of range") return self._parent.pop(self._start + index) @inheritdoc def remove(self, item): index = self.index(item) - del self._parent[index] + del self._parent[self._start + index] @inheritdoc def reverse(self): @@ -351,16 +362,14 @@ class _ListProxy(list): @inheritdoc def sort(self, cmp=None, key=None, reverse=None): item = self._render() + kwargs = {} if cmp is not None: - if key is not None: - if reverse is not None: - item.sort(cmp, key, reverse) - else: - item.sort(cmp, key) - else: - item.sort(cmp) - else: - item.sort() + kwargs["cmp"] = cmp + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + item.sort(**kwargs) self._parent[self._start:self._stop:self._step] = item 
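One of the "sneaky bugs" above, illustrated (this example is not part of the
patch): _ListProxy.remove() used to delete by the child-relative index, so
removing from a child that does not start at 0 clobbered the wrong parent
element.

    from mwparserfromhell.smart_list import SmartList

    parent = SmartList([10, 20, 30, 40])
    child = parent[2:]                 # child views [30, 40]

    child.remove(30)
    # child.index(30) is 0; the old code did del self._parent[0], removing
    # 10; the fixed code deletes self._parent[self._start + 0], i.e. 30.
    assert parent == [10, 20, 40]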
From b8e926a2569c7ec15001d19e767dd475a4f249e9 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 24 Mar 2013 15:47:21 -0400 Subject: [PATCH 38/67] Abstract out public list method tests; implement test_child_methods() --- tests/test_smart_list.py | 145 ++++++++++++++++++++++++----------------------- 1 file changed, 74 insertions(+), 71 deletions(-) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 2fdfeff..44775b4 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -28,6 +28,71 @@ from mwparserfromhell.smart_list import SmartList, _ListProxy class TestSmartList(unittest.TestCase): """Test cases for the SmartList class and its child, _ListProxy.""" + + def _test_list_methods(self, builder): + """Run tests on the public methods of a list built with *builder*.""" + list1 = builder(range(5)) + list2 = builder(["foo"]) + list3 = builder([("a", 5), ("d", 2), ("b", 8), ("c", 3)]) + + list1.append(5) + list1.append(1) + list1.append(2) + self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2], list1) + + self.assertEquals(0, list1.count(6)) + self.assertEquals(2, list1.count(1)) + + list1.extend(range(5, 8)) + self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + + self.assertEquals(1, list1.index(1)) + self.assertEquals(6, list1.index(1, 3)) + self.assertEquals(6, list1.index(1, 3, 7)) + self.assertRaises(ValueError, list1.index, 1, 3, 5) + + list1.insert(0, -1) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + list1.insert(-1, 6.5) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) + list1.insert(13, 8) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1) + + self.assertEquals(8, list1.pop()) + self.assertEquals(7, list1.pop()) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) + self.assertEquals(-1, list1.pop(0)) + self.assertEquals(5, list1.pop(5)) + self.assertEquals(6.5, list1.pop(-1)) + self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) + self.assertEquals("foo", list2.pop()) + self.assertRaises(IndexError, list2.pop) + self.assertEquals([], list2) + + list1.remove(6) + self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5], list1) + list1.remove(1) + self.assertEquals([0, 2, 3, 4, 1, 2, 5], list1) + list1.remove(1) + self.assertEquals([0, 2, 3, 4, 2, 5], list1) + self.assertRaises(ValueError, list1.remove, 1) + + list1.reverse() + self.assertEquals([5, 2, 4, 3, 2, 0], list1) + + list1.sort() + self.assertEquals([0, 2, 2, 3, 4, 5], list1) + list1.sort(reverse=True) + self.assertEquals([5, 4, 3, 2, 2, 0], list1) + list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y)) # Distance from 3 + self.assertEquals([3, 4, 2, 2, 5, 0], list1) + list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y), reverse=True) + self.assertEquals([0, 5, 4, 2, 2, 3], list1) + list3.sort(key=lambda i: i[1]) + self.assertEquals([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) + list3.sort(key=lambda i: i[1], reverse=True) + self.assertEquals([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) + def test_docs(self): """make sure the methods of SmartList/_ListProxy have docstrings""" methods = ["append", "count", "extend", "index", "insert", "pop", @@ -221,68 +286,10 @@ class TestSmartList(unittest.TestCase): def test_parent_methods(self): """make sure SmartList's non-magic methods work, like append()""" - list1 = SmartList(range(5)) - list2 = SmartList(["foo"]) - list3 = SmartList([("a", 5), ("d", 2), ("b", 8), ("c", 3)]) - - list1.append(5) - list1.append(1) - list1.append(2) - self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2], 
list1) - - self.assertEquals(0, list1.count(6)) - self.assertEquals(2, list1.count(1)) - - list1.extend(range(5, 8)) - self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) - - self.assertEquals(1, list1.index(1)) - self.assertEquals(6, list1.index(1, 3)) - self.assertEquals(6, list1.index(1, 3, 7)) - self.assertRaises(ValueError, list1.index, 1, 3, 5) - - list1.insert(0, -1) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) - list1.insert(-1, 6.5) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) - list1.insert(100, 8) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1) - - self.assertEquals(8, list1.pop()) - self.assertEquals(7, list1.pop()) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) - self.assertEquals(-1, list1.pop(0)) - self.assertEquals(5, list1.pop(5)) - self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5, 6, 6.5], list1) - self.assertEquals("foo", list2.pop()) - self.assertRaises(IndexError, list2.pop) - self.assertEquals([], list2) - - list1.remove(6.5) - self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) - list1.remove(1) - self.assertEquals([0, 2, 3, 4, 1, 2, 5, 6], list1) - list1.remove(1) - self.assertEquals([0, 2, 3, 4, 2, 5, 6], list1) - self.assertRaises(ValueError, list1.remove, 1) - - list1.reverse() - self.assertEquals([6, 5, 2, 4, 3, 2, 0], list1) - - list1.sort() - self.assertEquals([0, 2, 2, 3, 4, 5, 6], list1) - list1.sort(reverse=True) - self.assertEquals([6, 5, 4, 3, 2, 2, 0], list1) - list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y)) # Distance from 3 - self.assertEquals([3, 4, 2, 2, 5, 6, 0], list1) - list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y), reverse=True) - self.assertEquals([6, 0, 5, 4, 2, 2, 3], list1) - list3.sort(key=lambda i: i[1]) - self.assertEquals([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) - list3.sort(key=lambda i: i[1], reverse=True) - self.assertEquals([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) + self._test_list_methods(lambda L: SmartList(L)) def test_child_magics(self): + """make sure _ListProxy's magically implemented features work""" pass # if py3k: # __str__ @@ -320,20 +327,16 @@ class TestSmartList(unittest.TestCase): # __imul__ def test_child_methods(self): - pass - # append - # count - # extend - # index - # insert - # pop - # remove - # reverse - # sort + """make sure _ListProxy's non-magic methods work, like append()""" + self._test_list_methods(lambda L: SmartList(list(L))[:]) + self._test_list_methods(lambda L: SmartList([999] + list(L))[1:]) + self._test_list_methods(lambda L: SmartList(list(L) + [999])[:-1]) + builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] + self._test_list_methods(builder) def test_influence(self): + """make sure changes are propagated from parents to children""" pass - # test whether changes are propogated correctly # also test whether children that exit scope are removed from parent's map if __name__ == "__main__": From 49b9863b77e91e1199c8f036910b862b8fddf0fb Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 25 Mar 2013 15:56:38 -0400 Subject: [PATCH 39/67] Handle keyword arguments in some methods with py3k correctly. 
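A quick illustration (not part of the patch) of the keyword-forwarding
pattern adopted below, using plain Python 3 strings:

    # Build a kwargs dict from whichever optional arguments were actually
    # supplied, instead of forwarding None placeholders positionally:
    def forwarding_split(text, sep=None, maxsplit=None):
        kwargs = {}
        if sep is not None:
            kwargs["sep"] = sep
        if maxsplit is not None:
            kwargs["maxsplit"] = maxsplit
        return text.split(**kwargs)

    assert forwarding_split("a b c") == ["a", "b", "c"]
    assert forwarding_split("a b c", maxsplit=1) == ["a", "b c"]
    assert forwarding_split("a-b-c", "-", 1) == ["a", "b-c"]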
--- mwparserfromhell/string_mixin.py | 70 ++++++++++++++++++++++++++-------------- tests/test_string_mixin.py | 8 +++++ 2 files changed, 54 insertions(+), 24 deletions(-) diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 7d269f5..efd28d8 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -143,19 +143,21 @@ class StringMixIn(object): if not py3k: @inheritdoc def decode(self, encoding=None, errors=None): - if errors is None: - if encoding is None: - return self.__unicode__().decode() - return self.__unicode__().decode(encoding) - return self.__unicode__().decode(encoding, errors) + kwargs = {} + if encoding is not None: + kwargs["encoding"] = encoding + if errors is not None: + kwargs["errors"] = errors + return self.__unicode__().decode(**kwargs) @inheritdoc def encode(self, encoding=None, errors=None): - if errors is None: - if encoding is None: - return self.__unicode__().encode() - return self.__unicode__().encode(encoding) - return self.__unicode__().encode(encoding, errors) + kwargs = {} + if encoding is not None: + kwargs["encoding"] = encoding + if errors is not None: + kwargs["errors"] = errors + return self.__unicode__().encode(**kwargs) @inheritdoc def endswith(self, prefix, start=None, end=None): @@ -286,25 +288,45 @@ class StringMixIn(object): def rpartition(self, sep): return self.__unicode__().rpartition(sep) - @inheritdoc - def rsplit(self, sep=None, maxsplit=None): - if maxsplit is None: - if sep is None: - return self.__unicode__().rsplit() - return self.__unicode__().rsplit(sep) - return self.__unicode__().rsplit(sep, maxsplit) + if py3k: + @inheritdoc + def rsplit(self, sep=None, maxsplit=None): + kwargs = {} + if sep is not None: + kwargs["sep"] = sep + if maxsplit is not None: + kwargs["maxsplit"] = maxsplit + return self.__unicode__().rsplit(**kwargs) + else: + @inheritdoc + def rsplit(self, sep=None, maxsplit=None): + if maxsplit is None: + if sep is None: + return self.__unicode__().rsplit() + return self.__unicode__().rsplit(sep) + return self.__unicode__().rsplit(sep, maxsplit) @inheritdoc def rstrip(self, chars=None): return self.__unicode__().rstrip(chars) - @inheritdoc - def split(self, sep=None, maxsplit=None): - if maxsplit is None: - if sep is None: - return self.__unicode__().split() - return self.__unicode__().split(sep) - return self.__unicode__().split(sep, maxsplit) + if py3k: + @inheritdoc + def split(self, sep=None, maxsplit=None): + kwargs = {} + if sep is not None: + kwargs["sep"] = sep + if maxsplit is not None: + kwargs["maxsplit"] = maxsplit + return self.__unicode__().split(**kwargs) + else: + @inheritdoc + def split(self, sep=None, maxsplit=None): + if maxsplit is None: + if sep is None: + return self.__unicode__().split() + return self.__unicode__().split(sep) + return self.__unicode__().split(sep, maxsplit) @inheritdoc def splitlines(self, keepends=None): diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 0d95311..8d86c8e 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -189,10 +189,14 @@ class TestStringMixIn(unittest.TestCase): self.assertEquals(b"fake string", str1.encode()) self.assertEquals(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", str3.encode("utf8")) + self.assertEquals(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", + str3.encode(encoding="utf8")) self.assertRaises(UnicodeEncodeError, str3.encode) self.assertRaises(UnicodeEncodeError, str3.encode, "ascii") self.assertRaises(UnicodeEncodeError, 
str3.encode, "ascii", "strict") + self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") self.assertEquals("", str3.encode("ascii", "ignore")) + self.assertEquals("", str3.encode(errors="ignore")) self.assertTrue(str1.endswith("ing")) self.assertFalse(str1.endswith("ingh")) @@ -358,6 +362,8 @@ class TestStringMixIn(unittest.TestCase): self.assertEquals(actual, str25.rsplit(None, 3)) actual = [" this is a sentence with", "", "whitespace", ""] self.assertEquals(actual, str25.rsplit(" ", 3)) + if py3k: + self.assertEquals(actual, str25.rsplit(maxsplit=3)) self.assertEquals("fake string", str1.rstrip()) self.assertEquals(" fake string", str23.rstrip()) @@ -373,6 +379,8 @@ class TestStringMixIn(unittest.TestCase): self.assertEquals(actual, str25.split(None, 3)) actual = ["", "", "", "this is a sentence with whitespace "] self.assertEquals(actual, str25.split(" ", 3)) + if py3k: + self.assertEquals(actual, str25.split(maxsplit=3)) str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") self.assertEquals(["lines", "of", "text", "are", "presented", "here"], From 740db6ddfa86c3c52776ea57503ef9254f2bbd7a Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 25 Mar 2013 16:42:37 -0400 Subject: [PATCH 40/67] Implement some more tests; squash bugs in SmartList/_ListProxy --- mwparserfromhell/smart_list.py | 44 +++++-- tests/test_smart_list.py | 274 ++++++++++++++++++++--------------------- 2 files changed, 173 insertions(+), 145 deletions(-) diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index 229500c..062e9ad 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -76,8 +76,8 @@ class SmartList(list): def __getitem__(self, key): if not isinstance(key, slice): return super(SmartList, self).__getitem__(key) - sliceinfo = [key.start or 0, maxsize if key.stop is None else key.stop, - key.step or 1] + keystop = maxsize if key.stop is None else key.stop + sliceinfo = [key.start or 0, keystop, key.step or 1] child = _ListProxy(self, sliceinfo) self._children[id(child)] = (child, sliceinfo) return child @@ -100,8 +100,8 @@ class SmartList(list): def __delitem__(self, key): super(SmartList, self).__delitem__(key) if isinstance(key, slice): - key = slice(key.start or 0, - maxsize if key.stop is None else key.stop) + keystop = maxsize if key.stop is None else key.stop + key = slice(key.start or 0, keystop) else: key = slice(key, key + 1) diff = key.stop - key.start @@ -241,18 +241,36 @@ class _ListProxy(list): def __setitem__(self, key, item): if isinstance(key, slice): - adjusted = slice(key.start + self._start, key.stop + self._stop, - key.step) + keystart = (key.start or 0) + self._start + if key.stop is None or key.stop == maxsize: + keystop = self._stop + else: + keystop = key.stop + self._start + adjusted = slice(keystart, keystop, key.step) self._parent[adjusted] = item else: + length = len(self) + if key < 0: + key = length + key + if key < 0 or key >= length: + raise IndexError("list assignment index out of range") self._parent[self._start + key] = item def __delitem__(self, key): if isinstance(key, slice): - adjusted = slice(key.start + self._start, key.stop + self._stop, - key.step) + keystart = (key.start or 0) + self._start + if key.stop is None or key.stop == maxsize: + keystop = self._stop + else: + keystop = key.stop + self._start + adjusted = slice(keystart, keystop, key.step) del self._parent[adjusted] else: + length = len(self) + if key < 0: + key = length + key + if key < 0 or key >= length: + raise 
IndexError("list assignment index out of range") del self._parent[self._start + key] def __iter__(self): @@ -290,6 +308,16 @@ class _ListProxy(list): self.extend(other) return self + def __mul__(self, other): + return SmartList(list(self) * other) + + def __rmul__(self, other): + return SmartList(other * list(self)) + + def __imul__(self, other): + self.extend(list(self) * (other - 1)) + return self + @property def _start(self): """The starting index of this list, inclusive.""" diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 44775b4..777660a 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -29,100 +29,15 @@ from mwparserfromhell.smart_list import SmartList, _ListProxy class TestSmartList(unittest.TestCase): """Test cases for the SmartList class and its child, _ListProxy.""" - def _test_list_methods(self, builder): - """Run tests on the public methods of a list built with *builder*.""" - list1 = builder(range(5)) - list2 = builder(["foo"]) - list3 = builder([("a", 5), ("d", 2), ("b", 8), ("c", 3)]) - - list1.append(5) - list1.append(1) - list1.append(2) - self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2], list1) - - self.assertEquals(0, list1.count(6)) - self.assertEquals(2, list1.count(1)) - - list1.extend(range(5, 8)) - self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) - - self.assertEquals(1, list1.index(1)) - self.assertEquals(6, list1.index(1, 3)) - self.assertEquals(6, list1.index(1, 3, 7)) - self.assertRaises(ValueError, list1.index, 1, 3, 5) - - list1.insert(0, -1) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) - list1.insert(-1, 6.5) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) - list1.insert(13, 8) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1) - - self.assertEquals(8, list1.pop()) - self.assertEquals(7, list1.pop()) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) - self.assertEquals(-1, list1.pop(0)) - self.assertEquals(5, list1.pop(5)) - self.assertEquals(6.5, list1.pop(-1)) - self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) - self.assertEquals("foo", list2.pop()) - self.assertRaises(IndexError, list2.pop) - self.assertEquals([], list2) - - list1.remove(6) - self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5], list1) - list1.remove(1) - self.assertEquals([0, 2, 3, 4, 1, 2, 5], list1) - list1.remove(1) - self.assertEquals([0, 2, 3, 4, 2, 5], list1) - self.assertRaises(ValueError, list1.remove, 1) - - list1.reverse() - self.assertEquals([5, 2, 4, 3, 2, 0], list1) - - list1.sort() - self.assertEquals([0, 2, 2, 3, 4, 5], list1) - list1.sort(reverse=True) - self.assertEquals([5, 4, 3, 2, 2, 0], list1) - list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y)) # Distance from 3 - self.assertEquals([3, 4, 2, 2, 5, 0], list1) - list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y), reverse=True) - self.assertEquals([0, 5, 4, 2, 2, 3], list1) - list3.sort(key=lambda i: i[1]) - self.assertEquals([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) - list3.sort(key=lambda i: i[1], reverse=True) - self.assertEquals([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) - - def test_docs(self): - """make sure the methods of SmartList/_ListProxy have docstrings""" - methods = ["append", "count", "extend", "index", "insert", "pop", - "remove", "reverse", "sort"] - for meth in methods: - expected = getattr(list, meth).__doc__ - smartlist_doc = getattr(SmartList, meth).__doc__ - listproxy_doc = getattr(_ListProxy, meth).__doc__ - self.assertEquals(expected, smartlist_doc) - 
self.assertEquals(expected, listproxy_doc) - - def test_doctest(self): - """make sure the test embedded in SmartList's docstring passes""" - parent = SmartList([0, 1, 2, 3]) - self.assertEquals([0, 1, 2, 3], parent) - child = parent[2:] - self.assertEquals([2, 3], child) - child.append(4) - self.assertEquals([2, 3, 4], child) - self.assertEquals([0, 1, 2, 3, 4], parent) - - def test_parent_get_set_del(self): - """make sure SmartList's getitem/setitem/delitem work""" + def _test_get_set_del_item(self, builder): + """Run tests on __get/set/delitem__ of a list built with *builder*.""" def assign(L, s1, s2, s3, val): L[s1:s2:s3] = val def delete(L, s1): del L[s1] - list1 = SmartList([0, 1, 2, 3, "one", "two"]) - list2 = SmartList(list(range(10))) + list1 = builder([0, 1, 2, 3, "one", "two"]) + list2 = builder(list(range(10))) self.assertEquals(1, list1[1]) self.assertEquals("one", list1[-2]) @@ -152,9 +67,11 @@ class TestSmartList(unittest.TestCase): list1[3] = 100 self.assertEquals(100, list1[3]) + list1[-3] = 101 + self.assertEquals([0, 1, 2, 101, "one", "two"], list1) list1[5:] = [6, 7, 8] self.assertEquals([6, 7, 8], list1[5:]) - self.assertEquals([0, 1, 2, 100, "one", 6, 7, 8], list1) + self.assertEquals([0, 1, 2, 101, "one", 6, 7, 8], list1) list1[2:4] = [-1, -2, -3, -4, -5] self.assertEquals([0, 1, -1, -2, -3, -4, -5, "one", 6, 7, 8], list1) list1[0:-3] = [99] @@ -185,10 +102,10 @@ class TestSmartList(unittest.TestCase): del list2[2:8:2] self.assertEquals([0, 1, 3, 5, 7, 8, 9], list2) - def test_parent_add(self): - """make sure SmartList's add/radd/iadd work""" - list1 = SmartList(range(5)) - list2 = SmartList(range(5, 10)) + def _test_add_radd_iadd(self, builder): + """Run tests on __r/i/add__ of a list built with *builder*.""" + list1 = builder(range(5)) + list2 = builder(range(5, 10)) self.assertEquals([0, 1, 2, 3, 4, 5, 6], list1 + [5, 6]) self.assertEquals([0, 1, 2, 3, 4], list1) self.assertEquals(list(range(10)), list1 + list2) @@ -197,12 +114,12 @@ class TestSmartList(unittest.TestCase): list1 += ["foo", "bar", "baz"] self.assertEquals([0, 1, 2, 3, 4, "foo", "bar", "baz"], list1) - def test_parent_unaffected_magics(self): - """sanity checks against SmartList features that were not modified""" - list1 = SmartList([0, 1, 2, 3, "one", "two"]) - list2 = SmartList([]) - list3 = SmartList([0, 2, 3, 4]) - list4 = SmartList([0, 1, 2]) + def _test_other_magic_methods(self, builder): + """Run tests on other magic methods of a list built with *builder*.""" + list1 = builder([0, 1, 2, 3, "one", "two"]) + list2 = builder([]) + list3 = builder([0, 2, 3, 4]) + list4 = builder([0, 1, 2]) if py3k: self.assertEquals("[0, 1, 2, 3, 'one', 'two']", str(list1)) @@ -284,47 +201,130 @@ class TestSmartList(unittest.TestCase): list4 *= 2 self.assertEquals([0, 1, 2, 0, 1, 2], list4) + def _test_list_methods(self, builder): + """Run tests on the public methods of a list built with *builder*.""" + list1 = builder(range(5)) + list2 = builder(["foo"]) + list3 = builder([("a", 5), ("d", 2), ("b", 8), ("c", 3)]) + + list1.append(5) + list1.append(1) + list1.append(2) + self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2], list1) + + self.assertEquals(0, list1.count(6)) + self.assertEquals(2, list1.count(1)) + + list1.extend(range(5, 8)) + self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + + self.assertEquals(1, list1.index(1)) + self.assertEquals(6, list1.index(1, 3)) + self.assertEquals(6, list1.index(1, 3, 7)) + self.assertRaises(ValueError, list1.index, 1, 3, 5) + + list1.insert(0, -1) + 
self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + list1.insert(-1, 6.5) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) + list1.insert(13, 8) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1) + + self.assertEquals(8, list1.pop()) + self.assertEquals(7, list1.pop()) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) + self.assertEquals(-1, list1.pop(0)) + self.assertEquals(5, list1.pop(5)) + self.assertEquals(6.5, list1.pop(-1)) + self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) + self.assertEquals("foo", list2.pop()) + self.assertRaises(IndexError, list2.pop) + self.assertEquals([], list2) + + list1.remove(6) + self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5], list1) + list1.remove(1) + self.assertEquals([0, 2, 3, 4, 1, 2, 5], list1) + list1.remove(1) + self.assertEquals([0, 2, 3, 4, 2, 5], list1) + self.assertRaises(ValueError, list1.remove, 1) + + list1.reverse() + self.assertEquals([5, 2, 4, 3, 2, 0], list1) + + list1.sort() + self.assertEquals([0, 2, 2, 3, 4, 5], list1) + list1.sort(reverse=True) + self.assertEquals([5, 4, 3, 2, 2, 0], list1) + list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y)) # Distance from 3 + self.assertEquals([3, 4, 2, 2, 5, 0], list1) + list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y), reverse=True) + self.assertEquals([0, 5, 4, 2, 2, 3], list1) + list3.sort(key=lambda i: i[1]) + self.assertEquals([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) + list3.sort(key=lambda i: i[1], reverse=True) + self.assertEquals([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) + + def test_docs(self): + """make sure the methods of SmartList/_ListProxy have docstrings""" + methods = ["append", "count", "extend", "index", "insert", "pop", + "remove", "reverse", "sort"] + for meth in methods: + expected = getattr(list, meth).__doc__ + smartlist_doc = getattr(SmartList, meth).__doc__ + listproxy_doc = getattr(_ListProxy, meth).__doc__ + self.assertEquals(expected, smartlist_doc) + self.assertEquals(expected, listproxy_doc) + + def test_doctest(self): + """make sure the test embedded in SmartList's docstring passes""" + parent = SmartList([0, 1, 2, 3]) + self.assertEquals([0, 1, 2, 3], parent) + child = parent[2:] + self.assertEquals([2, 3], child) + child.append(4) + self.assertEquals([2, 3, 4], child) + self.assertEquals([0, 1, 2, 3, 4], parent) + + def test_parent_get_set_del(self): + """make sure SmartList's getitem/setitem/delitem work""" + self._test_get_set_del_item(lambda L: SmartList(L)) + + def test_parent_add(self): + """make sure SmartList's add/radd/iadd work""" + self._test_add_radd_iadd(lambda L: SmartList(L)) + + def test_parent_unaffected_magics(self): + """sanity checks against SmartList features that were not modified""" + self._test_other_magic_methods(lambda L: SmartList(L)) + def test_parent_methods(self): """make sure SmartList's non-magic methods work, like append()""" self._test_list_methods(lambda L: SmartList(L)) - def test_child_magics(self): - """make sure _ListProxy's magically implemented features work""" - pass - # if py3k: - # __str__ - # __bytes__ - # else: - # __unicode__ - # __str__ - # __repr__ - # __lt__ - # __le__ - # __eq__ - # __ne__ - # __gt__ - # __ge__ - # if py3k: - # __bool__ - # else: - # __nonzero__ - # __len__ - # __getitem__ - # __setitem__ - # __delitem__ - # __iter__ - # __reversed__ - # __contains__ - # if not py3k: - # __getslice__ - # __setslice__ - # __delslice__ - # __add__ - # __radd__ - # __iadd__ - # __mul__ - # __rmul__ - # __imul__ + def 
test_child_get_set_del(self): + """make sure _ListProxy's getitem/setitem/delitem work""" + self._test_get_set_del_item(lambda L: SmartList(list(L))[:]) + self._test_get_set_del_item(lambda L: SmartList([999] + list(L))[1:]) + # self._test_get_set_del_item(lambda L: SmartList(list(L) + [999])[:-1]) + # builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] + # self._test_get_set_del_item(builder) + + def test_child_add(self): + """make sure _ListProxy's add/radd/iadd work""" + self._test_add_radd_iadd(lambda L: SmartList(list(L))[:]) + self._test_add_radd_iadd(lambda L: SmartList([999] + list(L))[1:]) + self._test_add_radd_iadd(lambda L: SmartList(list(L) + [999])[:-1]) + builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] + self._test_add_radd_iadd(builder) + + def test_child_other_magics(self): + """make sure _ListProxy's other magically implemented features work""" + self._test_other_magic_methods(lambda L: SmartList(list(L))[:]) + self._test_other_magic_methods(lambda L: SmartList([999] + list(L))[1:]) + self._test_other_magic_methods(lambda L: SmartList(list(L) + [999])[:-1]) + builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] + self._test_other_magic_methods(builder) def test_child_methods(self): """make sure _ListProxy's non-magic methods work, like append()""" From b298a68b37444ff2674ce7699e1bc85d610df547 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 25 Mar 2013 16:54:01 -0400 Subject: [PATCH 41/67] Squash a bug dealing with extended slices. --- mwparserfromhell/smart_list.py | 11 ++++++----- tests/test_smart_list.py | 6 +++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index 062e9ad..46c475a 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -87,8 +87,9 @@ class SmartList(list): return super(SmartList, self).__setitem__(key, item) item = list(item) super(SmartList, self).__setitem__(key, item) - key = slice(key.start or 0, maxsize if key.stop is None else key.stop) - diff = len(item) - key.stop + key.start + keystop = maxsize if key.stop is None else key.stop + key = slice(key.start or 0, keystop, key.step or 1) + diff = len(item) + (key.start - key.stop) / key.step values = self._children.values if py3k else self._children.itervalues if diff: for child, (start, stop, step) in values(): @@ -101,10 +102,10 @@ class SmartList(list): super(SmartList, self).__delitem__(key) if isinstance(key, slice): keystop = maxsize if key.stop is None else key.stop - key = slice(key.start or 0, keystop) + key = slice(key.start or 0, keystop, key.step or 1) else: - key = slice(key, key + 1) - diff = key.stop - key.start + key = slice(key, key + 1, 1) + diff = (key.stop - key.start) / key.step values = self._children.values if py3k else self._children.itervalues for child, (start, stop, step) in values(): if start > key.start: diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 777660a..10e39ea 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -306,9 +306,9 @@ class TestSmartList(unittest.TestCase): """make sure _ListProxy's getitem/setitem/delitem work""" self._test_get_set_del_item(lambda L: SmartList(list(L))[:]) self._test_get_set_del_item(lambda L: SmartList([999] + list(L))[1:]) - # self._test_get_set_del_item(lambda L: SmartList(list(L) + [999])[:-1]) - # builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] - # self._test_get_set_del_item(builder) + 
self._test_get_set_del_item(lambda L: SmartList(list(L) + [999])[:-1]) + builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] + self._test_get_set_del_item(builder) def test_child_add(self): """make sure _ListProxy's add/radd/iadd work""" From b6284195d31543aca2a1d4e1742ce3f649217b14 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 25 Mar 2013 17:08:18 -0400 Subject: [PATCH 42/67] Implement first part of test_influence(). --- tests/test_smart_list.py | 49 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 10e39ea..b0a10cb 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -336,7 +336,54 @@ class TestSmartList(unittest.TestCase): def test_influence(self): """make sure changes are propagated from parents to children""" - pass + parent = SmartList([0, 1, 2, 3, 4, 5]) + child1 = parent[2:] + child2 = parent[2:5] + + parent.append(6) + child1.append(7) + child2.append(4.5) + self.assertEquals([0, 1, 2, 3, 4, 4.5, 5, 6, 7], parent) + self.assertEquals([2, 3, 4, 4.5, 5, 6, 7], child1) + self.assertEquals([2, 3, 4, 4.5], child2) + + parent.insert(0, -1) + parent.insert(4, 2.5) + parent.insert(10, 6.5) + self.assertEquals([-1, 0, 1, 2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], parent) + self.assertEquals([2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], child1) + self.assertEquals([2, 2.5, 3, 4, 4.5], child2) + + self.assertEquals(7, parent.pop()) + self.assertEquals(6.5, child1.pop()) + self.assertEquals(4.5, child2.pop()) + self.assertEquals([-1, 0, 1, 2, 2.5, 3, 4, 5, 6], parent) + self.assertEquals([2, 2.5, 3, 4, 5, 6], child1) + self.assertEquals([2, 2.5, 3, 4], child2) + + parent.remove(-1) + child1.remove(2.5) + self.assertEquals([0, 1, 2, 3, 4, 5, 6], parent) + self.assertEquals([2, 3, 4, 5, 6], child1) + self.assertEquals([2, 3, 4], child2) + + self.assertEquals(0, parent.pop(0)) + self.assertEquals([1, 2, 3, 4, 5, 6], parent) + self.assertEquals([2, 3, 4, 5, 6], child1) + self.assertEquals([2, 3, 4], child2) + + child2.reverse() + self.assertEquals([1, 4, 3, 2, 5, 6], parent) + self.assertEquals([4, 3, 2, 5, 6], child1) + self.assertEquals([4, 3, 2], child2) + + parent.extend([7, 8]) + child1.extend([8.1, 8.2]) + child2.extend([1.9, 1.8]) + self.assertEquals([1, 4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], parent) + self.assertEquals([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) + self.assertEquals([4, 3, 2, 1.9, 1.8], child2) + # also test whether children that exit scope are removed from parent's map if __name__ == "__main__": From 34b85a93cd425f3b9c1b2d91fa7d5625b284f171 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 25 Mar 2013 17:33:29 -0400 Subject: [PATCH 43/67] Cosmetic change: standardize whitespace after class definition. 
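Before the cosmetic patch below, a quick sketch of the parent-to-child propagation that the new test_influence() above asserts; the values are taken from that test, and the expected results follow from its assertions rather than from any new behaviour:

from mwparserfromhell.smart_list import SmartList

parent = SmartList([0, 1, 2, 3, 4, 5])
child = parent[2:5]          # a _ListProxy view sharing the parent's data
parent.append(6)             # grows the parent; the bounded child is unaffected
child.append(4.5)            # lands at the end of the child's slice inside the parent
assert parent == [0, 1, 2, 3, 4, 4.5, 5, 6]
assert child == [2, 3, 4, 4.5]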
--- mwparserfromhell/nodes/argument.py | 1 + mwparserfromhell/nodes/comment.py | 1 + mwparserfromhell/nodes/text.py | 1 + mwparserfromhell/nodes/wikilink.py | 1 + mwparserfromhell/string_mixin.py | 1 + tests/_test_tokenizer.py | 1 + tests/test_ctokenizer.py | 1 + tests/test_docs.py | 1 + tests/test_pytokenizer.py | 1 + tests/test_smart_list.py | 2 -- tests/test_string_mixin.py | 1 + 11 files changed, 10 insertions(+), 2 deletions(-) diff --git a/mwparserfromhell/nodes/argument.py b/mwparserfromhell/nodes/argument.py index 06facb4..d7db92a 100644 --- a/mwparserfromhell/nodes/argument.py +++ b/mwparserfromhell/nodes/argument.py @@ -30,6 +30,7 @@ __all__ = ["Argument"] class Argument(Node): """Represents a template argument substitution, like ``{{{foo}}}``.""" + def __init__(self, name, default=None): super(Argument, self).__init__() self._name = name diff --git a/mwparserfromhell/nodes/comment.py b/mwparserfromhell/nodes/comment.py index b34c29e..e96ce38 100644 --- a/mwparserfromhell/nodes/comment.py +++ b/mwparserfromhell/nodes/comment.py @@ -29,6 +29,7 @@ __all__ = ["Comment"] class Comment(Node): """Represents a hidden HTML comment, like ````.""" + def __init__(self, contents): super(Comment, self).__init__() self._contents = contents diff --git a/mwparserfromhell/nodes/text.py b/mwparserfromhell/nodes/text.py index 60ba847..6fda3da 100644 --- a/mwparserfromhell/nodes/text.py +++ b/mwparserfromhell/nodes/text.py @@ -29,6 +29,7 @@ __all__ = ["Text"] class Text(Node): """Represents ordinary, unformatted text with no special properties.""" + def __init__(self, value): super(Text, self).__init__() self._value = value diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index f880016..6fea468 100644 --- a/mwparserfromhell/nodes/wikilink.py +++ b/mwparserfromhell/nodes/wikilink.py @@ -30,6 +30,7 @@ __all__ = ["Wikilink"] class Wikilink(Node): """Represents an internal wikilink, like ``[[Foo|Bar]]``.""" + def __init__(self, title, text=None): super(Wikilink, self).__init__() self._title = title diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index efd28d8..eee58b9 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -50,6 +50,7 @@ class StringMixIn(object): :py:meth:`__unicode__` instead of the immutable ``self`` like the regular ``str`` type. """ + if py3k: def __str__(self): return self.__unicode__() diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 4d12dc9..379b4fa 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -38,6 +38,7 @@ class TokenizerTestCase(object): TestCTokenizer. Tests are loaded dynamically from files in the 'tokenizer' directory. """ + @classmethod def _build_test_method(cls, funcname, data): """Create and return a method to be treated as a test case method. 
diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 07b5290..4dbeceb 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -27,6 +27,7 @@ from _test_tokenizer import TokenizerTestCase class TestCTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the C tokenizer.""" + @classmethod def setUpClass(cls): from mwparserfromhell.parser._tokenizer import CTokenizer diff --git a/tests/test_docs.py b/tests/test_docs.py index 8673cb9..075b0a7 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -30,6 +30,7 @@ from mwparserfromhell.compat import py3k, str, StringIO class TestDocs(unittest.TestCase): """Integration test cases for mwparserfromhell's documentation.""" + def assertPrint(self, input, output): """Assertion check that *input*, when printed, produces *output*.""" buff = StringIO() diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index a2f2482..73e6fe7 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -27,6 +27,7 @@ from _test_tokenizer import TokenizerTestCase class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the Python tokenizer.""" + @classmethod def setUpClass(cls): from mwparserfromhell.parser.tokenizer import Tokenizer diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index b0a10cb..f6d22ae 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -384,7 +384,5 @@ class TestSmartList(unittest.TestCase): self.assertEquals([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) self.assertEquals([4, 3, 2, 1.9, 1.8], child2) - # also test whether children that exit scope are removed from parent's map - if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 8d86c8e..7b99995 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -37,6 +37,7 @@ class _FakeString(StringMixIn): class TestStringMixIn(unittest.TestCase): """Test cases for the StringMixIn class.""" + def test_docs(self): """make sure the various methods of StringMixIn have docstrings""" methods = [ From 6a741db7ce98239108f21004b2a9d2f99a63f90f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 25 Mar 2013 18:25:03 -0400 Subject: [PATCH 44/67] Applying fb71f5507eca7bc73fae764549a7579889817cba --- mwparserfromhell/parser/__init__.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index 074b9ba..1fb95b5 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -26,16 +26,16 @@ modules: the :py:mod:`~.tokenizer` and the :py:mod:`~.builder`. This module joins them together under one interface. """ +from .builder import Builder +from .tokenizer import Tokenizer try: - from ._builder import CBuilder as Builder + from ._tokenizer import CTokenizer + use_c = True except ImportError: - from .builder import Builder -try: - from ._tokenizer import CTokenizer as Tokenizer -except ImportError: - from .tokenizer import Tokenizer + CTokenizer = None + use_c = False -__all__ = ["Parser"] +__all__ = ["use_c", "Parser"] class Parser(object): """Represents a parser for wikicode. 
@@ -48,7 +48,10 @@ class Parser(object): def __init__(self, text): self.text = text - self._tokenizer = Tokenizer() + if use_c and CTokenizer: + self._tokenizer = CTokenizer() + else: + self._tokenizer = Tokenizer() self._builder = Builder() def parse(self): From 9e26264d6b8d462cd93bc4c475c91abfe6d3b501 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 25 Mar 2013 19:13:32 -0400 Subject: [PATCH 45/67] Replace deprecated alias assertEquals() with assertEqual(). --- tests/test_smart_list.py | 244 ++++++++++++++++++++++----------------------- tests/test_string_mixin.py | 228 +++++++++++++++++++++--------------------- tests/test_tokens.py | 24 ++--- 3 files changed, 248 insertions(+), 248 deletions(-) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index f6d22ae..680de9d 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -39,80 +39,80 @@ class TestSmartList(unittest.TestCase): list1 = builder([0, 1, 2, 3, "one", "two"]) list2 = builder(list(range(10))) - self.assertEquals(1, list1[1]) - self.assertEquals("one", list1[-2]) - self.assertEquals([2, 3], list1[2:4]) + self.assertEqual(1, list1[1]) + self.assertEqual("one", list1[-2]) + self.assertEqual([2, 3], list1[2:4]) self.assertRaises(IndexError, lambda: list1[6]) self.assertRaises(IndexError, lambda: list1[-7]) - self.assertEquals([0, 1, 2], list1[:3]) - self.assertEquals([0, 1, 2, 3, "one", "two"], list1[:]) - self.assertEquals([3, "one", "two"], list1[3:]) - self.assertEquals(["one", "two"], list1[-2:]) - self.assertEquals([0, 1], list1[:-4]) - self.assertEquals([], list1[6:]) - self.assertEquals([], list1[4:2]) - - self.assertEquals([0, 2, "one"], list1[0:5:2]) - self.assertEquals([0, 2], list1[0:-3:2]) - self.assertEquals([0, 1, 2, 3, "one", "two"], list1[::]) - self.assertEquals([2, 3, "one", "two"], list1[2::]) - self.assertEquals([0, 1, 2, 3], list1[:4:]) - self.assertEquals([2, 3], list1[2:4:]) - self.assertEquals([0, 2, 4, 6, 8], list2[::2]) - self.assertEquals([2, 5, 8], list2[2::3]) - self.assertEquals([0, 3], list2[:6:3]) - self.assertEquals([2, 5, 8], list2[-8:9:3]) - self.assertEquals([], list2[100000:1000:-100]) + self.assertEqual([0, 1, 2], list1[:3]) + self.assertEqual([0, 1, 2, 3, "one", "two"], list1[:]) + self.assertEqual([3, "one", "two"], list1[3:]) + self.assertEqual(["one", "two"], list1[-2:]) + self.assertEqual([0, 1], list1[:-4]) + self.assertEqual([], list1[6:]) + self.assertEqual([], list1[4:2]) + + self.assertEqual([0, 2, "one"], list1[0:5:2]) + self.assertEqual([0, 2], list1[0:-3:2]) + self.assertEqual([0, 1, 2, 3, "one", "two"], list1[::]) + self.assertEqual([2, 3, "one", "two"], list1[2::]) + self.assertEqual([0, 1, 2, 3], list1[:4:]) + self.assertEqual([2, 3], list1[2:4:]) + self.assertEqual([0, 2, 4, 6, 8], list2[::2]) + self.assertEqual([2, 5, 8], list2[2::3]) + self.assertEqual([0, 3], list2[:6:3]) + self.assertEqual([2, 5, 8], list2[-8:9:3]) + self.assertEqual([], list2[100000:1000:-100]) list1[3] = 100 - self.assertEquals(100, list1[3]) + self.assertEqual(100, list1[3]) list1[-3] = 101 - self.assertEquals([0, 1, 2, 101, "one", "two"], list1) + self.assertEqual([0, 1, 2, 101, "one", "two"], list1) list1[5:] = [6, 7, 8] - self.assertEquals([6, 7, 8], list1[5:]) - self.assertEquals([0, 1, 2, 101, "one", 6, 7, 8], list1) + self.assertEqual([6, 7, 8], list1[5:]) + self.assertEqual([0, 1, 2, 101, "one", 6, 7, 8], list1) list1[2:4] = [-1, -2, -3, -4, -5] - self.assertEquals([0, 1, -1, -2, -3, -4, -5, "one", 6, 7, 8], list1) + self.assertEqual([0, 1, -1, -2, -3, -4, -5, 
"one", 6, 7, 8], list1) list1[0:-3] = [99] - self.assertEquals([99, 6, 7, 8], list1) + self.assertEqual([99, 6, 7, 8], list1) list2[0:6:2] = [100, 102, 104] - self.assertEquals([100, 1, 102, 3, 104, 5, 6, 7, 8, 9], list2) + self.assertEqual([100, 1, 102, 3, 104, 5, 6, 7, 8, 9], list2) list2[::3] = [200, 203, 206, 209] - self.assertEquals([200, 1, 102, 203, 104, 5, 206, 7, 8, 209], list2) + self.assertEqual([200, 1, 102, 203, 104, 5, 206, 7, 8, 209], list2) list2[::] = range(7) - self.assertEquals([0, 1, 2, 3, 4, 5, 6], list2) + self.assertEqual([0, 1, 2, 3, 4, 5, 6], list2) self.assertRaises(ValueError, assign, list2, 0, 5, 2, [100, 102, 104, 106]) del list2[2] - self.assertEquals([0, 1, 3, 4, 5, 6], list2) + self.assertEqual([0, 1, 3, 4, 5, 6], list2) del list2[-3] - self.assertEquals([0, 1, 3, 5, 6], list2) + self.assertEqual([0, 1, 3, 5, 6], list2) self.assertRaises(IndexError, delete, list2, 100) self.assertRaises(IndexError, delete, list2, -6) list2[:] = range(10) del list2[3:6] - self.assertEquals([0, 1, 2, 6, 7, 8, 9], list2) + self.assertEqual([0, 1, 2, 6, 7, 8, 9], list2) del list2[-2:] - self.assertEquals([0, 1, 2, 6, 7], list2) + self.assertEqual([0, 1, 2, 6, 7], list2) del list2[:2] - self.assertEquals([2, 6, 7], list2) + self.assertEqual([2, 6, 7], list2) list2[:] = range(10) del list2[2:8:2] - self.assertEquals([0, 1, 3, 5, 7, 8, 9], list2) + self.assertEqual([0, 1, 3, 5, 7, 8, 9], list2) def _test_add_radd_iadd(self, builder): """Run tests on __r/i/add__ of a list built with *builder*.""" list1 = builder(range(5)) list2 = builder(range(5, 10)) - self.assertEquals([0, 1, 2, 3, 4, 5, 6], list1 + [5, 6]) - self.assertEquals([0, 1, 2, 3, 4], list1) - self.assertEquals(list(range(10)), list1 + list2) - self.assertEquals([-2, -1, 0, 1, 2, 3, 4], [-2, -1] + list1) - self.assertEquals([0, 1, 2, 3, 4], list1) + self.assertEqual([0, 1, 2, 3, 4, 5, 6], list1 + [5, 6]) + self.assertEqual([0, 1, 2, 3, 4], list1) + self.assertEqual(list(range(10)), list1 + list2) + self.assertEqual([-2, -1, 0, 1, 2, 3, 4], [-2, -1] + list1) + self.assertEqual([0, 1, 2, 3, 4], list1) list1 += ["foo", "bar", "baz"] - self.assertEquals([0, 1, 2, 3, 4, "foo", "bar", "baz"], list1) + self.assertEqual([0, 1, 2, 3, 4, "foo", "bar", "baz"], list1) def _test_other_magic_methods(self, builder): """Run tests on other magic methods of a list built with *builder*.""" @@ -122,13 +122,13 @@ class TestSmartList(unittest.TestCase): list4 = builder([0, 1, 2]) if py3k: - self.assertEquals("[0, 1, 2, 3, 'one', 'two']", str(list1)) - self.assertEquals(b"[0, 1, 2, 3, 'one', 'two']", bytes(list1)) - self.assertEquals("[0, 1, 2, 3, 'one', 'two']", repr(list1)) + self.assertEqual("[0, 1, 2, 3, 'one', 'two']", str(list1)) + self.assertEqual(b"[0, 1, 2, 3, 'one', 'two']", bytes(list1)) + self.assertEqual("[0, 1, 2, 3, 'one', 'two']", repr(list1)) else: - self.assertEquals("[0, 1, 2, 3, u'one', u'two']", unicode(list1)) - self.assertEquals(b"[0, 1, 2, 3, u'one', u'two']", str(list1)) - self.assertEquals(b"[0, 1, 2, 3, u'one', u'two']", repr(list1)) + self.assertEqual("[0, 1, 2, 3, u'one', u'two']", unicode(list1)) + self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", str(list1)) + self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", repr(list1)) self.assertTrue(list1 < list3) self.assertTrue(list1 <= list3) @@ -164,42 +164,42 @@ class TestSmartList(unittest.TestCase): self.assertTrue(bool(list1)) self.assertFalse(bool(list2)) - self.assertEquals(6, len(list1)) - self.assertEquals(0, len(list2)) + self.assertEqual(6, len(list1)) + 
self.assertEqual(0, len(list2)) out = [] for obj in list1: out.append(obj) - self.assertEquals([0, 1, 2, 3, "one", "two"], out) + self.assertEqual([0, 1, 2, 3, "one", "two"], out) out = [] for ch in list2: out.append(ch) - self.assertEquals([], out) + self.assertEqual([], out) gen1 = iter(list1) out = [] for i in range(len(list1)): out.append(gen1.next()) self.assertRaises(StopIteration, gen1.next) - self.assertEquals([0, 1, 2, 3, "one", "two"], out) + self.assertEqual([0, 1, 2, 3, "one", "two"], out) gen2 = iter(list2) self.assertRaises(StopIteration, gen2.next) - self.assertEquals(["two", "one", 3, 2, 1, 0], list(reversed(list1))) - self.assertEquals([], list(reversed(list2))) + self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1))) + self.assertEqual([], list(reversed(list2))) self.assertTrue("one" in list1) self.assertTrue(3 in list1) self.assertFalse(10 in list1) self.assertFalse(0 in list2) - self.assertEquals([], list2 * 5) - self.assertEquals([], 5 * list2) - self.assertEquals([0, 1, 2, 0, 1, 2, 0, 1, 2], list4 * 3) - self.assertEquals([0, 1, 2, 0, 1, 2, 0, 1, 2], 3 * list4) + self.assertEqual([], list2 * 5) + self.assertEqual([], 5 * list2) + self.assertEqual([0, 1, 2, 0, 1, 2, 0, 1, 2], list4 * 3) + self.assertEqual([0, 1, 2, 0, 1, 2, 0, 1, 2], 3 * list4) list4 *= 2 - self.assertEquals([0, 1, 2, 0, 1, 2], list4) + self.assertEqual([0, 1, 2, 0, 1, 2], list4) def _test_list_methods(self, builder): """Run tests on the public methods of a list built with *builder*.""" @@ -210,60 +210,60 @@ class TestSmartList(unittest.TestCase): list1.append(5) list1.append(1) list1.append(2) - self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2], list1) + self.assertEqual([0, 1, 2, 3, 4, 5, 1, 2], list1) - self.assertEquals(0, list1.count(6)) - self.assertEquals(2, list1.count(1)) + self.assertEqual(0, list1.count(6)) + self.assertEqual(2, list1.count(1)) list1.extend(range(5, 8)) - self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + self.assertEqual([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) - self.assertEquals(1, list1.index(1)) - self.assertEquals(6, list1.index(1, 3)) - self.assertEquals(6, list1.index(1, 3, 7)) + self.assertEqual(1, list1.index(1)) + self.assertEqual(6, list1.index(1, 3)) + self.assertEqual(6, list1.index(1, 3, 7)) self.assertRaises(ValueError, list1.index, 1, 3, 5) list1.insert(0, -1) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) list1.insert(-1, 6.5) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) list1.insert(13, 8) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1) - - self.assertEquals(8, list1.pop()) - self.assertEquals(7, list1.pop()) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) - self.assertEquals(-1, list1.pop(0)) - self.assertEquals(5, list1.pop(5)) - self.assertEquals(6.5, list1.pop(-1)) - self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) - self.assertEquals("foo", list2.pop()) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1) + + self.assertEqual(8, list1.pop()) + self.assertEqual(7, list1.pop()) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) + self.assertEqual(-1, list1.pop(0)) + self.assertEqual(5, list1.pop(5)) + self.assertEqual(6.5, list1.pop(-1)) + self.assertEqual([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) + self.assertEqual("foo", list2.pop()) self.assertRaises(IndexError, list2.pop) - 
self.assertEquals([], list2) + self.assertEqual([], list2) list1.remove(6) - self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5], list1) + self.assertEqual([0, 1, 2, 3, 4, 1, 2, 5], list1) list1.remove(1) - self.assertEquals([0, 2, 3, 4, 1, 2, 5], list1) + self.assertEqual([0, 2, 3, 4, 1, 2, 5], list1) list1.remove(1) - self.assertEquals([0, 2, 3, 4, 2, 5], list1) + self.assertEqual([0, 2, 3, 4, 2, 5], list1) self.assertRaises(ValueError, list1.remove, 1) list1.reverse() - self.assertEquals([5, 2, 4, 3, 2, 0], list1) + self.assertEqual([5, 2, 4, 3, 2, 0], list1) list1.sort() - self.assertEquals([0, 2, 2, 3, 4, 5], list1) + self.assertEqual([0, 2, 2, 3, 4, 5], list1) list1.sort(reverse=True) - self.assertEquals([5, 4, 3, 2, 2, 0], list1) + self.assertEqual([5, 4, 3, 2, 2, 0], list1) list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y)) # Distance from 3 - self.assertEquals([3, 4, 2, 2, 5, 0], list1) + self.assertEqual([3, 4, 2, 2, 5, 0], list1) list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y), reverse=True) - self.assertEquals([0, 5, 4, 2, 2, 3], list1) + self.assertEqual([0, 5, 4, 2, 2, 3], list1) list3.sort(key=lambda i: i[1]) - self.assertEquals([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) + self.assertEqual([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) list3.sort(key=lambda i: i[1], reverse=True) - self.assertEquals([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) + self.assertEqual([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) def test_docs(self): """make sure the methods of SmartList/_ListProxy have docstrings""" @@ -273,18 +273,18 @@ class TestSmartList(unittest.TestCase): expected = getattr(list, meth).__doc__ smartlist_doc = getattr(SmartList, meth).__doc__ listproxy_doc = getattr(_ListProxy, meth).__doc__ - self.assertEquals(expected, smartlist_doc) - self.assertEquals(expected, listproxy_doc) + self.assertEqual(expected, smartlist_doc) + self.assertEqual(expected, listproxy_doc) def test_doctest(self): """make sure the test embedded in SmartList's docstring passes""" parent = SmartList([0, 1, 2, 3]) - self.assertEquals([0, 1, 2, 3], parent) + self.assertEqual([0, 1, 2, 3], parent) child = parent[2:] - self.assertEquals([2, 3], child) + self.assertEqual([2, 3], child) child.append(4) - self.assertEquals([2, 3, 4], child) - self.assertEquals([0, 1, 2, 3, 4], parent) + self.assertEqual([2, 3, 4], child) + self.assertEqual([0, 1, 2, 3, 4], parent) def test_parent_get_set_del(self): """make sure SmartList's getitem/setitem/delitem work""" @@ -343,46 +343,46 @@ class TestSmartList(unittest.TestCase): parent.append(6) child1.append(7) child2.append(4.5) - self.assertEquals([0, 1, 2, 3, 4, 4.5, 5, 6, 7], parent) - self.assertEquals([2, 3, 4, 4.5, 5, 6, 7], child1) - self.assertEquals([2, 3, 4, 4.5], child2) + self.assertEqual([0, 1, 2, 3, 4, 4.5, 5, 6, 7], parent) + self.assertEqual([2, 3, 4, 4.5, 5, 6, 7], child1) + self.assertEqual([2, 3, 4, 4.5], child2) parent.insert(0, -1) parent.insert(4, 2.5) parent.insert(10, 6.5) - self.assertEquals([-1, 0, 1, 2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], parent) - self.assertEquals([2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], child1) - self.assertEquals([2, 2.5, 3, 4, 4.5], child2) + self.assertEqual([-1, 0, 1, 2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], parent) + self.assertEqual([2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], child1) + self.assertEqual([2, 2.5, 3, 4, 4.5], child2) - self.assertEquals(7, parent.pop()) - self.assertEquals(6.5, child1.pop()) - self.assertEquals(4.5, child2.pop()) - self.assertEquals([-1, 0, 1, 2, 2.5, 3, 4, 5, 6], parent) - self.assertEquals([2, 2.5, 3, 
4, 5, 6], child1) - self.assertEquals([2, 2.5, 3, 4], child2) + self.assertEqual(7, parent.pop()) + self.assertEqual(6.5, child1.pop()) + self.assertEqual(4.5, child2.pop()) + self.assertEqual([-1, 0, 1, 2, 2.5, 3, 4, 5, 6], parent) + self.assertEqual([2, 2.5, 3, 4, 5, 6], child1) + self.assertEqual([2, 2.5, 3, 4], child2) parent.remove(-1) child1.remove(2.5) - self.assertEquals([0, 1, 2, 3, 4, 5, 6], parent) - self.assertEquals([2, 3, 4, 5, 6], child1) - self.assertEquals([2, 3, 4], child2) + self.assertEqual([0, 1, 2, 3, 4, 5, 6], parent) + self.assertEqual([2, 3, 4, 5, 6], child1) + self.assertEqual([2, 3, 4], child2) - self.assertEquals(0, parent.pop(0)) - self.assertEquals([1, 2, 3, 4, 5, 6], parent) - self.assertEquals([2, 3, 4, 5, 6], child1) - self.assertEquals([2, 3, 4], child2) + self.assertEqual(0, parent.pop(0)) + self.assertEqual([1, 2, 3, 4, 5, 6], parent) + self.assertEqual([2, 3, 4, 5, 6], child1) + self.assertEqual([2, 3, 4], child2) child2.reverse() - self.assertEquals([1, 4, 3, 2, 5, 6], parent) - self.assertEquals([4, 3, 2, 5, 6], child1) - self.assertEquals([4, 3, 2], child2) + self.assertEqual([1, 4, 3, 2, 5, 6], parent) + self.assertEqual([4, 3, 2, 5, 6], child1) + self.assertEqual([4, 3, 2], child2) parent.extend([7, 8]) child1.extend([8.1, 8.2]) child2.extend([1.9, 1.8]) - self.assertEquals([1, 4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], parent) - self.assertEquals([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) - self.assertEquals([4, 3, 2, 1.9, 1.8], child2) + self.assertEqual([1, 4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], parent) + self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) + self.assertEqual([4, 3, 2, 1.9, 1.8], child2) if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 7b99995..6ef6344 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -56,17 +56,17 @@ class TestStringMixIn(unittest.TestCase): for meth in methods: expected = getattr(str, meth).__doc__ actual = getattr(StringMixIn, meth).__doc__ - self.assertEquals(expected, actual) + self.assertEqual(expected, actual) def test_types(self): """make sure StringMixIns convert to different types correctly""" fstr = _FakeString("fake string") - self.assertEquals(str(fstr), "fake string") - self.assertEquals(bytes(fstr), b"fake string") + self.assertEqual(str(fstr), "fake string") + self.assertEqual(bytes(fstr), b"fake string") if py3k: - self.assertEquals(repr(fstr), "'fake string'") + self.assertEqual(repr(fstr), "'fake string'") else: - self.assertEquals(repr(fstr), b"u'fake string'") + self.assertEqual(repr(fstr), b"u'fake string'") self.assertIsInstance(str(fstr), str) self.assertIsInstance(bytes(fstr), bytes) @@ -119,18 +119,18 @@ class TestStringMixIn(unittest.TestCase): self.assertTrue(str1) self.assertFalse(str2) - self.assertEquals(11, len(str1)) - self.assertEquals(0, len(str2)) + self.assertEqual(11, len(str1)) + self.assertEqual(0, len(str2)) out = [] for ch in str1: out.append(ch) - self.assertEquals(expected, out) + self.assertEqual(expected, out) out = [] for ch in str2: out.append(ch) - self.assertEquals([], out) + self.assertEqual([], out) gen1 = iter(str1) gen2 = iter(str2) @@ -141,16 +141,16 @@ class TestStringMixIn(unittest.TestCase): for i in range(len(str1)): out.append(gen1.next()) self.assertRaises(StopIteration, gen1.next) - self.assertEquals(expected, out) + self.assertEqual(expected, out) self.assertRaises(StopIteration, gen2.next) - self.assertEquals("gnirts 
ekaf", "".join(list(reversed(str1)))) - self.assertEquals([], list(reversed(str2))) + self.assertEqual("gnirts ekaf", "".join(list(reversed(str1)))) + self.assertEqual([], list(reversed(str2))) - self.assertEquals("f", str1[0]) - self.assertEquals(" ", str1[4]) - self.assertEquals("g", str1[10]) - self.assertEquals("n", str1[-2]) + self.assertEqual("f", str1[0]) + self.assertEqual(" ", str1[4]) + self.assertEqual("g", str1[10]) + self.assertEqual("n", str1[-2]) self.assertRaises(IndexError, lambda: str1[11]) self.assertRaises(IndexError, lambda: str2[0]) @@ -165,75 +165,75 @@ class TestStringMixIn(unittest.TestCase): def test_other_methods(self): """test the remaining non-magic methods of StringMixIn""" str1 = _FakeString("fake string") - self.assertEquals("Fake string", str1.capitalize()) + self.assertEqual("Fake string", str1.capitalize()) - self.assertEquals(" fake string ", str1.center(15)) - self.assertEquals(" fake string ", str1.center(16)) - self.assertEquals("qqfake stringqq", str1.center(15, "q")) + self.assertEqual(" fake string ", str1.center(15)) + self.assertEqual(" fake string ", str1.center(16)) + self.assertEqual("qqfake stringqq", str1.center(15, "q")) - self.assertEquals(1, str1.count("e")) - self.assertEquals(0, str1.count("z")) - self.assertEquals(1, str1.count("r", 7)) - self.assertEquals(0, str1.count("r", 8)) - self.assertEquals(1, str1.count("r", 5, 9)) - self.assertEquals(0, str1.count("r", 5, 7)) + self.assertEqual(1, str1.count("e")) + self.assertEqual(0, str1.count("z")) + self.assertEqual(1, str1.count("r", 7)) + self.assertEqual(0, str1.count("r", 8)) + self.assertEqual(1, str1.count("r", 5, 9)) + self.assertEqual(0, str1.count("r", 5, 7)) if not py3k: str2 = _FakeString("fo") - self.assertEquals(str1, str1.decode()) + self.assertEqual(str1, str1.decode()) actual = _FakeString("\\U00010332\\U0001033f\\U00010344") - self.assertEquals("𐌲𐌿𐍄", actual.decode("unicode_escape")) + self.assertEqual("𐌲𐌿𐍄", actual.decode("unicode_escape")) self.assertRaises(UnicodeError, str2.decode, "punycode") - self.assertEquals("", str2.decode("punycode", "ignore")) + self.assertEqual("", str2.decode("punycode", "ignore")) str3 = _FakeString("𐌲𐌿𐍄") - self.assertEquals(b"fake string", str1.encode()) - self.assertEquals(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", + self.assertEqual(b"fake string", str1.encode()) + self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", str3.encode("utf8")) - self.assertEquals(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", + self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", str3.encode(encoding="utf8")) self.assertRaises(UnicodeEncodeError, str3.encode) self.assertRaises(UnicodeEncodeError, str3.encode, "ascii") self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict") self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") - self.assertEquals("", str3.encode("ascii", "ignore")) - self.assertEquals("", str3.encode(errors="ignore")) + self.assertEqual("", str3.encode("ascii", "ignore")) + self.assertEqual("", str3.encode(errors="ignore")) self.assertTrue(str1.endswith("ing")) self.assertFalse(str1.endswith("ingh")) str4 = _FakeString("\tfoobar") - self.assertEquals("fake string", str1) - self.assertEquals(" foobar", str4.expandtabs()) - self.assertEquals(" foobar", str4.expandtabs(4)) + self.assertEqual("fake string", str1) + self.assertEqual(" foobar", str4.expandtabs()) + self.assertEqual(" foobar", str4.expandtabs(4)) - self.assertEquals(3, str1.find("e")) - 
self.assertEquals(-1, str1.find("z")) - self.assertEquals(7, str1.find("r", 7)) - self.assertEquals(-1, str1.find("r", 8)) - self.assertEquals(7, str1.find("r", 5, 9)) - self.assertEquals(-1, str1.find("r", 5, 7)) + self.assertEqual(3, str1.find("e")) + self.assertEqual(-1, str1.find("z")) + self.assertEqual(7, str1.find("r", 7)) + self.assertEqual(-1, str1.find("r", 8)) + self.assertEqual(7, str1.find("r", 5, 9)) + self.assertEqual(-1, str1.find("r", 5, 7)) str5 = _FakeString("foo{0}baz") str6 = _FakeString("foo{abc}baz") str7 = _FakeString("foo{0}{abc}buzz") str8 = _FakeString("{0}{1}") - self.assertEquals("fake string", str1.format()) - self.assertEquals("foobarbaz", str5.format("bar")) - self.assertEquals("foobarbaz", str6.format(abc="bar")) - self.assertEquals("foobarbazbuzz", str7.format("bar", abc="baz")) + self.assertEqual("fake string", str1.format()) + self.assertEqual("foobarbaz", str5.format("bar")) + self.assertEqual("foobarbaz", str6.format(abc="bar")) + self.assertEqual("foobarbazbuzz", str7.format("bar", abc="baz")) self.assertRaises(IndexError, str8.format, "abc") if py3k: - self.assertEquals("fake string", str1.format_map({})) - self.assertEquals("foobarbaz", str6.format_map({"abc": "bar"})) + self.assertEqual("fake string", str1.format_map({})) + self.assertEqual("foobarbaz", str6.format_map({"abc": "bar"})) self.assertRaises(ValueError, str5.format_map, {0: "abc"}) - self.assertEquals(3, str1.index("e")) + self.assertEqual(3, str1.index("e")) self.assertRaises(ValueError, str1.index, "z") - self.assertEquals(7, str1.index("r", 7)) + self.assertEqual(7, str1.index("r", 7)) self.assertRaises(ValueError, str1.index, "r", 8) - self.assertEquals(7, str1.index("r", 5, 9)) + self.assertEqual(7, str1.index("r", 5, 9)) self.assertRaises(ValueError, str1.index, "r", 5, 7) str9 = _FakeString("foobar") @@ -303,120 +303,120 @@ class TestStringMixIn(unittest.TestCase): self.assertFalse(str15.isupper()) self.assertTrue(str21.isupper()) - self.assertEquals("foobar", str15.join(["foo", "bar"])) - self.assertEquals("foo123bar123baz", str12.join(("foo", "bar", "baz"))) + self.assertEqual("foobar", str15.join(["foo", "bar"])) + self.assertEqual("foo123bar123baz", str12.join(("foo", "bar", "baz"))) - self.assertEquals("fake string ", str1.ljust(15)) - self.assertEquals("fake string ", str1.ljust(16)) - self.assertEquals("fake stringqqqq", str1.ljust(15, "q")) + self.assertEqual("fake string ", str1.ljust(15)) + self.assertEqual("fake string ", str1.ljust(16)) + self.assertEqual("fake stringqqqq", str1.ljust(15, "q")) str22 = _FakeString("ß") - self.assertEquals("", str15.lower()) - self.assertEquals("foobar", str16.lower()) - self.assertEquals("ß", str22.lower()) + self.assertEqual("", str15.lower()) + self.assertEqual("foobar", str16.lower()) + self.assertEqual("ß", str22.lower()) if py3k: - self.assertEquals("", str15.casefold()) - self.assertEquals("foobar", str16.casefold()) - self.assertEquals("ss", str22.casefold()) + self.assertEqual("", str15.casefold()) + self.assertEqual("foobar", str16.casefold()) + self.assertEqual("ss", str22.casefold()) str23 = _FakeString(" fake string ") - self.assertEquals("fake string", str1.lstrip()) - self.assertEquals("fake string ", str23.lstrip()) - self.assertEquals("ke string", str1.lstrip("abcdef")) + self.assertEqual("fake string", str1.lstrip()) + self.assertEqual("fake string ", str23.lstrip()) + self.assertEqual("ke string", str1.lstrip("abcdef")) - self.assertEquals(("fa", "ke", " string"), str1.partition("ke")) - self.assertEquals(("fake 
string", "", ""), str1.partition("asdf")) + self.assertEqual(("fa", "ke", " string"), str1.partition("ke")) + self.assertEqual(("fake string", "", ""), str1.partition("asdf")) str24 = _FakeString("boo foo moo") - self.assertEquals("real string", str1.replace("fake", "real")) - self.assertEquals("bu fu moo", str24.replace("oo", "u", 2)) + self.assertEqual("real string", str1.replace("fake", "real")) + self.assertEqual("bu fu moo", str24.replace("oo", "u", 2)) - self.assertEquals(3, str1.rfind("e")) - self.assertEquals(-1, str1.rfind("z")) - self.assertEquals(7, str1.rfind("r", 7)) - self.assertEquals(-1, str1.rfind("r", 8)) - self.assertEquals(7, str1.rfind("r", 5, 9)) - self.assertEquals(-1, str1.rfind("r", 5, 7)) + self.assertEqual(3, str1.rfind("e")) + self.assertEqual(-1, str1.rfind("z")) + self.assertEqual(7, str1.rfind("r", 7)) + self.assertEqual(-1, str1.rfind("r", 8)) + self.assertEqual(7, str1.rfind("r", 5, 9)) + self.assertEqual(-1, str1.rfind("r", 5, 7)) - self.assertEquals(3, str1.rindex("e")) + self.assertEqual(3, str1.rindex("e")) self.assertRaises(ValueError, str1.rindex, "z") - self.assertEquals(7, str1.rindex("r", 7)) + self.assertEqual(7, str1.rindex("r", 7)) self.assertRaises(ValueError, str1.rindex, "r", 8) - self.assertEquals(7, str1.rindex("r", 5, 9)) + self.assertEqual(7, str1.rindex("r", 5, 9)) self.assertRaises(ValueError, str1.rindex, "r", 5, 7) - self.assertEquals(" fake string", str1.rjust(15)) - self.assertEquals(" fake string", str1.rjust(16)) - self.assertEquals("qqqqfake string", str1.rjust(15, "q")) + self.assertEqual(" fake string", str1.rjust(15)) + self.assertEqual(" fake string", str1.rjust(16)) + self.assertEqual("qqqqfake string", str1.rjust(15, "q")) - self.assertEquals(("fa", "ke", " string"), str1.rpartition("ke")) - self.assertEquals(("", "", "fake string"), str1.rpartition("asdf")) + self.assertEqual(("fa", "ke", " string"), str1.rpartition("ke")) + self.assertEqual(("", "", "fake string"), str1.rpartition("asdf")) str25 = _FakeString(" this is a sentence with whitespace ") actual = ["this", "is", "a", "sentence", "with", "whitespace"] - self.assertEquals(actual, str25.rsplit()) - self.assertEquals(actual, str25.rsplit(None)) + self.assertEqual(actual, str25.rsplit()) + self.assertEqual(actual, str25.rsplit(None)) actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", "", "whitespace", ""] - self.assertEquals(actual, str25.rsplit(" ")) + self.assertEqual(actual, str25.rsplit(" ")) actual = [" this is a", "sentence", "with", "whitespace"] - self.assertEquals(actual, str25.rsplit(None, 3)) + self.assertEqual(actual, str25.rsplit(None, 3)) actual = [" this is a sentence with", "", "whitespace", ""] - self.assertEquals(actual, str25.rsplit(" ", 3)) + self.assertEqual(actual, str25.rsplit(" ", 3)) if py3k: - self.assertEquals(actual, str25.rsplit(maxsplit=3)) + self.assertEqual(actual, str25.rsplit(maxsplit=3)) - self.assertEquals("fake string", str1.rstrip()) - self.assertEquals(" fake string", str23.rstrip()) - self.assertEquals("fake stri", str1.rstrip("ngr")) + self.assertEqual("fake string", str1.rstrip()) + self.assertEqual(" fake string", str23.rstrip()) + self.assertEqual("fake stri", str1.rstrip("ngr")) actual = ["this", "is", "a", "sentence", "with", "whitespace"] - self.assertEquals(actual, str25.split()) - self.assertEquals(actual, str25.split(None)) + self.assertEqual(actual, str25.split()) + self.assertEqual(actual, str25.split(None)) actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", "", "whitespace", ""] 
- self.assertEquals(actual, str25.split(" ")) + self.assertEqual(actual, str25.split(" ")) actual = ["this", "is", "a", "sentence with whitespace "] - self.assertEquals(actual, str25.split(None, 3)) + self.assertEqual(actual, str25.split(None, 3)) actual = ["", "", "", "this is a sentence with whitespace "] - self.assertEquals(actual, str25.split(" ", 3)) + self.assertEqual(actual, str25.split(" ", 3)) if py3k: - self.assertEquals(actual, str25.split(maxsplit=3)) + self.assertEqual(actual, str25.split(maxsplit=3)) str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") - self.assertEquals(["lines", "of", "text", "are", "presented", "here"], + self.assertEqual(["lines", "of", "text", "are", "presented", "here"], str26.splitlines()) - self.assertEquals(["lines\n", "of\n", "text\r\n", "are\r\n", + self.assertEqual(["lines\n", "of\n", "text\r\n", "are\r\n", "presented\n", "here"], str26.splitlines(True)) self.assertTrue(str1.startswith("fake")) self.assertFalse(str1.startswith("faker")) - self.assertEquals("fake string", str1.strip()) - self.assertEquals("fake string", str23.strip()) - self.assertEquals("ke stri", str1.strip("abcdefngr")) + self.assertEqual("fake string", str1.strip()) + self.assertEqual("fake string", str23.strip()) + self.assertEqual("ke stri", str1.strip("abcdefngr")) - self.assertEquals("fOObAR", str16.swapcase()) + self.assertEqual("fOObAR", str16.swapcase()) - self.assertEquals("Fake String", str1.title()) + self.assertEqual("Fake String", str1.title()) if py3k: table1 = str.maketrans({97: "1", 101: "2", 105: "3", 111: "4", 117: "5"}) table2 = str.maketrans("aeiou", "12345") table3 = str.maketrans("aeiou", "12345", "rts") - self.assertEquals("f1k2 str3ng", str1.translate(table1)) - self.assertEquals("f1k2 str3ng", str1.translate(table2)) - self.assertEquals("f1k2 3ng", str1.translate(table3)) + self.assertEqual("f1k2 str3ng", str1.translate(table1)) + self.assertEqual("f1k2 str3ng", str1.translate(table2)) + self.assertEqual("f1k2 3ng", str1.translate(table3)) else: table = {97: "1", 101: "2", 105: "3", 111: "4", 117: "5"} - self.assertEquals("f1k2 str3ng", str1.translate(table)) + self.assertEqual("f1k2 str3ng", str1.translate(table)) - self.assertEquals("", str15.upper()) - self.assertEquals("FOOBAR", str16.upper()) + self.assertEqual("", str15.upper()) + self.assertEqual("FOOBAR", str16.upper()) - self.assertEquals("123", str12.zfill(3)) - self.assertEquals("000123", str12.zfill(6)) + self.assertEqual("123", str12.zfill(3)) + self.assertEqual("000123", str12.zfill(6)) if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 5a18b8e..1449ad2 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -42,8 +42,8 @@ class TestTokens(unittest.TestCase): token1 = tokens.Token() token2 = tokens.Token(foo="bar", baz=123) - self.assertEquals("bar", token2.foo) - self.assertEquals(123, token2.baz) + self.assertEqual("bar", token2.foo) + self.assertEqual(123, token2.baz) self.assertRaises(KeyError, lambda: token1.foo) self.assertRaises(KeyError, lambda: token2.bar) @@ -51,8 +51,8 @@ class TestTokens(unittest.TestCase): token2.foo = "ham" del token2.baz - self.assertEquals("eggs", token1.spam) - self.assertEquals("ham", token2.foo) + self.assertEqual("eggs", token1.spam) + self.assertEqual("ham", token2.foo) self.assertRaises(KeyError, lambda: token2.baz) self.assertRaises(KeyError, delattr, token2, "baz") @@ -63,15 +63,15 @@ class TestTokens(unittest.TestCase): token3 = tokens.Text(text="earwig" * 100) 
hundredchars = ("earwig" * 100)[:97] + "..." - self.assertEquals("Token()", repr(token1)) + self.assertEqual("Token()", repr(token1)) if py3k: token2repr = "Token(foo='bar', baz=123)" token3repr = "Text(text='" + hundredchars + "')" else: token2repr = "Token(foo=u'bar', baz=123)" token3repr = "Text(text=u'" + hundredchars + "')" - self.assertEquals(token2repr, repr(token2)) - self.assertEquals(token3repr, repr(token3)) + self.assertEqual(token2repr, repr(token2)) + self.assertEqual(token3repr, repr(token3)) def test_equality(self): """check that equivalent tokens are considered equal""" @@ -82,10 +82,10 @@ class TestTokens(unittest.TestCase): token5 = tokens.Text(text="asdf") token6 = tokens.TemplateOpen(text="asdf") - self.assertEquals(token1, token2) - self.assertEquals(token2, token1) - self.assertEquals(token4, token5) - self.assertEquals(token5, token4) + self.assertEqual(token1, token2) + self.assertEqual(token2, token1) + self.assertEqual(token4, token5) + self.assertEqual(token5, token4) self.assertNotEquals(token1, token3) self.assertNotEquals(token2, token3) self.assertNotEquals(token4, token6) @@ -99,7 +99,7 @@ class TestTokens(unittest.TestCase): tokens.Text(text="earwig") ] for token in tests: - self.assertEquals(token, eval(repr(token), vars(tokens))) + self.assertEqual(token, eval(repr(token), vars(tokens))) if __name__ == "__main__": unittest.main(verbosity=2) From 97a837c1e8d8fbaae71360f442f53ca7bd81a58f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 01:36:02 -0400 Subject: [PATCH 46/67] Implement test_parser(). Clean up a few lambdas in TestSmartList. --- tests/test_parser.py | 62 +++++++++++++++++++++++++++++++++++++++++++++++- tests/test_smart_list.py | 8 +++---- 2 files changed, 65 insertions(+), 5 deletions(-) diff --git a/tests/test_parser.py b/tests/test_parser.py index 5ea2b49..6e775ce 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -23,8 +23,68 @@ from __future__ import unicode_literals import unittest +from mwparserfromhell.compat import range +from mwparserfromhell.nodes import Template, Text, Wikilink +from mwparserfromhell.nodes.extras import Parameter +from mwparserfromhell.parser import Parser +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + class TestParser(unittest.TestCase): - pass + """Tests for the Parser class itself, which tokenizes and builds nodes.""" + + def assertNodesEqual(self, expected, actual): + """Assert that two Nodes are the same type and have the same data.""" + self.assertIs(type(expected), type(actual)) + if isinstance(expected, Text): + self.assertEqual(expected.value, actual.value) + elif isinstance(expected, Template): + self.assertWikicodeEqual(expected.name, actual.name) + length = len(expected.params) + self.assertEqual(length, len(actual.params)) + for i in range(length): + exp_param = expected.params[i] + act_param = actual.params[i] + self.assertWikicodeEqual(exp_param.name, act_param.name) + self.assertWikicodeEqual(exp_param.value, act_param.value) + self.assertIs(exp_param.showkey, act_param.showkey) + elif isinstance(expected, Wikilink): + self.assertWikicodeEqual(expected.title, actual.title) + if expected.text is not None: + self.assertWikicodeEqual(expected.text, actual.text) + else: + self.assertIs(None, actual.text) + + def assertWikicodeEqual(self, expected, actual): + """Assert that two Wikicode objects have the same data.""" + self.assertIsInstance(actual, Wikicode) + length = len(expected.nodes) + self.assertEqual(length, 
len(actual.nodes)) + for i in range(length): + self.assertNodesEqual(expected.get(i), actual.get(i)) + + def test_parser(self): + """integration test for parsing overall""" + text = "this is text; {{this|is=a|template={{with|[[links]]|in}}it}}" + wrap = lambda L: Wikicode(SmartList(L)) + expected = wrap([ + Text("this is text; "), + Template(wrap([Text("this")]), [ + Parameter(wrap([Text("is")]), wrap([Text("a")])), + Parameter(wrap([Text("template")]), wrap([ + Template(wrap([Text("with")]), [ + Parameter(wrap([Text("1")]), + wrap([Wikilink(wrap([Text("links")]))]), + showkey=False), + Parameter(wrap([Text("2")]), + wrap([Text("in")]), showkey=False) + ]), + Text("it") + ])) + ]) + ]) + actual = Parser(text).parse() + self.assertWikicodeEqual(expected, actual) if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 680de9d..d821ccd 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -288,19 +288,19 @@ class TestSmartList(unittest.TestCase): def test_parent_get_set_del(self): """make sure SmartList's getitem/setitem/delitem work""" - self._test_get_set_del_item(lambda L: SmartList(L)) + self._test_get_set_del_item(SmartList) def test_parent_add(self): """make sure SmartList's add/radd/iadd work""" - self._test_add_radd_iadd(lambda L: SmartList(L)) + self._test_add_radd_iadd(SmartList) def test_parent_unaffected_magics(self): """sanity checks against SmartList features that were not modified""" - self._test_other_magic_methods(lambda L: SmartList(L)) + self._test_other_magic_methods(SmartList) def test_parent_methods(self): """make sure SmartList's non-magic methods work, like append()""" - self._test_list_methods(lambda L: SmartList(L)) + self._test_list_methods(SmartList) def test_child_get_set_del(self): """make sure _ListProxy's getitem/setitem/delitem work""" From f8032695146f032108c1b736631f546712689372 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 17:19:08 -0400 Subject: [PATCH 47/67] Add a USES_C field to the tokenizers; add TestParser.test_use_c() --- mwparserfromhell/parser/tokenizer.c | 2 ++ mwparserfromhell/parser/tokenizer.py | 1 + tests/test_parser.py | 13 ++++++++++--- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 8c96500..d3abb22 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1387,6 +1387,8 @@ init_tokenizer(void) module = Py_InitModule("_tokenizer", module_methods); Py_INCREF(&TokenizerType); PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); + Py_INCREF(Py_True); + PyDict_SetItemString(TokenizerType.tp_dict, "USES_C", Py_True); tempmod = PyImport_ImportModule("htmlentitydefs"); if (!tempmod) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 67638ca..0bf0322 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -38,6 +38,7 @@ class BadRoute(Exception): class Tokenizer(object): """Creates a list of tokens from a string of wikicode.""" + USES_C = False START = object() END = object() MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", diff --git a/tests/test_parser.py b/tests/test_parser.py index 6e775ce..4f718c8 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -23,10 +23,10 @@ from __future__ import unicode_literals import unittest +from mwparserfromhell import parser from 
mwparserfromhell.compat import range from mwparserfromhell.nodes import Template, Text, Wikilink from mwparserfromhell.nodes.extras import Parameter -from mwparserfromhell.parser import Parser from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode @@ -63,7 +63,14 @@ class TestParser(unittest.TestCase): for i in range(length): self.assertNodesEqual(expected.get(i), actual.get(i)) - def test_parser(self): + def test_use_c(self): + """make sure the correct tokenizer is used""" + if parser.use_c: + self.assertTrue(parser.Parser(None)._tokenizer.USES_C) + parser.use_c = False + self.assertFalse(parser.Parser(None)._tokenizer.USES_C) + + def test_parsing(self): """integration test for parsing overall""" text = "this is text; {{this|is=a|template={{with|[[links]]|in}}it}}" wrap = lambda L: Wikicode(SmartList(L)) @@ -83,7 +90,7 @@ class TestParser(unittest.TestCase): ])) ]) ]) - actual = Parser(text).parse() + actual = parser.Parser(text).parse() self.assertWikicodeEqual(expected, actual) if __name__ == "__main__": From 27a3503aa113c12971fab6a1d8fd676180b70449 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 17:22:37 -0400 Subject: [PATCH 48/67] Add test_uses_c() to TestPyTokenizer and TestCTokenizer --- tests/test_ctokenizer.py | 8 +++++++- tests/test_pytokenizer.py | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 4dbeceb..7ef8975 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -23,6 +23,8 @@ from __future__ import unicode_literals import unittest +from mwparserfromhell.parser._tokenizer import CTokenizer + from _test_tokenizer import TokenizerTestCase class TestCTokenizer(TokenizerTestCase, unittest.TestCase): @@ -30,8 +32,12 @@ class TestCTokenizer(TokenizerTestCase, unittest.TestCase): @classmethod def setUpClass(cls): - from mwparserfromhell.parser._tokenizer import CTokenizer cls.tokenizer = CTokenizer + def test_uses_c(self): + """make sure the C tokenizer identifies as using a C extension""" + self.assertTrue(CTokenizer.USES_C) + self.assertTrue(CTokenizer().USES_C) + if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 73e6fe7..3e598bf 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -23,6 +23,8 @@ from __future__ import unicode_literals import unittest +from mwparserfromhell.parser.tokenizer import Tokenizer + from _test_tokenizer import TokenizerTestCase class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): @@ -30,8 +32,12 @@ class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): @classmethod def setUpClass(cls): - from mwparserfromhell.parser.tokenizer import Tokenizer cls.tokenizer = Tokenizer + def test_uses_c(self): + """make sure the Python tokenizer identifies as not using C""" + self.assertFalse(Tokenizer.USES_C) + self.assertFalse(Tokenizer().USES_C) + if __name__ == "__main__": unittest.main(verbosity=2) From 5ca6f6c755bb8b3d3a3190bab4cf6f0a1eb6b2a7 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 17:40:39 -0400 Subject: [PATCH 49/67] Skip test_readme_5() if web query fails. 
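
A web-dependent doc test should be skipped, not failed, when the query cannot be made.
A rough standalone sketch of that pattern (illustrative only; fetch_raw() is a
hypothetical stand-in for the urllib.urlopen(...).read() call wrapped below):

    import unittest

    def fetch_raw():
        # Hypothetical helper standing in for the real web query; it may raise
        # IOError when the network or the remote API is unavailable.
        raise IOError("no network")

    class WebDependentTest(unittest.TestCase):
        def test_web_query(self):
            try:
                raw = fetch_raw()
            except IOError:
                # skipTest() reports the test as skipped instead of failed
                self.skipTest("cannot continue because of unsuccessful web call")
            self.assertTrue(raw)

    if __name__ == "__main__":
        unittest.main(verbosity=2)
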
--- tests/test_docs.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_docs.py b/tests/test_docs.py index 075b0a7..971c5d1 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -113,7 +113,10 @@ class TestDocs(unittest.TestCase): title = "Test" data = {"action": "query", "prop": "revisions", "rvlimit": 1, "rvprop": "content", "format": "json", "titles": title} - raw = urllib.urlopen(url1, urllib.urlencode(data)).read() + try: + raw = urllib.urlopen(url1, urllib.urlencode(data)).read() + except IOError: + self.skipTest("cannot continue because of unsuccessful web call") res = json.loads(raw) text = res["query"]["pages"].values()[0]["revisions"][0]["*"] expected = urllib.urlopen(url2.format(title)).read().decode("utf8") From 7f87a1c4b371f813d5006b25cf39f2b40b4dc58e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 19:39:12 -0400 Subject: [PATCH 50/67] Apply bugfixes so that some tests pass on Python 3. - Skip CTokenizer tests if CTokenizer is not available. - TestStringMixin: Don't make assumptions about default encoding. - Add urllib stuff to mwparserfromhell.compat. - Fix compat.py's line endings. - gen.next() -> next(gen) - assert*Equals() -> assert*Equal() --- mwparserfromhell/compat.py | 69 +++++++++++++++++++++------------------- mwparserfromhell/string_mixin.py | 2 +- tests/test_ctokenizer.py | 6 +++- tests/test_docs.py | 14 ++++---- tests/test_smart_list.py | 6 ++-- tests/test_string_mixin.py | 33 ++++++++++++------- tests/test_tokens.py | 17 ++++++---- 7 files changed, 85 insertions(+), 62 deletions(-) diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index 48b9807..34870e6 100755 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -1,33 +1,36 @@ -# -*- coding: utf-8 -*- - -""" -Implements support for both Python 2 and Python 3 by defining common types in -terms of their Python 2/3 variants. For example, :py:class:`str` is set to -:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, -:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These -types are meant to be imported directly from within the parser's modules. -""" - -import sys - -py3k = sys.version_info[0] == 3 - -if py3k: - bytes = bytes - str = str - basestring = str - range = range - maxsize = sys.maxsize - import html.entities as htmlentities - from io import StringIO - -else: - bytes = str - str = unicode - basestring = basestring - range = xrange - maxsize = sys.maxint - import htmlentitydefs as htmlentities - from StringIO import StringIO - -del sys +# -*- coding: utf-8 -*- + +""" +Implements support for both Python 2 and Python 3 by defining common types in +terms of their Python 2/3 variants. For example, :py:class:`str` is set to +:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, +:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These +types are meant to be imported directly from within the parser's modules. 
+""" + +import sys + +py3k = sys.version_info[0] == 3 + +if py3k: + bytes = bytes + str = str + basestring = str + range = range + maxsize = sys.maxsize + import html.entities as htmlentities + from io import StringIO + from urllib.parse import urlencode + from urllib.request import urlopen + +else: + bytes = str + str = unicode + basestring = basestring + range = xrange + maxsize = sys.maxint + import htmlentitydefs as htmlentities + from StringIO import StringIO + from urllib import urlencode, urlopen + +del sys diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index eee58b9..6bee9c4 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -252,8 +252,8 @@ class StringMixIn(object): return self.__unicode__().lstrip(chars) if py3k: - @inheritdoc @staticmethod + @inheritdoc def maketrans(self, x, y=None, z=None): if z is None: if y is None: diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 7ef8975..f21378c 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -23,10 +23,14 @@ from __future__ import unicode_literals import unittest -from mwparserfromhell.parser._tokenizer import CTokenizer +try: + from mwparserfromhell.parser._tokenizer import CTokenizer +except ImportError: + CTokenizer = None from _test_tokenizer import TokenizerTestCase +@unittest.skipUnless(CTokenizer, "C tokenizer not available") class TestCTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the C tokenizer.""" diff --git a/tests/test_docs.py b/tests/test_docs.py index 971c5d1..3b23bb7 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -23,10 +23,9 @@ from __future__ import print_function, unicode_literals import json import unittest -import urllib import mwparserfromhell -from mwparserfromhell.compat import py3k, str, StringIO +from mwparserfromhell.compat import py3k, str, StringIO, urlencode, urlopen class TestDocs(unittest.TestCase): """Integration test cases for mwparserfromhell's documentation.""" @@ -114,12 +113,15 @@ class TestDocs(unittest.TestCase): data = {"action": "query", "prop": "revisions", "rvlimit": 1, "rvprop": "content", "format": "json", "titles": title} try: - raw = urllib.urlopen(url1, urllib.urlencode(data)).read() + raw = urlopen(url1, urlencode(data).encode("utf8")).read() + except IOError: + self.skipTest("cannot continue because of unsuccessful web call") + res = json.loads(raw.decode("utf8")) + text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"] + try: + expected = urlopen(url2.format(title)).read().decode("utf8") except IOError: self.skipTest("cannot continue because of unsuccessful web call") - res = json.loads(raw) - text = res["query"]["pages"].values()[0]["revisions"][0]["*"] - expected = urllib.urlopen(url2.format(title)).read().decode("utf8") actual = mwparserfromhell.parse(text) self.assertEqual(expected, actual) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index d821ccd..01caca7 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -180,11 +180,11 @@ class TestSmartList(unittest.TestCase): gen1 = iter(list1) out = [] for i in range(len(list1)): - out.append(gen1.next()) - self.assertRaises(StopIteration, gen1.next) + out.append(next(gen1)) + self.assertRaises(StopIteration, next, gen1) self.assertEqual([0, 1, 2, 3, "one", "two"], out) gen2 = iter(list2) - self.assertRaises(StopIteration, gen2.next) + self.assertRaises(StopIteration, next, gen2) self.assertEqual(["two", "one", 3, 2, 1, 0], 
list(reversed(list1))) self.assertEqual([], list(reversed(list2))) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 6ef6344..6d10609 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -21,6 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals +from sys import getdefaultencoding from types import GeneratorType import unittest @@ -139,10 +140,10 @@ class TestStringMixIn(unittest.TestCase): out = [] for i in range(len(str1)): - out.append(gen1.next()) - self.assertRaises(StopIteration, gen1.next) + out.append(next(gen1)) + self.assertRaises(StopIteration, next, gen1) self.assertEqual(expected, out) - self.assertRaises(StopIteration, gen2.next) + self.assertRaises(StopIteration, next, gen2) self.assertEqual("gnirts ekaf", "".join(list(reversed(str1)))) self.assertEqual([], list(reversed(str2))) @@ -187,17 +188,25 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual("", str2.decode("punycode", "ignore")) str3 = _FakeString("𐌲𐌿𐍄") + actual = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84" self.assertEqual(b"fake string", str1.encode()) - self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", - str3.encode("utf8")) - self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", - str3.encode(encoding="utf8")) - self.assertRaises(UnicodeEncodeError, str3.encode) + self.assertEqual(actual, str3.encode("utf-8")) + self.assertEqual(actual, str3.encode(encoding="utf-8")) + if getdefaultencoding() == "ascii": + self.assertRaises(UnicodeEncodeError, str3.encode) + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, str3.encode()) self.assertRaises(UnicodeEncodeError, str3.encode, "ascii") self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict") - self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") - self.assertEqual("", str3.encode("ascii", "ignore")) - self.assertEqual("", str3.encode(errors="ignore")) + if getdefaultencoding() == "ascii": + self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, str3.encode(errors="strict")) + self.assertEqual(b"", str3.encode("ascii", "ignore")) + if getdefaultencoding() == "ascii": + self.assertEqual(b"", str3.encode(errors="ignore")) + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, str3.encode(errors="ignore")) self.assertTrue(str1.endswith("ing")) self.assertFalse(str1.endswith("ingh")) @@ -364,6 +373,7 @@ class TestStringMixIn(unittest.TestCase): actual = [" this is a sentence with", "", "whitespace", ""] self.assertEqual(actual, str25.rsplit(" ", 3)) if py3k: + actual = [" this is a", "sentence", "with", "whitespace"] self.assertEqual(actual, str25.rsplit(maxsplit=3)) self.assertEqual("fake string", str1.rstrip()) @@ -381,6 +391,7 @@ class TestStringMixIn(unittest.TestCase): actual = ["", "", "", "this is a sentence with whitespace "] self.assertEqual(actual, str25.split(" ", 3)) if py3k: + actual = ["this", "is", "a", "sentence with whitespace "] self.assertEqual(actual, str25.split(maxsplit=3)) str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 1449ad2..4620982 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -65,12 +65,15 @@ class TestTokens(unittest.TestCase): self.assertEqual("Token()", repr(token1)) if py3k: - token2repr = "Token(foo='bar', baz=123)" + token2repr1 = "Token(foo='bar', baz=123)" + token2repr2 = "Token(baz=123, foo='bar')" 
token3repr = "Text(text='" + hundredchars + "')" else: - token2repr = "Token(foo=u'bar', baz=123)" + token2repr1 = "Token(foo=u'bar', baz=123)" + token2repr2 = "Token(baz=123, foo=u'bar')" token3repr = "Text(text=u'" + hundredchars + "')" - self.assertEqual(token2repr, repr(token2)) + token2repr = repr(token2) + self.assertTrue(token2repr == token2repr1 or token2repr == token2repr2) self.assertEqual(token3repr, repr(token3)) def test_equality(self): @@ -86,10 +89,10 @@ class TestTokens(unittest.TestCase): self.assertEqual(token2, token1) self.assertEqual(token4, token5) self.assertEqual(token5, token4) - self.assertNotEquals(token1, token3) - self.assertNotEquals(token2, token3) - self.assertNotEquals(token4, token6) - self.assertNotEquals(token5, token6) + self.assertNotEqual(token1, token3) + self.assertNotEqual(token2, token3) + self.assertNotEqual(token4, token6) + self.assertNotEqual(token5, token6) def test_repr_equality(self): "check that eval(repr(token)) == token" From 32ac6958e1618e9025486212dac412346126bccd Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 20:59:23 -0400 Subject: [PATCH 51/67] Apply some bugfixes to SmartList to fix tests on Python 3. - Add a _SliceNormalizerMixIn to properly handle slices. - Use floor division when applying key.step. - Implement sort() without 'cmp' parameter. - Fix bytes(list) behavior. - Children of _ListProxies are now _ListProxies, not regular lists. --- mwparserfromhell/smart_list.py | 137 +++++++++++++++++++++++++++-------------- tests/test_smart_list.py | 12 ++-- 2 files changed, 99 insertions(+), 50 deletions(-) diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index 46c475a..09b7bbb 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -41,8 +41,23 @@ def inheritdoc(method): method.__doc__ = getattr(list, method.__name__).__doc__ return method +class _SliceNormalizerMixIn(object): + """MixIn that provides a private method to normalize slices.""" -class SmartList(list): + def _normalize_slice(self, key): + """Return a slice equivalent to the input *key*, standardized.""" + if key.start is not None: + start = (len(self) + key.start) if key.start < 0 else key.start + else: + start = 0 + if key.stop is not None: + stop = (len(self) + key.stop) if key.stop < 0 else key.stop + else: + stop = maxsize + return slice(start, stop, key.step or 1) + + +class SmartList(_SliceNormalizerMixIn, list): """Implements the ``list`` interface with special handling of sublists. 
When a sublist is created (by ``list[i:j]``), any changes made to this @@ -76,8 +91,8 @@ class SmartList(list): def __getitem__(self, key): if not isinstance(key, slice): return super(SmartList, self).__getitem__(key) - keystop = maxsize if key.stop is None else key.stop - sliceinfo = [key.start or 0, keystop, key.step or 1] + key = self._normalize_slice(key) + sliceinfo = [key.start, key.stop, key.step] child = _ListProxy(self, sliceinfo) self._children[id(child)] = (child, sliceinfo) return child @@ -87,9 +102,8 @@ class SmartList(list): return super(SmartList, self).__setitem__(key, item) item = list(item) super(SmartList, self).__setitem__(key, item) - keystop = maxsize if key.stop is None else key.stop - key = slice(key.start or 0, keystop, key.step or 1) - diff = len(item) + (key.start - key.stop) / key.step + key = self._normalize_slice(key) + diff = len(item) + (key.start - key.stop) // key.step values = self._children.values if py3k else self._children.itervalues if diff: for child, (start, stop, step) in values(): @@ -101,11 +115,10 @@ class SmartList(list): def __delitem__(self, key): super(SmartList, self).__delitem__(key) if isinstance(key, slice): - keystop = maxsize if key.stop is None else key.stop - key = slice(key.start or 0, keystop, key.step or 1) + key = self._normalize_slice(key) else: key = slice(key, key + 1, 1) - diff = (key.stop - key.start) / key.step + diff = (key.stop - key.start) // key.step values = self._children.values if py3k else self._children.itervalues for child, (start, stop, step) in values(): if start > key.start: @@ -166,22 +179,35 @@ class SmartList(list): child._parent = copy super(SmartList, self).reverse() - @inheritdoc - def sort(self, cmp=None, key=None, reverse=None): - copy = list(self) - for child in self._children: - child._parent = copy - kwargs = {} - if cmp is not None: - kwargs["cmp"] = cmp - if key is not None: - kwargs["key"] = key - if reverse is not None: - kwargs["reverse"] = reverse - super(SmartList, self).sort(**kwargs) - - -class _ListProxy(list): + if py3k: + @inheritdoc + def sort(self, key=None, reverse=None): + copy = list(self) + for child in self._children: + child._parent = copy + kwargs = {} + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + super(SmartList, self).sort(**kwargs) + else: + @inheritdoc + def sort(self, cmp=None, key=None, reverse=None): + copy = list(self) + for child in self._children: + child._parent = copy + kwargs = {} + if cmp is not None: + kwargs["cmp"] = cmp + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + super(SmartList, self).sort(**kwargs) + + +class _ListProxy(_SliceNormalizerMixIn, list): """Implement the ``list`` interface by getting elements from a parent. This is created by a :py:class:`~.SmartList` object when slicing. 
It does @@ -235,19 +261,28 @@ class _ListProxy(list): return bool(self._render()) def __len__(self): - return (self._stop - self._start) / self._step + return (self._stop - self._start) // self._step def __getitem__(self, key): - return self._render()[key] + if isinstance(key, slice): + key = self._normalize_slice(key) + if key.stop == maxsize: + keystop = self._stop + else: + keystop = key.stop + self._start + adjusted = slice(key.start + self._start, keystop, key.step) + return self._parent[adjusted] + else: + return self._render()[key] def __setitem__(self, key, item): if isinstance(key, slice): - keystart = (key.start or 0) + self._start - if key.stop is None or key.stop == maxsize: + key = self._normalize_slice(key) + if key.stop == maxsize: keystop = self._stop else: keystop = key.stop + self._start - adjusted = slice(keystart, keystop, key.step) + adjusted = slice(key.start + self._start, keystop, key.step) self._parent[adjusted] = item else: length = len(self) @@ -259,12 +294,12 @@ class _ListProxy(list): def __delitem__(self, key): if isinstance(key, slice): - keystart = (key.start or 0) + self._start - if key.stop is None or key.stop == maxsize: + key = self._normalize_slice(key) + if key.stop == maxsize: keystop = self._stop else: keystop = key.stop + self._start - adjusted = slice(keystart, keystop, key.step) + adjusted = slice(key.start + self._start, keystop, key.step) del self._parent[adjusted] else: length = len(self) @@ -388,18 +423,30 @@ class _ListProxy(list): item.reverse() self._parent[self._start:self._stop:self._step] = item - @inheritdoc - def sort(self, cmp=None, key=None, reverse=None): - item = self._render() - kwargs = {} - if cmp is not None: - kwargs["cmp"] = cmp - if key is not None: - kwargs["key"] = key - if reverse is not None: - kwargs["reverse"] = reverse - item.sort(**kwargs) - self._parent[self._start:self._stop:self._step] = item + if py3k: + @inheritdoc + def sort(self, key=None, reverse=None): + item = self._render() + kwargs = {} + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + item.sort(**kwargs) + self._parent[self._start:self._stop:self._step] = item + else: + @inheritdoc + def sort(self, cmp=None, key=None, reverse=None): + item = self._render() + kwargs = {} + if cmp is not None: + kwargs["cmp"] = cmp + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + item.sort(**kwargs) + self._parent[self._start:self._stop:self._step] = item del inheritdoc diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 01caca7..3423bb7 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -123,7 +123,7 @@ class TestSmartList(unittest.TestCase): if py3k: self.assertEqual("[0, 1, 2, 3, 'one', 'two']", str(list1)) - self.assertEqual(b"[0, 1, 2, 3, 'one', 'two']", bytes(list1)) + self.assertEqual(b"\x00\x01\x02", bytes(list4)) self.assertEqual("[0, 1, 2, 3, 'one', 'two']", repr(list1)) else: self.assertEqual("[0, 1, 2, 3, u'one', u'two']", unicode(list1)) @@ -256,10 +256,12 @@ class TestSmartList(unittest.TestCase): self.assertEqual([0, 2, 2, 3, 4, 5], list1) list1.sort(reverse=True) self.assertEqual([5, 4, 3, 2, 2, 0], list1) - list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y)) # Distance from 3 - self.assertEqual([3, 4, 2, 2, 5, 0], list1) - list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y), reverse=True) - self.assertEqual([0, 5, 4, 2, 2, 3], list1) + if not py3k: + func = lambda x, y: abs(3 - x) - abs(3 - y) # Distance from 3 + 
list1.sort(cmp=func) + self.assertEqual([3, 4, 2, 2, 5, 0], list1) + list1.sort(cmp=func, reverse=True) + self.assertEqual([0, 5, 4, 2, 2, 3], list1) list3.sort(key=lambda i: i[1]) self.assertEqual([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) list3.sort(key=lambda i: i[1], reverse=True) From eae6f11add071401c95e89c5f8ea42be2d0c96aa Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 21:24:45 -0400 Subject: [PATCH 52/67] Make _test_tokenizer import relative so tests work on py3k. --- tests/test_ctokenizer.py | 2 +- tests/test_pytokenizer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index f21378c..7a082e8 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -28,7 +28,7 @@ try: except ImportError: CTokenizer = None -from _test_tokenizer import TokenizerTestCase +from ._test_tokenizer import TokenizerTestCase @unittest.skipUnless(CTokenizer, "C tokenizer not available") class TestCTokenizer(TokenizerTestCase, unittest.TestCase): diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 3e598bf..697c7e5 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -25,7 +25,7 @@ import unittest from mwparserfromhell.parser.tokenizer import Tokenizer -from _test_tokenizer import TokenizerTestCase +from ._test_tokenizer import TokenizerTestCase class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the Python tokenizer.""" From 1b69b5e882944abf0909816d2daed76c37cbe9c8 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 30 Mar 2013 16:46:39 -0400 Subject: [PATCH 53/67] Moving compat stuff exclusively for unit tests to its own file. --- mwparserfromhell/compat.py | 7 ------- tests/compat.py | 20 ++++++++++++++++++++ tests/test_docs.py | 4 +++- tests/test_parser.py | 3 ++- tests/test_smart_list.py | 4 +++- tests/test_string_mixin.py | 4 +++- 6 files changed, 31 insertions(+), 11 deletions(-) create mode 100644 tests/compat.py diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index 34870e6..bb81513 100755 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -16,21 +16,14 @@ if py3k: bytes = bytes str = str basestring = str - range = range maxsize = sys.maxsize import html.entities as htmlentities - from io import StringIO - from urllib.parse import urlencode - from urllib.request import urlopen else: bytes = str str = unicode basestring = basestring - range = xrange maxsize = sys.maxint import htmlentitydefs as htmlentities - from StringIO import StringIO - from urllib import urlencode, urlopen del sys diff --git a/tests/compat.py b/tests/compat.py new file mode 100644 index 0000000..8bed40e --- /dev/null +++ b/tests/compat.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- + +""" +Serves the same purpose as mwparserfromhell.compat, but only for objects +required by unit tests. This avoids unnecessary imports (like urllib) within +the main library. 
+""" + +from mwparserfromhell.compat import py3k + +if py3k: + range = range + from io import StringIO + from urllib.parse import urlencode + from urllib.request import urlopen + +else: + range = xrange + from StringIO import StringIO + from urllib import urlencode, urlopen diff --git a/tests/test_docs.py b/tests/test_docs.py index 3b23bb7..8d95c47 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -25,7 +25,9 @@ import json import unittest import mwparserfromhell -from mwparserfromhell.compat import py3k, str, StringIO, urlencode, urlopen +from mwparserfromhell.compat import py3k, str + +from .compat import StringIO, urlencode, urlopen class TestDocs(unittest.TestCase): """Integration test cases for mwparserfromhell's documentation.""" diff --git a/tests/test_parser.py b/tests/test_parser.py index 4f718c8..1c37a85 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -24,12 +24,13 @@ from __future__ import unicode_literals import unittest from mwparserfromhell import parser -from mwparserfromhell.compat import range from mwparserfromhell.nodes import Template, Text, Wikilink from mwparserfromhell.nodes.extras import Parameter from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode +from .compat import range + class TestParser(unittest.TestCase): """Tests for the Parser class itself, which tokenizes and builds nodes.""" diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 3423bb7..25df555 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -23,9 +23,11 @@ from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import py3k, range +from mwparserfromhell.compat import py3k from mwparserfromhell.smart_list import SmartList, _ListProxy +from .compat import range + class TestSmartList(unittest.TestCase): """Test cases for the SmartList class and its child, _ListProxy.""" diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 6d10609..306f2fd 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -25,9 +25,11 @@ from sys import getdefaultencoding from types import GeneratorType import unittest -from mwparserfromhell.compat import bytes, py3k, range, str +from mwparserfromhell.compat import bytes, py3k, str from mwparserfromhell.string_mixin import StringMixIn +from .compat import range + class _FakeString(StringMixIn): def __init__(self, data): self._data = data From e3f89af62dcc323b6119174a07868057e814ede9 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 30 Mar 2013 18:38:29 -0400 Subject: [PATCH 54/67] Adding a TreeEqualityTestCase base class. --- tests/_test_tokenizer.py | 3 +- tests/_test_tree_equality.py | 78 ++++++++++++++++++++++++++++++++++++++++++++ tests/test_ctokenizer.py | 2 +- tests/test_parser.py | 33 ++----------------- tests/test_pytokenizer.py | 2 +- 5 files changed, 84 insertions(+), 34 deletions(-) create mode 100644 tests/_test_tree_equality.py diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 379b4fa..13882aa 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -21,6 +21,7 @@ # SOFTWARE. from __future__ import print_function, unicode_literals +from unittest import TestCase from os import listdir, path from mwparserfromhell.compat import py3k @@ -31,7 +32,7 @@ class _TestParseError(Exception): pass -class TokenizerTestCase(object): +class TokenizerTestCase(TestCase): """A base test case for tokenizers, whose tests are loaded dynamically. 
Subclassed along with unittest.TestCase to form TestPyTokenizer and diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py new file mode 100644 index 0000000..26c373d --- /dev/null +++ b/tests/_test_tree_equality.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +from unittest import TestCase + +from mwparserfromhell.nodes import Template, Text, Wikilink +from mwparserfromhell.nodes.extras import Parameter +from mwparserfromhell.wikicode import Wikicode + +class TreeEqualityTestCase(TestCase): + """A base test case with support for comparing the equality of node trees. + + This adds a number of type equality functions, for Wikicode, Text, + Templates, and Wikilinks. 
+ """ + + def assertNodeEqual(self, expected, actual): + registry = { + Text: self.assertTextNodeEqual, + Template: self.assertTemplateNodeEqual, + Wikilink: self.assertWikilinkNodeEqual + } + for nodetype in registry: + if isinstance(expected, nodetype): + self.assertIsInstance(actual, nodetype) + registry[nodetype](expected, actual) + + def assertTextNodeEqual(self, expected, actual): + """Assert that two Text nodes have the same data.""" + self.assertEqual(expected.value, actual.value) + + def assertTemplateNodeEqual(self, expected, actual): + """Assert that two Template nodes have the same data.""" + self.assertWikicodeEqual(expected.name, actual.name) + length = len(expected.params) + self.assertEqual(length, len(actual.params)) + for i in range(length): + exp_param = expected.params[i] + act_param = actual.params[i] + self.assertWikicodeEqual(exp_param.name, act_param.name) + self.assertWikicodeEqual(exp_param.value, act_param.value) + self.assertIs(exp_param.showkey, act_param.showkey) + + def assertWikilinkNodeEqual(self, expected, actual): + """Assert that two Wikilink nodes have the same data.""" + self.assertWikicodeEqual(expected.title, actual.title) + if expected.text is not None: + self.assertWikicodeEqual(expected.text, actual.text) + else: + self.assertIs(None, actual.text) + + def assertWikicodeEqual(self, expected, actual): + """Assert that two Wikicode objects have the same data.""" + self.assertIsInstance(actual, Wikicode) + length = len(expected.nodes) + self.assertEqual(length, len(actual.nodes)) + for i in range(length): + self.assertNodeEqual(expected.get(i), actual.get(i)) diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 7a082e8..955b9a0 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -31,7 +31,7 @@ except ImportError: from ._test_tokenizer import TokenizerTestCase @unittest.skipUnless(CTokenizer, "C tokenizer not available") -class TestCTokenizer(TokenizerTestCase, unittest.TestCase): +class TestCTokenizer(TokenizerTestCase): """Test cases for the C tokenizer.""" @classmethod diff --git a/tests/test_parser.py b/tests/test_parser.py index 1c37a85..9d2c969 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -29,41 +29,12 @@ from mwparserfromhell.nodes.extras import Parameter from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode +from ._test_tree_equality import TreeEqualityTestCase from .compat import range -class TestParser(unittest.TestCase): +class TestParser(TreeEqualityTestCase): """Tests for the Parser class itself, which tokenizes and builds nodes.""" - def assertNodesEqual(self, expected, actual): - """Assert that two Nodes are the same type and have the same data.""" - self.assertIs(type(expected), type(actual)) - if isinstance(expected, Text): - self.assertEqual(expected.value, actual.value) - elif isinstance(expected, Template): - self.assertWikicodeEqual(expected.name, actual.name) - length = len(expected.params) - self.assertEqual(length, len(actual.params)) - for i in range(length): - exp_param = expected.params[i] - act_param = actual.params[i] - self.assertWikicodeEqual(exp_param.name, act_param.name) - self.assertWikicodeEqual(exp_param.value, act_param.value) - self.assertIs(exp_param.showkey, act_param.showkey) - elif isinstance(expected, Wikilink): - self.assertWikicodeEqual(expected.title, actual.title) - if expected.text is not None: - self.assertWikicodeEqual(expected.text, actual.text) - else: - self.assertIs(None, actual.text) - - def 
assertWikicodeEqual(self, expected, actual): - """Assert that two Wikicode objects have the same data.""" - self.assertIsInstance(actual, Wikicode) - length = len(expected.nodes) - self.assertEqual(length, len(actual.nodes)) - for i in range(length): - self.assertNodesEqual(expected.get(i), actual.get(i)) - def test_use_c(self): """make sure the correct tokenizer is used""" if parser.use_c: diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 697c7e5..7b37eb3 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -27,7 +27,7 @@ from mwparserfromhell.parser.tokenizer import Tokenizer from ._test_tokenizer import TokenizerTestCase -class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): +class TestPyTokenizer(TokenizerTestCase): """Test cases for the Python tokenizer.""" @classmethod From a8cb275b941b70524e8b97341784097434ae627c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 1 Apr 2013 19:04:55 -0400 Subject: [PATCH 55/67] Add TestUtils; implement two tests for it. Also, add a missing docstring in TreeEqualityTestCase. --- tests/_test_tree_equality.py | 1 + tests/test_utils.py | 67 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 tests/test_utils.py diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 26c373d..0fdb531 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -35,6 +35,7 @@ class TreeEqualityTestCase(TestCase): """ def assertNodeEqual(self, expected, actual): + """Assert that two Nodes have the same type and have the same data.""" registry = { Text: self.assertTextNodeEqual, Template: self.assertTemplateNodeEqual, diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..8afad7a --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.nodes import Template, Text +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.utils import parse_anything +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +class TestUtils(TreeEqualityTestCase): + """Tests for the utils module, which provides parse_anything().""" + + def test_parse_anything_valid(self): + """tests for valid input to utils.parse_anything()""" + wrap = lambda L: Wikicode(SmartList(L)) + textify = lambda L: wrap([Text(item) for item in L]) + tests = [ + (wrap([Text("foobar")]), textify(["foobar"])), + (Template(wrap([Text("spam")])), + wrap([Template(textify(["spam"]))])), + ("fóóbar", textify(["fóóbar"])), + (b"foobár", textify(["foobár"])), + (123, textify(["123"])), + (True, textify(["True"])), + (None, wrap([])), + ([Text("foo"), Text("bar"), Text("baz")], + textify(["foo", "bar", "baz"])), + ([wrap([Text("foo")]), Text("bar"), "baz", 123, 456], + textify(["foo", "bar", "baz", "123", "456"])), + ([[[([[((("foo",),),)], "bar"],)]]], textify(["foo", "bar"])) + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, parse_anything(test)) + + def test_parse_anything_invalid(self): + """tests for invalid input to utils.parse_anything()""" + self.assertRaises(ValueError, parse_anything, Ellipsis) + self.assertRaises(ValueError, parse_anything, object) + self.assertRaises(ValueError, parse_anything, object()) + self.assertRaises(ValueError, parse_anything, type) + self.assertRaises(ValueError, parse_anything, ["foo", [object]]) + +if __name__ == "__main__": + unittest.main(verbosity=2) From 30d4f137a829a7bfd613363f3579f97337462024 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 1 Apr 2013 19:06:59 -0400 Subject: [PATCH 56/67] Curse you, Python 3! --- tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 8afad7a..c088530 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -42,7 +42,7 @@ class TestUtils(TreeEqualityTestCase): (Template(wrap([Text("spam")])), wrap([Template(textify(["spam"]))])), ("fóóbar", textify(["fóóbar"])), - (b"foobár", textify(["foobár"])), + (b"foob\xc3\xa1r", textify(["foobár"])), (123, textify(["123"])), (True, textify(["True"])), (None, wrap([])), From cda1ce95f3b46c3392e57de182bc925c815b7d1f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 1 Apr 2013 19:11:30 -0400 Subject: [PATCH 57/67] Roll back part of e3f89af62d because CURSE YOU UNIT TESTING FRAMEWORK --- tests/_test_tokenizer.py | 3 +-- tests/test_ctokenizer.py | 2 +- tests/test_pytokenizer.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 13882aa..379b4fa 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -21,7 +21,6 @@ # SOFTWARE. from __future__ import print_function, unicode_literals -from unittest import TestCase from os import listdir, path from mwparserfromhell.compat import py3k @@ -32,7 +31,7 @@ class _TestParseError(Exception): pass -class TokenizerTestCase(TestCase): +class TokenizerTestCase(object): """A base test case for tokenizers, whose tests are loaded dynamically. 
Subclassed along with unittest.TestCase to form TestPyTokenizer and diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 955b9a0..7a082e8 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -31,7 +31,7 @@ except ImportError: from ._test_tokenizer import TokenizerTestCase @unittest.skipUnless(CTokenizer, "C tokenizer not available") -class TestCTokenizer(TokenizerTestCase): +class TestCTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the C tokenizer.""" @classmethod diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 7b37eb3..697c7e5 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -27,7 +27,7 @@ from mwparserfromhell.parser.tokenizer import Tokenizer from ._test_tokenizer import TokenizerTestCase -class TestPyTokenizer(TokenizerTestCase): +class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the Python tokenizer.""" @classmethod From 892092434fa748ef06ff2558c5b9dbfce9155071 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 1 Apr 2013 21:04:53 -0400 Subject: [PATCH 58/67] Skeleton for TestBuilder; adding some nodes to TreeEqualityTestCase. --- tests/_test_tree_equality.py | 38 ++++++++++++++++++++++++++++++------ tests/test_builder.py | 46 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 76 insertions(+), 8 deletions(-) diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 0fdb531..16f4b49 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -23,8 +23,9 @@ from __future__ import unicode_literals from unittest import TestCase -from mwparserfromhell.nodes import Template, Text, Wikilink -from mwparserfromhell.nodes.extras import Parameter +from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, + Tag, Template, Text, Wikilink) +from mwparserfromhell.nodes.extras import Attribute, Parameter from mwparserfromhell.wikicode import Wikicode class TreeEqualityTestCase(TestCase): @@ -37,8 +38,13 @@ class TreeEqualityTestCase(TestCase): def assertNodeEqual(self, expected, actual): """Assert that two Nodes have the same type and have the same data.""" registry = { - Text: self.assertTextNodeEqual, + Argument: self.assertArgumentNodeEqual, + Comment: self.assertCommentNodeEqual, + Heading: self.assertHeadingNodeEqual, + HTMLEntity: self.assertHTMLEntityNodeEqual, + Tag: self.assertTagNodeEqual, Template: self.assertTemplateNodeEqual, + Text: self.assertTextNodeEqual, Wikilink: self.assertWikilinkNodeEqual } for nodetype in registry: @@ -46,9 +52,25 @@ class TreeEqualityTestCase(TestCase): self.assertIsInstance(actual, nodetype) registry[nodetype](expected, actual) - def assertTextNodeEqual(self, expected, actual): - """Assert that two Text nodes have the same data.""" - self.assertEqual(expected.value, actual.value) + def assertArgumentNodeEqual(self, expected, actual): + """Assert that two Argument nodes have the same data.""" + pass + + def assertCommentNodeEqual(self, expected, actual): + """Assert that two Comment nodes have the same data.""" + pass + + def assertHeadingNodeEqual(self, expected, actual): + """Assert that two Heading nodes have the same data.""" + pass + + def assertHTMLEntityNodeEqual(self, expected, actual): + """Assert that two HTMLEntity nodes have the same data.""" + pass + + def assertTagNodeEqual(self, expected, actual): + """Assert that two Tag nodes have the same data.""" + pass def assertTemplateNodeEqual(self, expected, actual): """Assert that two Template 
nodes have the same data.""" @@ -62,6 +84,10 @@ class TreeEqualityTestCase(TestCase): self.assertWikicodeEqual(exp_param.value, act_param.value) self.assertIs(exp_param.showkey, act_param.showkey) + def assertTextNodeEqual(self, expected, actual): + """Assert that two Text nodes have the same data.""" + self.assertEqual(expected.value, actual.value) + def assertWikilinkNodeEqual(self, expected, actual): """Assert that two Wikilink nodes have the same data.""" self.assertWikicodeEqual(expected.title, actual.title) diff --git a/tests/test_builder.py b/tests/test_builder.py index a3518fd..a80d8bf 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -23,8 +23,50 @@ from __future__ import unicode_literals import unittest -class TestBuilder(unittest.TestCase): - pass +from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, + Tag, Template, Text, Wikilink) +from mwparserfromhell.nodes.extras import Attribute, Parameter +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) + +class TestBuilder(TreeEqualityTestCase): + """Tests for the builder, which turns tokens into Wikicode objects.""" + + def test_text(self): + """tests for building Text nodes""" + pass + + def test_template(self): + """tests for building Template nodes""" + pass + + def test_argument(self): + """tests for building Argument nodes""" + pass + + def test_wikilink(self): + """tests for building Wikilink nodes""" + pass + + def test_html_entity(self): + """tests for building HTMLEntity nodes""" + pass + + def test_heading(self): + """tests for building Heading nodes""" + pass + + def test_comment(self): + """tests for building Comment nodes""" + pass + + def test_tag(self): + """tests for building Tag nodes""" + pass if __name__ == "__main__": unittest.main(verbosity=2) From 404b4479a26ab89f41b2e9bae5c6ffc8d5777f67 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 1 Apr 2013 21:30:19 -0400 Subject: [PATCH 59/67] Implement the remaining asserts in TreeEqualityTestCase. 
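
For reference, a rough sketch of how the new asserts are meant to be called from a
concrete test case (illustrative only; this example test is not added by the patch):

    from mwparserfromhell.nodes import HTMLEntity, Text
    from mwparserfromhell.smart_list import SmartList
    from mwparserfromhell.wikicode import Wikicode

    from ._test_tree_equality import TreeEqualityTestCase

    class TestExample(TreeEqualityTestCase):
        def test_entity_tree(self):
            # Build two equivalent node trees by hand and compare them node by node.
            expected = Wikicode(SmartList(
                [Text("foo "), HTMLEntity("nbsp", named=True, hexadecimal=False)]))
            actual = Wikicode(SmartList(
                [Text("foo "), HTMLEntity("nbsp", named=True, hexadecimal=False)]))
            self.assertWikicodeEqual(expected, actual)
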
---
 mwparserfromhell/nodes/html_entity.py |  5 ++++-
 tests/_test_tree_equality.py          | 18 +++++++++++++-----
 tests/test_builder.py                 |  1 +
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py
index 221040b..5b7607c 100644
--- a/mwparserfromhell/nodes/html_entity.py
+++ b/mwparserfromhell/nodes/html_entity.py
@@ -135,7 +135,10 @@ class HTMLEntity(Node):

     @hex_char.setter
     def hex_char(self, newval):
-        self._hex_char = bool(newval)
+        newval = str(newval)
+        if newval not in ("x", "X"):
+            raise ValueError(newval)
+        self._hex_char = newval

     def normalize(self):
         """Return the unicode character represented by the HTML entity."""
diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py
index 16f4b49..2014ac1 100644
--- a/tests/_test_tree_equality.py
+++ b/tests/_test_tree_equality.py
@@ -54,23 +54,31 @@ class TreeEqualityTestCase(TestCase):

     def assertArgumentNodeEqual(self, expected, actual):
         """Assert that two Argument nodes have the same data."""
-        pass
+        self.assertWikicodeEqual(expected.name, actual.name)
+        if expected.default is not None:
+            self.assertWikicodeEqual(expected.default, actual.default)
+        else:
+            self.assertIs(None, actual.default)

     def assertCommentNodeEqual(self, expected, actual):
         """Assert that two Comment nodes have the same data."""
-        pass
+        self.assertWikicodeEqual(expected.contents, actual.contents)

     def assertHeadingNodeEqual(self, expected, actual):
         """Assert that two Heading nodes have the same data."""
-        pass
+        self.assertWikicodeEqual(expected.title, actual.title)
+        self.assertEqual(expected.level, actual.level)

     def assertHTMLEntityNodeEqual(self, expected, actual):
         """Assert that two HTMLEntity nodes have the same data."""
-        pass
+        self.assertEqual(expected.value, actual.value)
+        self.assertIs(expected.named, actual.named)
+        self.assertIs(expected.hexadecimal, actual.hexadecimal)
+        self.assertEquals(expected.hex_char, actual.hex_char)

     def assertTagNodeEqual(self, expected, actual):
         """Assert that two Tag nodes have the same data."""
-        pass
+        self.fail("Holding this until feature/html_tags is ready.")

     def assertTemplateNodeEqual(self, expected, actual):
         """Assert that two Template nodes have the same data."""
diff --git a/tests/test_builder.py b/tests/test_builder.py
index a80d8bf..e6919c1 100644
--- a/tests/test_builder.py
+++ b/tests/test_builder.py
@@ -64,6 +64,7 @@ class TestBuilder(TreeEqualityTestCase):
         """tests for building Comment nodes"""
         pass

+    @unittest.skip("holding this until feature/html_tags is ready")
     def test_tag(self):
         """tests for building Tag nodes"""
         pass

From cb23587ab6e4cb3dfc21d817f2cb7b18c5542a60 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Wed, 3 Apr 2013 11:00:07 -0400
Subject: [PATCH 60/67] Adding some Builder tests

---
 tests/test_builder.py | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/tests/test_builder.py b/tests/test_builder.py
index e6919c1..d577bfc 100644
--- a/tests/test_builder.py
+++ b/tests/test_builder.py
@@ -26,6 +26,8 @@ import unittest
 from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity,
                                     Tag, Template, Text, Wikilink)
 from mwparserfromhell.nodes.extras import Attribute, Parameter
+from mwparserfromhell.parser import tokens
+from mwparserfromhell.parser.builder import Builder
 from mwparserfromhell.smart_list import SmartList
 from mwparserfromhell.wikicode import Wikicode

@@ -36,13 +38,34 @@ wrap = lambda L: Wikicode(SmartList(L))
 class TestBuilder(TreeEqualityTestCase):
     """Tests for the builder, which turns tokens into Wikicode objects."""

+    def setUp(self):
+        self.builder = Builder()
+
     def test_text(self):
         """tests for building Text nodes"""
-        pass
+        tests = [
+            ([tokens.Text(text="foobar")], wrap([Text("foobar")])),
+            ([tokens.Text(text="fóóbar")], wrap([Text("fóóbar")])),
+            ([tokens.Text(text="spam"), tokens.Text(text="eggs")],
+              wrap([Text("spam"), Text("eggs")])),
+        ]
+        for test, valid in tests:
+            self.assertWikicodeEqual(valid, self.builder.build(test))

     def test_template(self):
         """tests for building Template nodes"""
-        pass
+        tests = [
+            ([tokens.TemplateOpen(), tokens.Text(text="foobar"), tokens.TemplateClose()],
+             wrap([Template(wrap([Text("foobar")]))])),
+            ([tokens.TemplateOpen(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.TemplateClose()],
+             wrap([Template(wrap([Text("spam"), Text("eggs")]))])),
+            ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateClose()],
+             wrap([Template(wrap([Text("foo")]), params=[Parameter(wrap([Text("1")]), wrap([Text("bar")]), showkey=False)])])),
+            ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateParamEquals(), tokens.Text(text="baz"), tokens.TemplateClose()],
+             wrap([Template(wrap([Text("foo")]), params=[Parameter(wrap([Text("bar")]), wrap([Text("baz")]))])])),
+        ]
+        for test, valid in tests:
+            self.assertWikicodeEqual(valid, self.builder.build(test))

     def test_argument(self):
         """tests for building Argument nodes"""

From b8e8d057abc4fefec78f967adf30326669c0726c Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Thu, 4 Apr 2013 10:49:04 -0400
Subject: [PATCH 61/67] Finish test_template()

---
 tests/test_builder.py | 50 +++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 41 insertions(+), 9 deletions(-)

diff --git a/tests/test_builder.py b/tests/test_builder.py
index d577bfc..952b501 100644
--- a/tests/test_builder.py
+++ b/tests/test_builder.py
@@ -47,7 +47,7 @@ class TestBuilder(TreeEqualityTestCase):
             ([tokens.Text(text="foobar")], wrap([Text("foobar")])),
             ([tokens.Text(text="fóóbar")], wrap([Text("fóóbar")])),
             ([tokens.Text(text="spam"), tokens.Text(text="eggs")],
-              wrap([Text("spam"), Text("eggs")])),
+             wrap([Text("spam"), Text("eggs")])),
         ]
         for test, valid in tests:
             self.assertWikicodeEqual(valid, self.builder.build(test))
@@ -55,14 +55,46 @@ class TestBuilder(TreeEqualityTestCase):
     def test_template(self):
         """tests for building Template nodes"""
         tests = [
-            ([tokens.TemplateOpen(), tokens.Text(text="foobar"), tokens.TemplateClose()],
-             wrap([Template(wrap([Text("foobar")]))])),
-            ([tokens.TemplateOpen(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.TemplateClose()],
-             wrap([Template(wrap([Text("spam"), Text("eggs")]))])),
-            ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateClose()],
-             wrap([Template(wrap([Text("foo")]), params=[Parameter(wrap([Text("1")]), wrap([Text("bar")]), showkey=False)])])),
-            ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateParamEquals(), tokens.Text(text="baz"), tokens.TemplateClose()],
-             wrap([Template(wrap([Text("foo")]), params=[Parameter(wrap([Text("bar")]), wrap([Text("baz")]))])])),
+            ([tokens.TemplateOpen(), tokens.Text(text="foobar"),
+              tokens.TemplateClose()],
+             wrap([Template(wrap([Text("foobar")]))])),
+
+            ([tokens.TemplateOpen(), tokens.Text(text="spam"),
+              tokens.Text(text="eggs"), tokens.TemplateClose()],
+             wrap([Template(wrap([Text("spam"), Text("eggs")]))])),
+
+            ([tokens.TemplateOpen(), tokens.Text(text="foo"),
+              tokens.TemplateParamSeparator(), tokens.Text(text="bar"),
+              tokens.TemplateClose()],
+             wrap([Template(wrap([Text("foo")]), params=[
+                Parameter(wrap([Text("1")]), wrap([Text("bar")]),
+                    showkey=False)])])),
+
+            ([tokens.TemplateOpen(), tokens.Text(text="foo"),
+              tokens.TemplateParamSeparator(), tokens.Text(text="bar"),
+              tokens.TemplateParamEquals(), tokens.Text(text="baz"),
+              tokens.TemplateClose()],
+             wrap([Template(wrap([Text("foo")]), params=[
+                Parameter(wrap([Text("bar")]), wrap([Text("baz")]))])])),
+
+            ([tokens.TemplateOpen(), tokens.Text(text="foo"),
+              tokens.TemplateParamSeparator(), tokens.Text(text="bar"),
+              tokens.TemplateParamEquals(), tokens.Text(text="baz"),
+              tokens.TemplateParamSeparator(), tokens.Text(text="biz"),
+              tokens.TemplateParamSeparator(), tokens.Text(text="buzz"),
+              tokens.TemplateParamSeparator(), tokens.Text(text="3"),
+              tokens.TemplateParamEquals(), tokens.Text(text="buff"),
+              tokens.TemplateParamSeparator(), tokens.Text(text="baff"),
+              tokens.TemplateClose()],
+             wrap([Template(wrap([Text("foo")]), params=[
+                Parameter(wrap([Text("bar")]), wrap([Text("baz")])),
+                Parameter(wrap([Text("1")]), wrap([Text("biz")]),
+                    showkey=False),
+                Parameter(wrap([Text("2")]), wrap([Text("buzz")]),
+                    showkey=False),
+                Parameter(wrap([Text("3")]), wrap([Text("buff")])),
+                Parameter(wrap([Text("3")]), wrap([Text("baff")]),
+                    showkey=False)])])),
         ]
         for test, valid in tests:
             self.assertWikicodeEqual(valid, self.builder.build(test))

From e32a6692f8ad9f8d6c57a56ca40e8aedf128c074 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Thu, 4 Apr 2013 10:59:16 -0400
Subject: [PATCH 62/67] test_argument()

---
 tests/test_builder.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/tests/test_builder.py b/tests/test_builder.py
index 952b501..e632644 100644
--- a/tests/test_builder.py
+++ b/tests/test_builder.py
@@ -101,7 +101,29 @@ class TestBuilder(TreeEqualityTestCase):

     def test_argument(self):
         """tests for building Argument nodes"""
-        pass
+        tests = [
+            ([tokens.ArgumentOpen(), tokens.Text(text="foobar"),
+              tokens.ArgumentClose()],
+             wrap([Argument(wrap([Text("foobar")]))])),
+
+            ([tokens.ArgumentOpen(), tokens.Text(text="spam"),
+              tokens.Text(text="eggs"), tokens.ArgumentClose()],
+             wrap([Argument(wrap([Text("spam"), Text("eggs")]))])),
+
+            ([tokens.ArgumentOpen(), tokens.Text(text="foo"),
+              tokens.ArgumentSeparator(), tokens.Text(text="bar"),
+              tokens.ArgumentClose()],
+             wrap([Argument(wrap([Text("foo")]), wrap([Text("bar")]))])),
+
+            ([tokens.ArgumentOpen(), tokens.Text(text="foo"),
+              tokens.Text(text="bar"), tokens.ArgumentSeparator(),
+              tokens.Text(text="baz"), tokens.Text(text="biz"),
+              tokens.ArgumentClose()],
+             wrap([Argument(wrap([Text("foo"), Text("bar")]),
+                            wrap([Text("baz"), Text("biz")]))])),
+        ]
+        for test, valid in tests:
+            self.assertWikicodeEqual(valid, self.builder.build(test))

     def test_wikilink(self):
         """tests for building Wikilink nodes"""

From 7289d8c070a6fcd2bceaa8e00e7661c9c21461a5 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Fri, 5 Apr 2013 10:25:48 -0400
Subject: [PATCH 63/67] test_wikilink(); fix indentation

---
 tests/test_builder.py | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/tests/test_builder.py b/tests/test_builder.py
index e632644..ea38dae 100644
--- a/tests/test_builder.py
+++ b/tests/test_builder.py
@@ -68,7 +68,7 @@ class TestBuilder(TreeEqualityTestCase):
               tokens.TemplateClose()],
              wrap([Template(wrap([Text("foo")]), params=[
                 Parameter(wrap([Text("1")]), wrap([Text("bar")]),
-                    showkey=False)])])),
+                          showkey=False)])])),

             ([tokens.TemplateOpen(), tokens.Text(text="foo"),
               tokens.TemplateParamSeparator(), tokens.Text(text="bar"),
@@ -89,12 +89,12 @@ class TestBuilder(TreeEqualityTestCase):
              wrap([Template(wrap([Text("foo")]), params=[
                 Parameter(wrap([Text("bar")]), wrap([Text("baz")])),
                 Parameter(wrap([Text("1")]), wrap([Text("biz")]),
-                    showkey=False),
+                          showkey=False),
                 Parameter(wrap([Text("2")]), wrap([Text("buzz")]),
-                    showkey=False),
+                          showkey=False),
                 Parameter(wrap([Text("3")]), wrap([Text("buff")])),
                 Parameter(wrap([Text("3")]), wrap([Text("baff")]),
-                    showkey=False)])])),
+                          showkey=False)])])),
         ]
         for test, valid in tests:
             self.assertWikicodeEqual(valid, self.builder.build(test))
@@ -127,7 +127,29 @@ class TestBuilder(TreeEqualityTestCase):

     def test_wikilink(self):
         """tests for building Wikilink nodes"""
-        pass
+        tests = [
+            ([tokens.WikilinkOpen(), tokens.Text(text="foobar"),
+              tokens.WikilinkClose()],
+             wrap([Wikilink(wrap([Text("foobar")]))])),
+
+            ([tokens.WikilinkOpen(), tokens.Text(text="spam"),
+              tokens.Text(text="eggs"), tokens.WikilinkClose()],
+             wrap([Wikilink(wrap([Text("spam"), Text("eggs")]))])),
+
+            ([tokens.WikilinkOpen(), tokens.Text(text="foo"),
+              tokens.WikilinkSeparator(), tokens.Text(text="bar"),
+              tokens.WikilinkClose()],
+             wrap([Wikilink(wrap([Text("foo")]), wrap([Text("bar")]))])),
+
+            ([tokens.WikilinkOpen(), tokens.Text(text="foo"),
+              tokens.Text(text="bar"), tokens.WikilinkSeparator(),
+              tokens.Text(text="baz"), tokens.Text(text="biz"),
+              tokens.WikilinkClose()],
+             wrap([Wikilink(wrap([Text("foo"), Text("bar")]),
+                            wrap([Text("baz"), Text("biz")]))])),
+        ]
+        for test, valid in tests:
+            self.assertWikicodeEqual(valid, self.builder.build(test))

     def test_html_entity(self):
         """tests for building HTMLEntity nodes"""

From e9463543f46c49748740f69c5e5bcdb569338a2a Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Fri, 5 Apr 2013 10:46:43 -0400
Subject: [PATCH 64/67] test_html_entity()

---
 tests/test_builder.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/tests/test_builder.py b/tests/test_builder.py
index ea38dae..7dcbc0e 100644
--- a/tests/test_builder.py
+++ b/tests/test_builder.py
@@ -153,7 +153,23 @@ class TestBuilder(TreeEqualityTestCase):

     def test_html_entity(self):
         """tests for building HTMLEntity nodes"""
-        pass
+        tests = [
+            ([tokens.HTMLEntityStart(), tokens.Text(text="nbsp"),
+              tokens.HTMLEntityEnd()],
+             wrap([HTMLEntity("nbsp", named=True, hexadecimal=False)])),
+
+            ([tokens.HTMLEntityStart(), tokens.HTMLEntityNumeric(),
+              tokens.Text(text="107"), tokens.HTMLEntityEnd()],
+             wrap([HTMLEntity("107", named=False, hexadecimal=False)])),
+
+            ([tokens.HTMLEntityStart(), tokens.HTMLEntityNumeric(),
+              tokens.HTMLEntityHex(char="X"), tokens.Text(text="6B"),
+              tokens.HTMLEntityEnd()],
+             wrap([HTMLEntity("6B", named=False, hexadecimal=True,
+                              hex_char="X")])),
+        ]
+        for test, valid in tests:
+            self.assertWikicodeEqual(valid, self.builder.build(test))

     def test_heading(self):
         """tests for building Heading nodes"""

From 132c6584d059497374c7f0c53285e6251beb6675 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Fri, 5 Apr 2013 10:52:43 -0400
Subject: [PATCH 65/67] test_heading() and test_comment()

---
 tests/test_builder.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/tests/test_builder.py b/tests/test_builder.py
index 7dcbc0e..410eb4a 100644
--- a/tests/test_builder.py
+++ b/tests/test_builder.py
@@ -173,11 +173,31 @@ class TestBuilder(TreeEqualityTestCase):

     def test_heading(self):
         """tests for building Heading nodes"""
-        pass
+        tests = [
+            ([tokens.HeadingStart(level=2), tokens.Text(text="foobar"),
+              tokens.HeadingEnd()],
+             wrap([Heading(wrap([Text("foobar")]), 2)])),
+
+            ([tokens.HeadingStart(level=4), tokens.Text(text="spam"),
+              tokens.Text(text="eggs"), tokens.HeadingEnd()],
+             wrap([Heading(wrap([Text("spam"), Text("eggs")]), 4)])),
+        ]
+        for test, valid in tests:
+            self.assertWikicodeEqual(valid, self.builder.build(test))

     def test_comment(self):
         """tests for building Comment nodes"""
-        pass
+        tests = [
+            ([tokens.CommentStart(), tokens.Text(text="foobar"),
+              tokens.CommentEnd()],
+             wrap([Comment(wrap([Text("foobar")]))])),
+
+            ([tokens.CommentStart(), tokens.Text(text="spam"),
+              tokens.Text(text="eggs"), tokens.CommentEnd()],
+             wrap([Comment(wrap([Text("spam"), Text("eggs")]))])),
+        ]
+        for test, valid in tests:
+            self.assertWikicodeEqual(valid, self.builder.build(test))

     @unittest.skip("holding this until feature/html_tags is ready")
     def test_tag(self):

From 094e867ee6d7a2f34c6555e318ccdb1622526484 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Sat, 6 Apr 2013 15:45:51 -0400
Subject: [PATCH 66/67] Add test_integration(); add a horrible abuse of PEP8

---
 tests/_test_tree_equality.py |  2 +-
 tests/test_builder.py        | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py
index 2014ac1..758a72e 100644
--- a/tests/_test_tree_equality.py
+++ b/tests/_test_tree_equality.py
@@ -74,7 +74,7 @@ class TreeEqualityTestCase(TestCase):
         self.assertEqual(expected.value, actual.value)
         self.assertIs(expected.named, actual.named)
         self.assertIs(expected.hexadecimal, actual.hexadecimal)
-        self.assertEquals(expected.hex_char, actual.hex_char)
+        self.assertEqual(expected.hex_char, actual.hex_char)

     def assertTagNodeEqual(self, expected, actual):
         """Assert that two Tag nodes have the same data."""
diff --git a/tests/test_builder.py b/tests/test_builder.py
index 410eb4a..9425713 100644
--- a/tests/test_builder.py
+++ b/tests/test_builder.py
@@ -204,5 +204,25 @@ class TestBuilder(TreeEqualityTestCase):
         """tests for building Tag nodes"""
         pass

+    def test_integration(self):
+        """a test for building a combination of templates together"""
+        test = [tokens.TemplateOpen(), tokens.TemplateOpen(),
+                tokens.TemplateOpen(), tokens.TemplateOpen(),
+                tokens.Text(text="foo"), tokens.TemplateClose(),
+                tokens.Text(text="bar"), tokens.TemplateParamSeparator(),
+                tokens.Text(text="baz"), tokens.TemplateParamEquals(),
+                tokens.Text(text="biz"), tokens.TemplateClose(),
+                tokens.Text(text="buzz"), tokens.TemplateClose(),
+                tokens.Text(text="usr"), tokens.TemplateParamSeparator(),
+                tokens.TemplateOpen(), tokens.Text(text="bin"),
+                tokens.TemplateClose(), tokens.TemplateClose()]
+        valid = wrap(
+            [Template(wrap([Template(wrap([Template(wrap([Template(wrap([Text(
+                "foo")])), Text("bar")]), params=[Parameter(wrap([Text("baz")]),
+                wrap([Text("biz")]))]), Text("buzz")])), Text("usr")]), params=[
+            Parameter(wrap([Text("1")]), wrap([Template(wrap([Text("bin")]))]),
+                      showkey=False)])])
+        self.assertWikicodeEqual(valid, self.builder.build(test))
+
 if __name__ == "__main__":
     unittest.main(verbosity=2)

From 2d9b8a39b6509d8a39dcf12b90dbcb2e8f07433f Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Sat, 6 Apr 2013 16:17:47 -0400
Subject: [PATCH 67/67] test_integration2(); finish TestBuilder

---
 tests/test_builder.py | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/tests/test_builder.py b/tests/test_builder.py
index 9425713..1e578ed 100644
--- a/tests/test_builder.py
+++ b/tests/test_builder.py
@@ -206,6 +206,7 @@ class TestBuilder(TreeEqualityTestCase):

     def test_integration(self):
         """a test for building a combination of templates together"""
+        # {{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}
         test = [tokens.TemplateOpen(), tokens.TemplateOpen(),
                 tokens.TemplateOpen(), tokens.TemplateOpen(),
                 tokens.Text(text="foo"), tokens.TemplateClose(),
@@ -224,5 +225,37 @@ class TestBuilder(TreeEqualityTestCase):
                       showkey=False)])])
         self.assertWikicodeEqual(valid, self.builder.build(test))

+    def test_integration2(self):
+        """an even more audacious test for building a horrible wikicode mess"""
+        # {{a|b|{{c|[[d]]{{{e}}}}}}}[[f|{{{g}}}<!--h-->]]{{i|j=&nbsp;}}
+        test = [tokens.TemplateOpen(), tokens.Text(text="a"),
+                tokens.TemplateParamSeparator(), tokens.Text(text="b"),
+                tokens.TemplateParamSeparator(), tokens.TemplateOpen(),
+                tokens.Text(text="c"), tokens.TemplateParamSeparator(),
+                tokens.WikilinkOpen(), tokens.Text(text="d"),
+                tokens.WikilinkClose(), tokens.ArgumentOpen(),
+                tokens.Text(text="e"), tokens.ArgumentClose(),
+                tokens.TemplateClose(), tokens.TemplateClose(),
+                tokens.WikilinkOpen(), tokens.Text(text="f"),
+                tokens.WikilinkSeparator(), tokens.ArgumentOpen(),
+                tokens.Text(text="g"), tokens.ArgumentClose(),
+                tokens.CommentStart(), tokens.Text(text="h"),
+                tokens.CommentEnd(), tokens.WikilinkClose(),
+                tokens.TemplateOpen(), tokens.Text(text="i"),
+                tokens.TemplateParamSeparator(), tokens.Text(text="j"),
+                tokens.TemplateParamEquals(), tokens.HTMLEntityStart(),
+                tokens.Text(text="nbsp"), tokens.HTMLEntityEnd(),
+                tokens.TemplateClose()]
+        valid = wrap(
+            [Template(wrap([Text("a")]), params=[Parameter(wrap([Text("1")]),
+            wrap([Text("b")]), showkey=False), Parameter(wrap([Text("2")]),
+            wrap([Template(wrap([Text("c")]), params=[Parameter(wrap([Text("1")
+            ]), wrap([Wikilink(wrap([Text("d")])), Argument(wrap([Text("e")]))]
+            ), showkey=False)])]), showkey=False)]), Wikilink(wrap([Text("f")]
+            ), wrap([Argument(wrap([Text("g")])), Comment(wrap([Text("h")]))])
+            ), Template(wrap([Text("i")]), params=[Parameter(wrap([Text("j")]),
+            wrap([HTMLEntity("nbsp", named=True)]))])])
+        self.assertWikicodeEqual(valid, self.builder.build(test))
+
 if __name__ == "__main__":
     unittest.main(verbosity=2)
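
A side note on the hex_char change in PATCH 59 above: the setter no longer coerces its argument to a boolean but stores the literal prefix character, rejecting anything other than "x" or "X". A minimal sketch of what that means in practice (illustrative only, not part of the patches; it assumes the patched HTMLEntity is importable)::

    from mwparserfromhell.nodes import HTMLEntity

    ent = HTMLEntity("6b", named=False, hexadecimal=True)
    ent.hex_char = "X"   # accepted: the entity should render with an uppercase prefix, &#X6b;
    ent.hex_char = "q"   # raises ValueError under the new setter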
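
The TestBuilder cases above all exercise the same round trip: a hand-built token list goes through Builder.build() and the result is compared against a wrapped node tree. A small standalone sketch of that flow, outside of unittest (the variable names and this particular token stream are invented for illustration)::

    from mwparserfromhell.parser import tokens
    from mwparserfromhell.parser.builder import Builder

    # hand-built token stream for the wikitext "{{foo|bar}}"
    toks = [tokens.TemplateOpen(), tokens.Text(text="foo"),
            tokens.TemplateParamSeparator(), tokens.Text(text="bar"),
            tokens.TemplateClose()]
    code = Builder().build(toks)           # a Wikicode tree, like wrap([...]) above
    template = code.filter_templates()[0]
    print(template.name)                   # foo
    print(template.get(1).value)           # bar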