diff --git a/.gitignore b/.gitignore index d70b37d..ec4e8ca 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *.pyc +*.so *.egg *.egg-info .DS_Store diff --git a/LICENSE b/LICENSE index 49b719e..413f1c4 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (C) 2012 Ben Kurtovic +Copyright (C) 2012-2013 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.rst b/README.rst index 77f12c7..9847c33 100644 --- a/README.rst +++ b/README.rst @@ -18,7 +18,13 @@ so you can install the latest release with ``pip install mwparserfromhell`` cd mwparserfromhell python setup.py install -You can run the comprehensive unit testing suite with ``python setup.py test``. +If you get ``error: Unable to find vcvarsall.bat`` while installing, this is +because Windows can't find the compiler for C extensions. Consult this +`StackOverflow question`_ for help. You can also set ``ext_modules`` in +``setup.py`` to an empty list to prevent the extension from building. + +You can run the comprehensive unit testing suite with +``python setup.py test -q``. Usage ----- @@ -106,12 +112,12 @@ Integration ``Page`` objects have a ``parse`` method that essentially calls ``mwparserfromhell.parse()`` on ``page.get()``. 
-If you're using PyWikipedia_, your code might look like this:: +If you're using Pywikipedia_, your code might look like this:: import mwparserfromhell import wikipedia as pywikibot def parse(title): - site = pywikibot.get_site() + site = pywikibot.getSite() page = pywikibot.Page(site, title) text = page.get() return mwparserfromhell.parse(text) @@ -124,16 +130,19 @@ following code (via the API_):: import mwparserfromhell API_URL = "http://en.wikipedia.org/w/api.php" def parse(title): - raw = urllib.urlopen(API_URL, data).read() + data = {"action": "query", "prop": "revisions", "rvlimit": 1, + "rvprop": "content", "format": "json", "titles": title} + raw = urllib.urlopen(API_URL, urllib.urlencode(data)).read() res = json.loads(raw) text = res["query"]["pages"].values()[0]["revisions"][0]["*"] return mwparserfromhell.parse(text) -.. _MediaWiki: http://mediawiki.org -.. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig -.. _Σ: http://en.wikipedia.org/wiki/User:%CE%A3 -.. _Python Package Index: http://pypi.python.org -.. _get pip: http://pypi.python.org/pypi/pip -.. _EarwigBot: https://github.com/earwig/earwigbot -.. _PyWikipedia: http://pywikipediabot.sourceforge.net/ -.. _API: http://mediawiki.org/wiki/API +.. _MediaWiki: http://mediawiki.org +.. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig +.. _Σ: http://en.wikipedia.org/wiki/User:%CE%A3 +.. _Python Package Index: http://pypi.python.org +.. _StackOverflow question: http://stackoverflow.com/questions/2817869/error-unable-to-find-vcvarsall-bat +.. _get pip: http://pypi.python.org/pypi/pip +.. _EarwigBot: https://github.com/earwig/earwigbot +.. _Pywikipedia: https://www.mediawiki.org/wiki/Manual:Pywikipediabot +.. _API: http://mediawiki.org/wiki/API diff --git a/docs/conf.py b/docs/conf.py index cff089b..9fa1e02 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -42,7 +42,7 @@ master_doc = 'index' # General information about the project. 
project = u'mwparserfromhell' -copyright = u'2012 Ben Kurtovic' +copyright = u'2012, 2013 Ben Kurtovic' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/docs/index.rst b/docs/index.rst index 24f42f2..4b4c392 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -22,10 +22,16 @@ so you can install the latest release with ``pip install mwparserfromhell`` cd mwparserfromhell python setup.py install +If you get ``error: Unable to find vcvarsall.bat`` while installing, this is +because Windows can't find the compiler for C extensions. Consult this +`StackOverflow question`_ for help. You can also set ``ext_modules`` in +``setup.py`` to an empty list to prevent the extension from building. + You can run the comprehensive unit testing suite with ``python setup.py test``. -.. _Python Package Index: http://pypi.python.org -.. _get pip: http://pypi.python.org/pypi/pip +.. _Python Package Index: http://pypi.python.org +.. _get pip: http://pypi.python.org/pypi/pip +.. _StackOverflow question: http://stackoverflow.com/questions/2817869/error-unable-to-find-vcvarsall-bat Contents -------- diff --git a/docs/integration.rst b/docs/integration.rst index d0e54db..78810b8 100644 --- a/docs/integration.rst +++ b/docs/integration.rst @@ -7,12 +7,12 @@ Integration :py:func:`mwparserfromhell.parse() ` on :py:meth:`~earwigbot.wiki.page.Page.get`. -If you're using PyWikipedia_, your code might look like this:: +If you're using Pywikipedia_, your code might look like this:: import mwparserfromhell import wikipedia as pywikibot def parse(title): - site = pywikibot.get_site() + site = pywikibot.getSite() page = pywikibot.Page(site, title) text = page.get() return mwparserfromhell.parse(text) @@ -31,5 +31,5 @@ following code (via the API_):: return mwparserfromhell.parse(text) .. _EarwigBot: https://github.com/earwig/earwigbot -.. 
_PyWikipedia: http://pywikipediabot.sourceforge.net/ +.. _Pywikipedia: https://www.mediawiki.org/wiki/Manual:Pywikipediabot .. _API: http://mediawiki.org/wiki/API diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index 4f73a0e..99bc0c2 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -29,12 +29,11 @@ outrageously powerful parser for `MediaWiki `_ wikicode. from __future__ import unicode_literals __author__ = "Ben Kurtovic" -__copyright__ = "Copyright (C) 2012 Ben Kurtovic" +__copyright__ = "Copyright (C) 2012, 2013 Ben Kurtovic" __license__ = "MIT License" __version__ = "0.2.dev" __email__ = "ben.kurtovic@verizon.net" -from . import nodes, parser, smart_list, string_mixin, wikicode +from . import compat, nodes, parser, smart_list, string_mixin, utils, wikicode -parse = lambda text: parser.Parser(text).parse() -parse.__doc__ = "Short for :py:meth:`.Parser.parse`." +parse = utils.parse_anything diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index a1b6b8f..bb81513 100755 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -1,29 +1,29 @@ -# -*- coding: utf-8 -*- - -""" -Implements support for both Python 2 and Python 3 by defining common types in -terms of their Python 2/3 variants. For example, :py:class:`str` is set to -:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, -:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These -types are meant to be imported directly from within the parser's modules. 
-""" - -import sys - -py3k = sys.version_info[0] == 3 - -if py3k: - bytes = bytes - str = str - basestring = str - maxsize = sys.maxsize - import html.entities as htmlentities - -else: - bytes = str - str = unicode - basestring = basestring - maxsize = sys.maxint - import htmlentitydefs as htmlentities - -del sys +# -*- coding: utf-8 -*- + +""" +Implements support for both Python 2 and Python 3 by defining common types in +terms of their Python 2/3 variants. For example, :py:class:`str` is set to +:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, +:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These +types are meant to be imported directly from within the parser's modules. +""" + +import sys + +py3k = sys.version_info[0] == 3 + +if py3k: + bytes = bytes + str = str + basestring = str + maxsize = sys.maxsize + import html.entities as htmlentities + +else: + bytes = str + str = unicode + basestring = basestring + maxsize = sys.maxint + import htmlentitydefs as htmlentities + +del sys diff --git a/mwparserfromhell/nodes/__init__.py b/mwparserfromhell/nodes/__init__.py index 86a8746..faaa0b2 100644 --- a/mwparserfromhell/nodes/__init__.py +++ b/mwparserfromhell/nodes/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/argument.py b/mwparserfromhell/nodes/argument.py index 918fac6..d7db92a 100644 --- a/mwparserfromhell/nodes/argument.py +++ b/mwparserfromhell/nodes/argument.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ 
-30,6 +30,7 @@ __all__ = ["Argument"] class Argument(Node): """Represents a template argument substitution, like ``{{{foo}}}``.""" + def __init__(self, name, default=None): super(Argument, self).__init__() self._name = name diff --git a/mwparserfromhell/nodes/comment.py b/mwparserfromhell/nodes/comment.py index 3d06261..e96ce38 100644 --- a/mwparserfromhell/nodes/comment.py +++ b/mwparserfromhell/nodes/comment.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -29,6 +29,7 @@ __all__ = ["Comment"] class Comment(Node): """Represents a hidden HTML comment, like ````.""" + def __init__(self, contents): super(Comment, self).__init__() self._contents = contents diff --git a/mwparserfromhell/nodes/extras/__init__.py b/mwparserfromhell/nodes/extras/__init__.py index 2ce4bb1..e860f01 100644 --- a/mwparserfromhell/nodes/extras/__init__.py +++ b/mwparserfromhell/nodes/extras/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/extras/attribute.py b/mwparserfromhell/nodes/extras/attribute.py index 58a99a8..33ad851 100644 --- a/mwparserfromhell/nodes/extras/attribute.py +++ b/mwparserfromhell/nodes/extras/attribute.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/extras/parameter.py 
b/mwparserfromhell/nodes/extras/parameter.py index 8c5e654..c1c10a0 100644 --- a/mwparserfromhell/nodes/extras/parameter.py +++ b/mwparserfromhell/nodes/extras/parameter.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/heading.py b/mwparserfromhell/nodes/heading.py index 8f389d3..f001234 100644 --- a/mwparserfromhell/nodes/heading.py +++ b/mwparserfromhell/nodes/heading.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index a3c6079..b51bd92 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -23,7 +23,7 @@ from __future__ import unicode_literals from . import Node -from ..compat import htmlentities, str +from ..compat import htmlentities, py3k, str __all__ = ["HTMLEntity"] @@ -63,28 +63,31 @@ class HTMLEntity(Node): return self.normalize() return self - def _unichr(self, value): - """Implement the builtin unichr() with support for non-BMP code points. + if not py3k: + @staticmethod + def _unichr(value): + """Implement builtin unichr() with support for non-BMP code points. - On wide Python builds, this functions like the normal unichr(). 
On - narrow builds, this returns the value's corresponding surrogate pair. - """ - try: - return unichr(value) - except ValueError: - # Test whether we're on the wide or narrow Python build. Check the - # length of a non-BMP code point (U+1F64A, SPEAK-NO-EVIL MONKEY): - if len("\U0001F64A") == 2: - # Ensure this is within the range we can encode: - if value > 0x10FFFF: - raise ValueError("unichr() arg not in range(0x110000)") - code = value - 0x10000 - if value < 0: # Invalid code point - raise - lead = 0xD800 + (code >> 10) - trail = 0xDC00 + (code % (1 << 10)) - return unichr(lead) + unichr(trail) - raise + On wide Python builds, this functions like the normal unichr(). On + narrow builds, this returns the value's encoded surrogate pair. + """ + try: + return unichr(value) + except ValueError: + # Test whether we're on the wide or narrow Python build. Check + # the length of a non-BMP code point + # (U+1F64A, SPEAK-NO-EVIL MONKEY): + if len("\U0001F64A") == 2: + # Ensure this is within the range we can encode: + if value > 0x10FFFF: + raise ValueError("unichr() arg not in range(0x110000)") + code = value - 0x10000 + if value < 0: # Invalid code point + raise + lead = 0xD800 + (code >> 10) + trail = 0xDC00 + (code % (1 << 10)) + return unichr(lead) + unichr(trail) + raise @property def value(self): @@ -119,28 +122,60 @@ class HTMLEntity(Node): @value.setter def value(self, newval): newval = str(newval) - if newval not in htmlentities.entitydefs: - test = int(self.value, 16) - if test < 0 or (test > 0x10FFFF and int(self.value) > 0x10FFFF): - raise ValueError(newval) + try: + int(newval) + except ValueError: + try: + int(newval, 16) + except ValueError: + if newval not in htmlentities.entitydefs: + raise ValueError("entity value is not a valid name") + self._named = True + self._hexadecimal = False + else: + if int(newval, 16) < 0 or int(newval, 16) > 0x10FFFF: + raise ValueError("entity value is not in range(0x110000)") + self._named = False + self._hexadecimal = 
True + else: + test = int(newval, 16 if self.hexadecimal else 10) + if test < 0 or test > 0x10FFFF: + raise ValueError("entity value is not in range(0x110000)") + self._named = False self._value = newval @named.setter def named(self, newval): - self._named = bool(newval) + newval = bool(newval) + if newval and self.value not in htmlentities.entitydefs: + raise ValueError("entity value is not a valid name") + if not newval: + try: + int(self.value, 16) + except ValueError: + err = "current entity value is not a valid Unicode codepoint" + raise ValueError(err) + self._named = newval @hexadecimal.setter def hexadecimal(self, newval): - self._hexadecimal = bool(newval) + newval = bool(newval) + if newval and self.named: + raise ValueError("a named entity cannot be hexadecimal") + self._hexadecimal = newval @hex_char.setter def hex_char(self, newval): - self._hex_char = bool(newval) + newval = str(newval) + if newval not in ("x", "X"): + raise ValueError(newval) + self._hex_char = newval def normalize(self): """Return the unicode character represented by the HTML entity.""" + chrfunc = chr if py3k else HTMLEntity._unichr if self.named: - return unichr(htmlentities.name2codepoint[self.value]) + return chrfunc(htmlentities.name2codepoint[self.value]) if self.hexadecimal: - return self._unichr(int(self.value, 16)) - return self._unichr(int(self.value)) + return chrfunc(int(self.value, 16)) + return chrfunc(int(self.value)) diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index ecf6f2b..eb5d1ee 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 
08ab4a5..3834d41 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -81,7 +81,7 @@ class Template(Node): in parameter names or values so they are not mistaken for new parameters. """ - replacement = HTMLEntity(value=ord(char)) + replacement = str(HTMLEntity(value=ord(char))) for node in code.filter_text(recursive=False): if char in node: code.replace(node, node.replace(char, replacement)) @@ -107,7 +107,7 @@ class Template(Node): values = tuple(theories.values()) best = max(values) confidence = float(best) / sum(values) - if confidence > 0.75: + if confidence >= 0.75: return tuple(theories.keys())[values.index(best)] def _get_spacing_conventions(self, use_names): @@ -142,9 +142,9 @@ class Template(Node): return False return True - def _remove_without_field(self, param, i, force_no_field): + def _remove_without_field(self, param, i): """Return False if a parameter name should be kept, otherwise True.""" - if not param.showkey and not force_no_field: + if not param.showkey: dependents = [not after.showkey for after in self.params[i+1:]] if any(dependents): return False @@ -183,11 +183,10 @@ class Template(Node): def get(self, name): """Get the parameter whose name is *name*. - The returned object is a - :py:class:`~.Parameter` instance. Raises :py:exc:`ValueError` if no - parameter has this name. Since multiple parameters can have the same - name, we'll return the last match, since the last parameter is the only - one read by the MediaWiki parser. + The returned object is a :py:class:`~.Parameter` instance. Raises + :py:exc:`ValueError` if no parameter has this name. 
Since multiple + parameters can have the same name, we'll return the last match, since + the last parameter is the only one read by the MediaWiki parser. """ name = name.strip() if isinstance(name, basestring) else str(name) for param in reversed(self.params): @@ -195,20 +194,34 @@ class Template(Node): return param raise ValueError(name) - def add(self, name, value, showkey=None, force_nonconformity=False): + def add(self, name, value, showkey=None, before=None, + preserve_spacing=True): """Add a parameter to the template with a given *name* and *value*. *name* and *value* can be anything parasable by - :py:func:`.utils.parse_anything`; pipes (and equal signs, if - appropriate) are automatically escaped from *value* where applicable. + :py:func:`.utils.parse_anything`; pipes and equal signs are + automatically escaped from *value* when appropriate. + If *showkey* is given, this will determine whether or not to show the parameter's name (e.g., ``{{foo|bar}}``'s parameter has a name of ``"1"`` but it is hidden); otherwise, we'll make a safe and intelligent - guess. If *name* is already a parameter, we'll replace its value while - keeping the same spacing rules unless *force_nonconformity* is - ``True``. We will also try to guess the dominant spacing convention - when adding a new parameter using :py:meth:`_get_spacing_conventions` - unless *force_nonconformity* is ``True``. + guess. + + If *name* is already a parameter in the template, we'll replace its + value while keeping the same whitespace around it. We will also try to + guess the dominant spacing convention when adding a new parameter using + :py:meth:`_get_spacing_conventions`. + + If *before* is given (either a :py:class:`~.Parameter` object or a + name), then we will place the parameter immediately before this one. + Otherwise, it will be added at the end. If *before* is a name and + exists multiple times in the template, we will place it before the last + occurrence.
If *before* is not in the template, :py:exc:`ValueError` is + raised. The argument is ignored if the new parameter already exists. + + If *preserve_spacing* is ``False``, we will avoid preserving spacing + conventions when changing the value of an existing parameter or when + adding a new one. """ name, value = parse_anything(name), parse_anything(value) self._surface_escape(value, "|") @@ -217,14 +230,17 @@ class Template(Node): self.remove(name, keep_field=True) existing = self.get(name) if showkey is not None: - if not showkey: - self._surface_escape(value, "=") existing.showkey = showkey + if not existing.showkey: + self._surface_escape(value, "=") nodes = existing.value.nodes - if force_nonconformity: - existing.value = value - else: + if preserve_spacing: + for i in range(2): # Ignore empty text nodes + if not nodes[i]: + nodes[i] = None existing.value = parse_anything([nodes[0], value, nodes[1]]) + else: + existing.value = value return existing if showkey is None: @@ -246,43 +262,38 @@ class Template(Node): if not showkey: self._surface_escape(value, "=") - if not force_nonconformity: + if preserve_spacing: before_n, after_n = self._get_spacing_conventions(use_names=True) - if before_n and after_n: - name = parse_anything([before_n, name, after_n]) - elif before_n: - name = parse_anything([before_n, name]) - elif after_n: - name = parse_anything([name, after_n]) - before_v, after_v = self._get_spacing_conventions(use_names=False) - if before_v and after_v: - value = parse_anything([before_v, value, after_v]) - elif before_v: - value = parse_anything([before_v, value]) - elif after_v: - value = parse_anything([value, after_v]) + name = parse_anything([before_n, name, after_n]) + value = parse_anything([before_v, value, after_v]) param = Parameter(name, value, showkey) - self.params.append(param) + if before: + if not isinstance(before, Parameter): + before = self.get(before) + self.params.insert(self.params.index(before), param) + else: + 
self.params.append(param) return param - def remove(self, name, keep_field=False, force_no_field=False): + def remove(self, name, keep_field=False): """Remove a parameter from the template whose name is *name*. If *keep_field* is ``True``, we will keep the parameter's name, but blank its value. Otherwise, we will remove the parameter completely *unless* other parameters are dependent on it (e.g. removing ``bar`` from ``{{foo|bar|baz}}`` is unsafe because ``{{foo|baz}}`` is not what - we expected, so ``{{foo||baz}}`` will be produced instead), unless - *force_no_field* is also ``True``. If the parameter shows up multiple - times in the template, we will remove all instances of it (and keep - one if *keep_field* is ``True`` - that being the first instance if - none of the instances have dependents, otherwise that instance will be - kept). + we expected, so ``{{foo||baz}}`` will be produced instead). + + If the parameter shows up multiple times in the template, we will + remove all instances of it (and keep one if *keep_field* is ``True`` - + the first instance if none have dependents, otherwise the one with + dependents will be kept). 
""" name = name.strip() if isinstance(name, basestring) else str(name) removed = False + to_remove =[] for i, param in enumerate(self.params): if param.name.strip() == name: if keep_field: @@ -290,13 +301,15 @@ class Template(Node): self._blank_param_value(param.value) keep_field = False else: - self.params.remove(param) + to_remove.append(param) else: - if self._remove_without_field(param, i, force_no_field): - self.params.remove(param) + if self._remove_without_field(param, i): + to_remove.append(param) else: self._blank_param_value(param.value) if not removed: removed = True if not removed: raise ValueError(name) + for param in to_remove: + self.params.remove(param) diff --git a/mwparserfromhell/nodes/text.py b/mwparserfromhell/nodes/text.py index 783d8eb..6fda3da 100644 --- a/mwparserfromhell/nodes/text.py +++ b/mwparserfromhell/nodes/text.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -29,6 +29,7 @@ __all__ = ["Text"] class Text(Node): """Represents ordinary, unformatted text with no special properties.""" + def __init__(self, value): super(Text, self).__init__() self._value = value @@ -39,6 +40,9 @@ class Text(Node): def __strip__(self, normalize, collapse): return self + def __showtree__(self, write, get, mark): + write(str(self).encode("unicode_escape").decode("utf8")) + @property def value(self): """The actual text itself.""" diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index 73f2a8d..527e9bb 100644 --- a/mwparserfromhell/nodes/wikilink.py +++ b/mwparserfromhell/nodes/wikilink.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this 
software and associated documentation files (the "Software"), to deal @@ -30,6 +30,7 @@ __all__ = ["Wikilink"] class Wikilink(Node): """Represents an internal wikilink, like ``[[Foo|Bar]]``.""" + def __init__(self, title, text=None): super(Wikilink, self).__init__() self._title = title @@ -78,4 +79,7 @@ class Wikilink(Node): @text.setter def text(self, value): - self._text = parse_anything(value) + if value is None: + self._text = None + else: + self._text = parse_anything(value) diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index fd8a314..1fb95b5 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index 2d9ea55..60bfaa9 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index d87da9a..3c9c798 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -77,6 +77,15 @@ Local (stack-specific) contexts: * :py:const:`TAG_BODY` * 
:py:const:`TAG_CLOSE` +* :py:const:`SAFETY_CHECK` + + * :py:const:`HAS_TEXT` + * :py:const:`FAIL_ON_TEXT` + * :py:const:`FAIL_NEXT` + * :py:const:`FAIL_ON_LBRACE` + * :py:const:`FAIL_ON_RBRACE` + * :py:const:`FAIL_ON_EQUALS` + Global contexts: * :py:const:`GL_HEADING` @@ -84,40 +93,47 @@ Global contexts: # Local contexts: -TEMPLATE = 0b000000000000000000111 -TEMPLATE_NAME = 0b000000000000000000001 -TEMPLATE_PARAM_KEY = 0b000000000000000000010 -TEMPLATE_PARAM_VALUE = 0b000000000000000000100 - -ARGUMENT = 0b000000000000000011000 -ARGUMENT_NAME = 0b000000000000000001000 -ARGUMENT_DEFAULT = 0b000000000000000010000 - -WIKILINK = 0b000000000000001100000 -WIKILINK_TITLE = 0b000000000000000100000 -WIKILINK_TEXT = 0b000000000000001000000 - -HEADING = 0b000000001111110000000 -HEADING_LEVEL_1 = 0b000000000000010000000 -HEADING_LEVEL_2 = 0b000000000000100000000 -HEADING_LEVEL_3 = 0b000000000001000000000 -HEADING_LEVEL_4 = 0b000000000010000000000 -HEADING_LEVEL_5 = 0b000000000100000000000 -HEADING_LEVEL_6 = 0b000000001000000000000 - -COMMENT = 0b000000010000000000000 - -TAG = 0b111111100000000000000 -TAG_OPEN = 0b001111100000000000000 -TAG_OPEN_NAME = 0b000000100000000000000 -TAG_OPEN_ATTR = 0b001111000000000000000 -TAG_OPEN_ATTR_NAME = 0b000001000000000000000 -TAG_OPEN_ATTR_BODY = 0b000010000000000000000 -TAG_OPEN_ATTR_QUOTED = 0b000100000000000000000 -TAG_OPEN_ATTR_IGNORE = 0b001000000000000000000 -TAG_BODY = 0b010000000000000000000 -TAG_CLOSE = 0b100000000000000000000 - +TEMPLATE = 0b000000000000000000000000111 +TEMPLATE_NAME = 0b000000000000000000000000001 +TEMPLATE_PARAM_KEY = 0b000000000000000000000000010 +TEMPLATE_PARAM_VALUE = 0b000000000000000000000000100 + +ARGUMENT = 0b000000000000000000000011000 +ARGUMENT_NAME = 0b000000000000000000000001000 +ARGUMENT_DEFAULT = 0b000000000000000000000010000 + +WIKILINK = 0b000000000000000000001100000 +WIKILINK_TITLE = 0b000000000000000000000100000 +WIKILINK_TEXT = 0b000000000000000000001000000 + +HEADING = 
0b000000000000001111110000000 +HEADING_LEVEL_1 = 0b000000000000000000010000000 +HEADING_LEVEL_2 = 0b000000000000000000100000000 +HEADING_LEVEL_3 = 0b000000000000000001000000000 +HEADING_LEVEL_4 = 0b000000000000000010000000000 +HEADING_LEVEL_5 = 0b000000000000000100000000000 +HEADING_LEVEL_6 = 0b000000000000001000000000000 + +COMMENT = 0b000000000000010000000000000 + +TAG = 0b000000111111100000000000000 +TAG_OPEN = 0b000000001111100000000000000 +TAG_OPEN_NAME = 0b000000000000100000000000000 +TAG_OPEN_ATTR = 0b000000001111000000000000000 +TAG_OPEN_ATTR_NAME = 0b000000000001000000000000000 +TAG_OPEN_ATTR_BODY = 0b000000000010000000000000000 +TAG_OPEN_ATTR_QUOTED = 0b000000000100000000000000000 +TAG_OPEN_ATTR_IGNORE = 0b000000001000000000000000000 +TAG_BODY = 0b000000010000000000000000000 +TAG_CLOSE = 0b000000100000000000000000000 + +SAFETY_CHECK = 0b111111000000000000000000000 +HAS_TEXT = 0b000001000000000000000000000 +FAIL_ON_TEXT = 0b000010000000000000000000000 +FAIL_NEXT = 0b000100000000000000000000000 +FAIL_ON_LBRACE = 0b001000000000000000000000000 +FAIL_ON_RBRACE = 0b010000000000000000000000000 +FAIL_ON_EQUALS = 0b100000000000000000000000000 # Global contexts: diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 71b6cc3..ca9fe8a 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1,6 +1,6 @@ /* Tokenizer for MWParserFromHell -Copyright (C) 2012 Ben Kurtovic +Copyright (C) 2012-2013 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in @@ -23,6 +23,11 @@ SOFTWARE. 
#include "tokenizer.h" +double log2(double n) +{ + return log(n) / log(2); +} + static PyObject* Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds) { @@ -52,8 +57,9 @@ Textbuffer_new(void) static void Tokenizer_dealloc(Tokenizer* self) { - Py_XDECREF(self->text); struct Stack *this = self->topstack, *next; + Py_XDECREF(self->text); + while (this) { Py_DECREF(this->stack); Textbuffer_dealloc(this->textbuffer); @@ -109,6 +115,8 @@ Tokenizer_push(Tokenizer* self, int context) return -1; top->next = self->topstack; self->topstack = top; + self->depth++; + self->cycles++; return 0; } @@ -137,20 +145,21 @@ Textbuffer_render(struct Textbuffer* self) static int Tokenizer_push_textbuffer(Tokenizer* self) { + PyObject *text, *kwargs, *token; struct Textbuffer* buffer = self->topstack->textbuffer; if (buffer->size == 0 && !buffer->next) return 0; - PyObject* text = Textbuffer_render(buffer); + text = Textbuffer_render(buffer); if (!text) return -1; - PyObject* kwargs = PyDict_New(); + kwargs = PyDict_New(); if (!kwargs) { Py_DECREF(text); return -1; } PyDict_SetItemString(kwargs, "text", text); Py_DECREF(text); - PyObject* token = PyObject_Call(Text, NOARGS, kwargs); + token = PyObject_Call(Text, NOARGS, kwargs); Py_DECREF(kwargs); if (!token) return -1; @@ -174,6 +183,7 @@ Tokenizer_delete_top_of_stack(Tokenizer* self) Textbuffer_dealloc(top->textbuffer); self->topstack = top->next; free(top); + self->depth--; } /* @@ -182,9 +192,10 @@ Tokenizer_delete_top_of_stack(Tokenizer* self) static PyObject* Tokenizer_pop(Tokenizer* self) { + PyObject* stack; if (Tokenizer_push_textbuffer(self)) return NULL; - PyObject* stack = self->topstack->stack; + stack = self->topstack->stack; Py_INCREF(stack); Tokenizer_delete_top_of_stack(self); return stack; @@ -197,11 +208,13 @@ Tokenizer_pop(Tokenizer* self) static PyObject* Tokenizer_pop_keeping_context(Tokenizer* self) { + PyObject* stack; + int context; if (Tokenizer_push_textbuffer(self)) return NULL; - PyObject* stack = 
self->topstack->stack; + stack = self->topstack->stack; Py_INCREF(stack); - int context = self->topstack->context; + context = self->topstack->context; Tokenizer_delete_top_of_stack(self); self->topstack->context = context; return stack; @@ -373,9 +386,10 @@ Tokenizer_read(Tokenizer* self, Py_ssize_t delta) static PyObject* Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) { + Py_ssize_t index; if (delta > self->head) return EMPTY; - Py_ssize_t index = self->head - delta; + index = self->head - delta; return PyList_GET_ITEM(self->text, index); } @@ -389,7 +403,7 @@ Tokenizer_parse_template_or_argument(Tokenizer* self) PyObject *tokenlist; self->head += 2; - while (Tokenizer_READ(self, 0) == *"{") { + while (Tokenizer_READ(self, 0) == *"{" && braces < MAX_BRACES) { self->head++; braces++; } @@ -420,8 +434,8 @@ Tokenizer_parse_template_or_argument(Tokenizer* self) if (Tokenizer_parse_template(self)) return -1; if (BAD_ROUTE) { + char text[MAX_BRACES + 1]; RESET_ROUTE(); - char text[braces + 1]; for (i = 0; i < braces; i++) text[i] = *"{"; text[braces] = *""; if (Tokenizer_write_text_then_stack(self, text)) { @@ -632,9 +646,10 @@ Tokenizer_handle_template_end(Tokenizer* self) static int Tokenizer_handle_argument_separator(Tokenizer* self) { + PyObject* token; self->topstack->context ^= LC_ARGUMENT_NAME; self->topstack->context |= LC_ARGUMENT_DEFAULT; - PyObject* token = PyObject_CallObject(ArgumentSeparator, NULL); + token = PyObject_CallObject(ArgumentSeparator, NULL); if (!token) return -1; if (Tokenizer_write(self, token)) { @@ -651,8 +666,8 @@ Tokenizer_handle_argument_separator(Tokenizer* self) static PyObject* Tokenizer_handle_argument_end(Tokenizer* self) { - self->head += 2; PyObject* stack = Tokenizer_pop(self); + self->head += 2; return stack; } @@ -713,9 +728,10 @@ Tokenizer_parse_wikilink(Tokenizer* self) static int Tokenizer_handle_wikilink_separator(Tokenizer* self) { + PyObject* token; self->topstack->context ^= LC_WIKILINK_TITLE; 
self->topstack->context |= LC_WIKILINK_TEXT; - PyObject* token = PyObject_CallObject(WikilinkSeparator, NULL); + token = PyObject_CallObject(WikilinkSeparator, NULL); if (!token) return -1; if (Tokenizer_write(self, token)) { @@ -732,8 +748,8 @@ Tokenizer_handle_wikilink_separator(Tokenizer* self) static PyObject* Tokenizer_handle_wikilink_end(Tokenizer* self) { - self->head += 1; PyObject* stack = Tokenizer_pop(self); + self->head += 1; return stack; } @@ -759,11 +775,10 @@ Tokenizer_parse_heading(Tokenizer* self) if (BAD_ROUTE) { RESET_ROUTE(); self->head = reset + best - 1; - char text[best + 1]; - for (i = 0; i < best; i++) text[i] = *"="; - text[best] = *""; - if (Tokenizer_write_text_then_stack(self, text)) - return -1; + for (i = 0; i < best; i++) { + if (Tokenizer_write_text(self, *"=")) + return -1; + } self->global ^= GL_HEADING; return 0; } @@ -798,13 +813,12 @@ Tokenizer_parse_heading(Tokenizer* self) Py_DECREF(token); if (heading->level < best) { diff = best - heading->level; - char difftext[diff + 1]; - for (i = 0; i < diff; i++) difftext[i] = *"="; - difftext[diff] = *""; - if (Tokenizer_write_text_then_stack(self, difftext)) { - Py_DECREF(heading->title); - free(heading); - return -1; + for (i = 0; i < diff; i++) { + if (Tokenizer_write_text(self, *"=")) { + Py_DECREF(heading->title); + free(heading); + return -1; + } } } if (Tokenizer_write_all(self, heading->title)) { @@ -844,28 +858,27 @@ Tokenizer_handle_heading_end(Tokenizer* self) self->head++; } current = log2(self->topstack->context / LC_HEADING_LEVEL_1) + 1; - level = current > best ? (best > 6 ? 6 : best) : (current > 6 ? 6 : current); + level = current > best ? (best > 6 ? 6 : best) : + (current > 6 ? 
6 : current); after = (HeadingData*) Tokenizer_parse(self, self->topstack->context); if (BAD_ROUTE) { RESET_ROUTE(); if (level < best) { diff = best - level; - char difftext[diff + 1]; - for (i = 0; i < diff; i++) difftext[i] = *"="; - difftext[diff] = *""; - if (Tokenizer_write_text_then_stack(self, difftext)) - return NULL; + for (i = 0; i < diff; i++) { + if (Tokenizer_write_text(self, *"=")) + return NULL; + } } self->head = reset + best - 1; } else { - char text[best + 1]; - for (i = 0; i < best; i++) text[i] = *"="; - text[best] = *""; - if (Tokenizer_write_text_then_stack(self, text)) { - Py_DECREF(after->title); - free(after); - return NULL; + for (i = 0; i < best; i++) { + if (Tokenizer_write_text(self, *"=")) { + Py_DECREF(after->title); + free(after); + return NULL; + } } if (Tokenizer_write_all(self, after->title)) { Py_DECREF(after->title); @@ -897,8 +910,8 @@ Tokenizer_really_parse_entity(Tokenizer* self) { PyObject *token, *kwargs, *textobj; Py_UNICODE this; - int numeric, hexadecimal, i, j, test; - char *valid, *text, *def; + int numeric, hexadecimal, i, j, zeroes, test; + char *valid, *text, *buffer, *def; #define FAIL_ROUTE_AND_EXIT() { \ Tokenizer_fail_route(self); \ @@ -959,17 +972,18 @@ Tokenizer_really_parse_entity(Tokenizer* self) else numeric = hexadecimal = 0; if (hexadecimal) - valid = "0123456789abcdefABCDEF"; + valid = HEXDIGITS; else if (numeric) - valid = "0123456789"; + valid = DIGITS; else - valid = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + valid = ALPHANUM; text = calloc(MAX_ENTITY_SIZE, sizeof(char)); if (!text) { PyErr_NoMemory(); return -1; } i = 0; + zeroes = 0; while (1) { this = Tokenizer_READ(self, 0); if (this == *";") { @@ -978,6 +992,7 @@ Tokenizer_really_parse_entity(Tokenizer* self) break; } if (i == 0 && this == *"0") { + zeroes++; self->head++; continue; } @@ -1008,13 +1023,26 @@ Tokenizer_really_parse_entity(Tokenizer* self) i = 0; while (1) { def = entitydefs[i]; - if (!def) // We've reached 
the end of the def list without finding it + if (!def) // We've reached the end of the defs without finding it FAIL_ROUTE_AND_EXIT() if (strcmp(text, def) == 0) break; i++; } } + if (zeroes) { + buffer = calloc(strlen(text) + zeroes + 1, sizeof(char)); + if (!buffer) { + free(text); + PyErr_NoMemory(); + return -1; + } + for (i = 0; i < zeroes; i++) + strcat(buffer, "0"); + strcat(buffer, text); + free(text); + text = buffer; + } textobj = PyUnicode_FromString(text); if (!textobj) { free(text); @@ -1092,9 +1120,9 @@ Tokenizer_parse_comment(Tokenizer* self) self->head += 4; comment = Tokenizer_parse(self, LC_COMMENT); if (BAD_ROUTE) { + const char* text = "]]{{i|j= }} + test = [tokens.TemplateOpen(), tokens.Text(text="a"), + tokens.TemplateParamSeparator(), tokens.Text(text="b"), + tokens.TemplateParamSeparator(), tokens.TemplateOpen(), + tokens.Text(text="c"), tokens.TemplateParamSeparator(), + tokens.WikilinkOpen(), tokens.Text(text="d"), + tokens.WikilinkClose(), tokens.ArgumentOpen(), + tokens.Text(text="e"), tokens.ArgumentClose(), + tokens.TemplateClose(), tokens.TemplateClose(), + tokens.WikilinkOpen(), tokens.Text(text="f"), + tokens.WikilinkSeparator(), tokens.ArgumentOpen(), + tokens.Text(text="g"), tokens.ArgumentClose(), + tokens.CommentStart(), tokens.Text(text="h"), + tokens.CommentEnd(), tokens.WikilinkClose(), + tokens.TemplateOpen(), tokens.Text(text="i"), + tokens.TemplateParamSeparator(), tokens.Text(text="j"), + tokens.TemplateParamEquals(), tokens.HTMLEntityStart(), + tokens.Text(text="nbsp"), tokens.HTMLEntityEnd(), + tokens.TemplateClose()] + valid = wrap( + [Template(wraptext("a"), params=[Parameter(wraptext("1"), wraptext( + "b"), showkey=False), Parameter(wraptext("2"), wrap([Template( + wraptext("c"), params=[Parameter(wraptext("1"), wrap([Wikilink( + wraptext("d")), Argument(wraptext("e"))]), showkey=False)])]), + showkey=False)]), Wikilink(wraptext("f"), wrap([Argument(wraptext( + "g")), Comment(wraptext("h"))])), Template(wraptext("i"), 
params=[ + Parameter(wraptext("j"), wrap([HTMLEntity("nbsp", + named=True)]))])]) + self.assertWikicodeEqual(valid, self.builder.build(test)) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_comment.py b/tests/test_comment.py new file mode 100644 index 0000000..44225a2 --- /dev/null +++ b/tests/test_comment.py @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Comment + +from ._test_tree_equality import TreeEqualityTestCase + +class TestComment(TreeEqualityTestCase): + """Test cases for the Comment node.""" + + def test_unicode(self): + """test Comment.__unicode__()""" + node = Comment("foobar") + self.assertEqual("<!--foobar-->", str(node)) + + def test_iternodes(self): + """test Comment.__iternodes__()""" + node = Comment("foobar") + gen = node.__iternodes__(None) + self.assertEqual((None, node), next(gen)) + self.assertRaises(StopIteration, next, gen) + + def test_strip(self): + """test Comment.__strip__()""" + node = Comment("foobar") + for a in (True, False): + for b in (True, False): + self.assertIs(None, node.__strip__(a, b)) + + def test_showtree(self): + """test Comment.__showtree__()""" + output = [] + node = Comment("foobar") + node.__showtree__(output.append, None, None) + self.assertEqual(["<!--foobar-->"], output) + + def test_contents(self): + """test getter/setter for the contents attribute""" + node = Comment("foobar") + self.assertEqual("foobar", node.contents) + node.contents = "barfoo" + self.assertEqual("barfoo", node.contents) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py new file mode 100644 index 0000000..2374516 --- /dev/null +++ b/tests/test_ctokenizer.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above
copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +import unittest + +try: + from mwparserfromhell.parser._tokenizer import CTokenizer +except ImportError: + CTokenizer = None + +from ._test_tokenizer import TokenizerTestCase + +@unittest.skipUnless(CTokenizer, "C tokenizer not available") +class TestCTokenizer(TokenizerTestCase, unittest.TestCase): + """Test cases for the C tokenizer.""" + + @classmethod + def setUpClass(cls): + cls.tokenizer = CTokenizer + + if not TokenizerTestCase.skip_others: + def test_uses_c(self): + """make sure the C tokenizer identifies as using a C extension""" + self.assertTrue(CTokenizer.USES_C) + self.assertTrue(CTokenizer().USES_C) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_docs.py b/tests/test_docs.py new file mode 100644 index 0000000..8d95c47 --- /dev/null +++ b/tests/test_docs.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following 
conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import print_function, unicode_literals +import json +import unittest + +import mwparserfromhell +from mwparserfromhell.compat import py3k, str + +from .compat import StringIO, urlencode, urlopen + +class TestDocs(unittest.TestCase): + """Integration test cases for mwparserfromhell's documentation.""" + + def assertPrint(self, input, output): + """Assertion check that *input*, when printed, produces *output*.""" + buff = StringIO() + print(input, end="", file=buff) + buff.seek(0) + self.assertEqual(output, buff.read()) + + def test_readme_1(self): + """test a block of example code in the README""" + text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" + wikicode = mwparserfromhell.parse(text) + self.assertPrint(wikicode, + "I has a template! 
{{foo|bar|baz|eggs=spam}} See it?") + templates = wikicode.filter_templates() + if py3k: + self.assertPrint(templates, "['{{foo|bar|baz|eggs=spam}}']") + else: + self.assertPrint(templates, "[u'{{foo|bar|baz|eggs=spam}}']") + template = templates[0] + self.assertPrint(template.name, "foo") + if py3k: + self.assertPrint(template.params, "['bar', 'baz', 'eggs=spam']") + else: + self.assertPrint(template.params, "[u'bar', u'baz', u'eggs=spam']") + self.assertPrint(template.get(1).value, "bar") + self.assertPrint(template.get("eggs").value, "spam") + + def test_readme_2(self): + """test a block of example code in the README""" + code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") + if py3k: + self.assertPrint(code.filter_templates(), + "['{{foo|this {{includes a|template}}}}']") + else: + self.assertPrint(code.filter_templates(), + "[u'{{foo|this {{includes a|template}}}}']") + foo = code.filter_templates()[0] + self.assertPrint(foo.get(1).value, "this {{includes a|template}}") + self.assertPrint(foo.get(1).value.filter_templates()[0], + "{{includes a|template}}") + self.assertPrint(foo.get(1).value.filter_templates()[0].get(1).value, + "template") + + def test_readme_3(self): + """test a block of example code in the README""" + text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" + temps = mwparserfromhell.parse(text).filter_templates(recursive=True) + if py3k: + res = "['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']" + else: + res = "[u'{{foo|{{bar}}={{baz|{{spam}}}}}}', u'{{bar}}', u'{{baz|{{spam}}}}', u'{{spam}}']" + self.assertPrint(temps, res) + + def test_readme_4(self): + """test a block of example code in the README""" + text = "{{cleanup}} '''Foo''' is a [[bar]]. 
{{uncategorized}}" + code = mwparserfromhell.parse(text) + for template in code.filter_templates(): + if template.name == "cleanup" and not template.has_param("date"): + template.add("date", "July 2012") + res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}}" + self.assertPrint(code, res) + code.replace("{{uncategorized}}", "{{bar-stub}}") + res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}" + self.assertPrint(code, res) + if py3k: + res = "['{{cleanup|date=July 2012}}', '{{bar-stub}}']" + else: + res = "[u'{{cleanup|date=July 2012}}', u'{{bar-stub}}']" + self.assertPrint(code.filter_templates(), res) + text = str(code) + res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}" + self.assertPrint(text, res) + self.assertEqual(text, code) + + def test_readme_5(self): + """test a block of example code in the README; includes a web call""" + url1 = "http://en.wikipedia.org/w/api.php" + url2 = "http://en.wikipedia.org/w/index.php?title={0}&action=raw" + title = "Test" + data = {"action": "query", "prop": "revisions", "rvlimit": 1, + "rvprop": "content", "format": "json", "titles": title} + try: + raw = urlopen(url1, urlencode(data).encode("utf8")).read() + except IOError: + self.skipTest("cannot continue because of unsuccessful web call") + res = json.loads(raw.decode("utf8")) + text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"] + try: + expected = urlopen(url2.format(title)).read().decode("utf8") + except IOError: + self.skipTest("cannot continue because of unsuccessful web call") + actual = mwparserfromhell.parse(text) + self.assertEqual(expected, actual) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_heading.py b/tests/test_heading.py new file mode 100644 index 0000000..7a65872 --- /dev/null +++ b/tests/test_heading.py @@ -0,0 +1,91 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to 
any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Heading, Text + +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext + +class TestHeading(TreeEqualityTestCase): + """Test cases for the Heading node.""" + + def test_unicode(self): + """test Heading.__unicode__()""" + node = Heading(wraptext("foobar"), 2) + self.assertEqual("==foobar==", str(node)) + node2 = Heading(wraptext(" zzz "), 5) + self.assertEqual("===== zzz =====", str(node2)) + + def test_iternodes(self): + """test Heading.__iternodes__()""" + text1, text2 = Text("foo"), Text("bar") + node = Heading(wrap([text1, text2]), 3) + gen = node.__iternodes__(getnodes) + self.assertEqual((None, node), next(gen)) + self.assertEqual((node.title, text1), next(gen)) + self.assertEqual((node.title, text2), next(gen)) + self.assertRaises(StopIteration, next, gen) + + def test_strip(self): + """test Heading.__strip__()""" + node = Heading(wraptext("foobar"), 3) + for a in (True, False): + for b in (True, False): + self.assertEqual("foobar", node.__strip__(a, b)) + + def test_showtree(self): + """test Heading.__showtree__()""" + output = [] + getter = object() + get = lambda code: output.append((getter, code)) + node1 = Heading(wraptext("foobar"), 3) + node2 = Heading(wraptext(" baz "), 4) + node1.__showtree__(output.append, get, None) + node2.__showtree__(output.append, get, None) + valid = ["===", (getter, node1.title), "===", + "====", (getter, node2.title), "===="] + self.assertEqual(valid, output) + + def test_title(self): + """test getter/setter for the title attribute""" + title = wraptext("foobar") + node = Heading(title, 3) + self.assertIs(title, node.title) + node.title = "héhehé" + self.assertWikicodeEqual(wraptext("héhehé"), node.title) + + def test_level(self): + """test getter/setter for the level attribute""" + node = Heading(wraptext("foobar"), 3) + self.assertEqual(3, node.level) + 
node.level = 5 + self.assertEqual(5, node.level) + self.assertRaises(ValueError, setattr, node, "level", 0) + self.assertRaises(ValueError, setattr, node, "level", 7) + self.assertRaises(ValueError, setattr, node, "level", "abc") + self.assertRaises(ValueError, setattr, node, "level", False) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py new file mode 100644 index 0000000..d38e5ec --- /dev/null +++ b/tests/test_html_entity.py @@ -0,0 +1,169 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import HTMLEntity + +from ._test_tree_equality import TreeEqualityTestCase, wrap + +class TestHTMLEntity(TreeEqualityTestCase): + """Test cases for the HTMLEntity node.""" + + def test_unicode(self): + """test HTMLEntity.__unicode__()""" + node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) + node2 = HTMLEntity("107", named=False, hexadecimal=False) + node3 = HTMLEntity("6b", named=False, hexadecimal=True) + node4 = HTMLEntity("6C", named=False, hexadecimal=True, hex_char="X") + self.assertEqual("&nbsp;", str(node1)) + self.assertEqual("&#107;", str(node2)) + self.assertEqual("&#x6b;", str(node3)) + self.assertEqual("&#X6C;", str(node4)) + + def test_iternodes(self): + """test HTMLEntity.__iternodes__()""" + node = HTMLEntity("nbsp", named=True, hexadecimal=False) + gen = node.__iternodes__(None) + self.assertEqual((None, node), next(gen)) + self.assertRaises(StopIteration, next, gen) + + def test_strip(self): + """test HTMLEntity.__strip__()""" + node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) + node2 = HTMLEntity("107", named=False, hexadecimal=False) + node3 = HTMLEntity("e9", named=False, hexadecimal=True) + for a in (True, False): + self.assertEqual("\xa0", node1.__strip__(True, a)) + self.assertEqual("&nbsp;", node1.__strip__(False, a)) + self.assertEqual("k", node2.__strip__(True, a)) + self.assertEqual("&#107;", node2.__strip__(False, a)) + self.assertEqual("é", node3.__strip__(True, a)) + self.assertEqual("&#xe9;", node3.__strip__(False, a)) + + def test_showtree(self): + """test HTMLEntity.__showtree__()""" + output = [] + node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) + node2 = HTMLEntity("107", named=False, hexadecimal=False) + node3 = HTMLEntity("e9", named=False, hexadecimal=True) + node1.__showtree__(output.append, None, None) + node2.__showtree__(output.append, None, None) + node3.__showtree__(output.append, None, None) + res =
[" ", "k", "é"] + self.assertEqual(res, output) + + def test_value(self): + """test getter/setter for the value attribute""" + node1 = HTMLEntity("nbsp") + node2 = HTMLEntity("107") + node3 = HTMLEntity("e9") + self.assertEqual("nbsp", node1.value) + self.assertEqual("107", node2.value) + self.assertEqual("e9", node3.value) + + node1.value = "ffa4" + node2.value = 72 + node3.value = "Sigma" + self.assertEqual("ffa4", node1.value) + self.assertFalse(node1.named) + self.assertTrue(node1.hexadecimal) + self.assertEqual("72", node2.value) + self.assertFalse(node2.named) + self.assertFalse(node2.hexadecimal) + self.assertEqual("Sigma", node3.value) + self.assertTrue(node3.named) + self.assertFalse(node3.hexadecimal) + + node1.value = "10FFFF" + node2.value = 110000 + node2.value = 1114111 + self.assertRaises(ValueError, setattr, node3, "value", "") + self.assertRaises(ValueError, setattr, node3, "value", "foobar") + self.assertRaises(ValueError, setattr, node3, "value", True) + self.assertRaises(ValueError, setattr, node3, "value", -1) + self.assertRaises(ValueError, setattr, node1, "value", 110000) + self.assertRaises(ValueError, setattr, node1, "value", "1114112") + + def test_named(self): + """test getter/setter for the named attribute""" + node1 = HTMLEntity("nbsp") + node2 = HTMLEntity("107") + node3 = HTMLEntity("e9") + self.assertTrue(node1.named) + self.assertFalse(node2.named) + self.assertFalse(node3.named) + node1.named = 1 + node2.named = 0 + node3.named = 0 + self.assertTrue(node1.named) + self.assertFalse(node2.named) + self.assertFalse(node3.named) + self.assertRaises(ValueError, setattr, node1, "named", False) + self.assertRaises(ValueError, setattr, node2, "named", True) + self.assertRaises(ValueError, setattr, node3, "named", True) + + def test_hexadecimal(self): + """test getter/setter for the hexadecimal attribute""" + node1 = HTMLEntity("nbsp") + node2 = HTMLEntity("107") + node3 = HTMLEntity("e9") + self.assertFalse(node1.hexadecimal) + 
self.assertFalse(node2.hexadecimal) + self.assertTrue(node3.hexadecimal) + node1.hexadecimal = False + node2.hexadecimal = True + node3.hexadecimal = False + self.assertFalse(node1.hexadecimal) + self.assertTrue(node2.hexadecimal) + self.assertFalse(node3.hexadecimal) + self.assertRaises(ValueError, setattr, node1, "hexadecimal", True) + + def test_hex_char(self): + """test getter/setter for the hex_char attribute""" + node1 = HTMLEntity("e9") + node2 = HTMLEntity("e9", hex_char="X") + self.assertEqual("x", node1.hex_char) + self.assertEqual("X", node2.hex_char) + node1.hex_char = "X" + node2.hex_char = "x" + self.assertEqual("X", node1.hex_char) + self.assertEqual("x", node2.hex_char) + self.assertRaises(ValueError, setattr, node1, "hex_char", 123) + self.assertRaises(ValueError, setattr, node1, "hex_char", "foobar") + self.assertRaises(ValueError, setattr, node1, "hex_char", True) + + def test_normalize(self): + """test getter/setter for the normalize attribute""" + node1 = HTMLEntity("nbsp") + node2 = HTMLEntity("107") + node3 = HTMLEntity("e9") + node4 = HTMLEntity("1f648") + self.assertEqual("\xa0", node1.normalize()) + self.assertEqual("k", node2.normalize()) + self.assertEqual("é", node3.normalize()) + self.assertEqual("\U0001F648", node4.normalize()) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_parameter.py b/tests/test_parameter.py index 2d5515b..4786e12 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,100 +20,56 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
+from __future__ import unicode_literals import unittest -from mwparserfromhell.parameter import Parameter -from mwparserfromhell.template import Template +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Text +from mwparserfromhell.nodes.extras import Parameter -class TestParameter(unittest.TestCase): - def setUp(self): - self.name = "foo" - self.value1 = "bar" - self.value2 = "{{spam}}" - self.value3 = "bar{{spam}}" - self.value4 = "embedded {{eggs|spam|baz=buz}} {{goes}} here" - self.templates2 = [Template("spam")] - self.templates3 = [Template("spam")] - self.templates4 = [Template("eggs", [Parameter("1", "spam"), - Parameter("baz", "buz")]), - Template("goes")] +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext - def test_construct(self): - Parameter(self.name, self.value1) - Parameter(self.name, self.value2, self.templates2) - Parameter(name=self.name, value=self.value3) - Parameter(name=self.name, value=self.value4, templates=self.templates4) +class TestParameter(TreeEqualityTestCase): + """Test cases for the Parameter node extra.""" + + def test_unicode(self): + """test Parameter.__unicode__()""" + node = Parameter(wraptext("1"), wraptext("foo"), showkey=False) + self.assertEqual("foo", str(node)) + node2 = Parameter(wraptext("foo"), wraptext("bar")) + self.assertEqual("foo=bar", str(node2)) def test_name(self): - params = [ - Parameter(self.name, self.value1), - Parameter(self.name, self.value2, self.templates2), - Parameter(name=self.name, value=self.value3), - Parameter(name=self.name, value=self.value4, - templates=self.templates4) - ] - for param in params: - self.assertEqual(param.name, self.name) + """test getter/setter for the name attribute""" + name1 = wraptext("1") + name2 = wraptext("foobar") + node1 = Parameter(name1, wraptext("foobar"), showkey=False) + node2 = Parameter(name2, wraptext("baz")) + self.assertIs(name1, node1.name) + self.assertIs(name2, node2.name) + node1.name = "héhehé" + 
node2.name = "héhehé" + self.assertWikicodeEqual(wraptext("héhehé"), node1.name) + self.assertWikicodeEqual(wraptext("héhehé"), node2.name) def test_value(self): - tests = [ - (Parameter(self.name, self.value1), self.value1), - (Parameter(self.name, self.value2, self.templates2), self.value2), - (Parameter(name=self.name, value=self.value3), self.value3), - (Parameter(name=self.name, value=self.value4, - templates=self.templates4), self.value4) - ] - for param, correct in tests: - self.assertEqual(param.value, correct) - - def test_templates(self): - tests = [ - (Parameter(self.name, self.value3, self.templates3), - self.templates3), - (Parameter(name=self.name, value=self.value4, - templates=self.templates4), self.templates4) - ] - for param, correct in tests: - self.assertEqual(param.templates, correct) - - def test_magic(self): - params = [Parameter(self.name, self.value1), - Parameter(self.name, self.value2, self.templates2), - Parameter(self.name, self.value3, self.templates3), - Parameter(self.name, self.value4, self.templates4)] - for param in params: - self.assertEqual(repr(param), repr(param.value)) - self.assertEqual(str(param), str(param.value)) - self.assertIs(param < "eggs", param.value < "eggs") - self.assertIs(param <= "bar{{spam}}", param.value <= "bar{{spam}}") - self.assertIs(param == "bar", param.value == "bar") - self.assertIs(param != "bar", param.value != "bar") - self.assertIs(param > "eggs", param.value > "eggs") - self.assertIs(param >= "bar{{spam}}", param.value >= "bar{{spam}}") - self.assertEquals(bool(param), bool(param.value)) - self.assertEquals(len(param), len(param.value)) - self.assertEquals(list(param), list(param.value)) - self.assertEquals(param[2], param.value[2]) - self.assertEquals(list(reversed(param)), - list(reversed(param.value))) - self.assertIs("bar" in param, "bar" in param.value) - self.assertEquals(param + "test", param.value + "test") - self.assertEquals("test" + param, "test" + param.value) - # add param - # add 
template left - # add template right - - self.assertEquals(param * 3, Parameter(param.name, param.value * 3, - param.templates * 3)) - self.assertEquals(3 * param, Parameter(param.name, 3 * param.value, - 3 * param.templates)) + """test getter/setter for the value attribute""" + value = wraptext("bar") + node = Parameter(wraptext("foo"), value) + self.assertIs(value, node.value) + node.value = "héhehé" + self.assertWikicodeEqual(wraptext("héhehé"), node.value) - # add param inplace - # add template implace - # add str inplace - # multiply int inplace - self.assertIsInstance(param, Parameter) - self.assertIsInstance(param.value, str) + def test_showkey(self): + """test getter/setter for the showkey attribute""" + node1 = Parameter(wraptext("1"), wraptext("foo"), showkey=False) + node2 = Parameter(wraptext("foo"), wraptext("bar")) + self.assertFalse(node1.showkey) + self.assertTrue(node2.showkey) + node1.showkey = True + node2.showkey = "" + self.assertTrue(node1.showkey) + self.assertFalse(node2.showkey) if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_parser.py b/tests/test_parser.py index 0c989b8..ec5f065 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,44 +20,47 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
+from __future__ import unicode_literals import unittest -from mwparserfromhell.parameter import Parameter -from mwparserfromhell.parser import Parser -from mwparserfromhell.template import Template +from mwparserfromhell import parser +from mwparserfromhell.nodes import Template, Text, Wikilink +from mwparserfromhell.nodes.extras import Parameter -TESTS = [ - ("", []), - ("abcdef ghijhk", []), - ("abc{this is not a template}def", []), - ("neither is {{this one}nor} {this one {despite}} containing braces", []), - ("this is an acceptable {{template}}", [Template("template")]), - ("{{multiple}}{{templates}}", [Template("multiple"), - Template("templates")]), - ("multiple {{-}} templates {{+}}!", [Template("-"), Template("+")]), - ("{{{no templates here}}}", []), - ("{ {{templates here}}}", [Template("templates here")]), - ("{{{{I do not exist}}}}", []), - ("{{foo|bar|baz|eggs=spam}}", - [Template("foo", [Parameter("1", "bar"), Parameter("2", "baz"), - Parameter("eggs", "spam")])]), - ("{{abc def|ghi|jk=lmno|pqr|st=uv|wx|yz}}", - [Template("abc def", [Parameter("1", "ghi"), Parameter("jk", "lmno"), - Parameter("2", "pqr"), Parameter("st", "uv"), - Parameter("3", "wx"), Parameter("4", "yz")])]), - ("{{this has a|{{template}}|inside of it}}", - [Template("this has a", [Parameter("1", "{{template}}", - [Template("template")]), - Parameter("2", "inside of it")])]), - ("{{{{I exist}} }}", [Template("I exist", [] )]), - ("{{}}") -] +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext +from .compat import range -class TestParser(unittest.TestCase): - def test_parse(self): - parser = Parser() - for unparsed, parsed in TESTS: - self.assertEqual(parser.parse(unparsed), parsed) +class TestParser(TreeEqualityTestCase): + """Tests for the Parser class itself, which tokenizes and builds nodes.""" + + def test_use_c(self): + """make sure the correct tokenizer is used""" + if parser.use_c: + self.assertTrue(parser.Parser(None)._tokenizer.USES_C) + parser.use_c = 
False + self.assertFalse(parser.Parser(None)._tokenizer.USES_C) + + def test_parsing(self): + """integration test for parsing overall""" + text = "this is text; {{this|is=a|template={{with|[[links]]|in}}it}}" + expected = wrap([ + Text("this is text; "), + Template(wraptext("this"), [ + Parameter(wraptext("is"), wraptext("a")), + Parameter(wraptext("template"), wrap([ + Template(wraptext("with"), [ + Parameter(wraptext("1"), + wrap([Wikilink(wraptext("links"))]), + showkey=False), + Parameter(wraptext("2"), + wraptext("in"), showkey=False) + ]), + Text("it") + ])) + ]) + ]) + actual = parser.Parser(text).parse() + self.assertWikicodeEqual(expected, actual) if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py new file mode 100644 index 0000000..0211e7f --- /dev/null +++ b/tests/test_pytokenizer.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.parser.tokenizer import Tokenizer + +from ._test_tokenizer import TokenizerTestCase + +class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): + """Test cases for the Python tokenizer.""" + + @classmethod + def setUpClass(cls): + cls.tokenizer = Tokenizer + + if not TokenizerTestCase.skip_others: + def test_uses_c(self): + """make sure the Python tokenizer identifies as not using C""" + self.assertFalse(Tokenizer.USES_C) + self.assertFalse(Tokenizer().USES_C) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py new file mode 100644 index 0000000..25df555 --- /dev/null +++ b/tests/test_smart_list.py @@ -0,0 +1,392 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import py3k +from mwparserfromhell.smart_list import SmartList, _ListProxy + +from .compat import range + +class TestSmartList(unittest.TestCase): + """Test cases for the SmartList class and its child, _ListProxy.""" + + def _test_get_set_del_item(self, builder): + """Run tests on __get/set/delitem__ of a list built with *builder*.""" + def assign(L, s1, s2, s3, val): + L[s1:s2:s3] = val + def delete(L, s1): + del L[s1] + + list1 = builder([0, 1, 2, 3, "one", "two"]) + list2 = builder(list(range(10))) + + self.assertEqual(1, list1[1]) + self.assertEqual("one", list1[-2]) + self.assertEqual([2, 3], list1[2:4]) + self.assertRaises(IndexError, lambda: list1[6]) + self.assertRaises(IndexError, lambda: list1[-7]) + + self.assertEqual([0, 1, 2], list1[:3]) + self.assertEqual([0, 1, 2, 3, "one", "two"], list1[:]) + self.assertEqual([3, "one", "two"], list1[3:]) + self.assertEqual(["one", "two"], list1[-2:]) + self.assertEqual([0, 1], list1[:-4]) + self.assertEqual([], list1[6:]) + self.assertEqual([], list1[4:2]) + + self.assertEqual([0, 2, "one"], list1[0:5:2]) + self.assertEqual([0, 2], list1[0:-3:2]) + self.assertEqual([0, 1, 2, 3, "one", "two"], list1[::]) + self.assertEqual([2, 3, "one", "two"], list1[2::]) + self.assertEqual([0, 1, 2, 3], list1[:4:]) + self.assertEqual([2, 3], list1[2:4:]) + self.assertEqual([0, 2, 4, 6, 8], list2[::2]) + self.assertEqual([2, 5, 8], list2[2::3]) + self.assertEqual([0, 3], list2[:6:3]) + self.assertEqual([2, 5, 8], list2[-8:9:3]) + self.assertEqual([], list2[100000:1000:-100]) + + list1[3] = 100 + self.assertEqual(100, list1[3]) + list1[-3] = 101 + self.assertEqual([0, 1, 2, 
101, "one", "two"], list1) + list1[5:] = [6, 7, 8] + self.assertEqual([6, 7, 8], list1[5:]) + self.assertEqual([0, 1, 2, 101, "one", 6, 7, 8], list1) + list1[2:4] = [-1, -2, -3, -4, -5] + self.assertEqual([0, 1, -1, -2, -3, -4, -5, "one", 6, 7, 8], list1) + list1[0:-3] = [99] + self.assertEqual([99, 6, 7, 8], list1) + list2[0:6:2] = [100, 102, 104] + self.assertEqual([100, 1, 102, 3, 104, 5, 6, 7, 8, 9], list2) + list2[::3] = [200, 203, 206, 209] + self.assertEqual([200, 1, 102, 203, 104, 5, 206, 7, 8, 209], list2) + list2[::] = range(7) + self.assertEqual([0, 1, 2, 3, 4, 5, 6], list2) + self.assertRaises(ValueError, assign, list2, 0, 5, 2, + [100, 102, 104, 106]) + + del list2[2] + self.assertEqual([0, 1, 3, 4, 5, 6], list2) + del list2[-3] + self.assertEqual([0, 1, 3, 5, 6], list2) + self.assertRaises(IndexError, delete, list2, 100) + self.assertRaises(IndexError, delete, list2, -6) + list2[:] = range(10) + del list2[3:6] + self.assertEqual([0, 1, 2, 6, 7, 8, 9], list2) + del list2[-2:] + self.assertEqual([0, 1, 2, 6, 7], list2) + del list2[:2] + self.assertEqual([2, 6, 7], list2) + list2[:] = range(10) + del list2[2:8:2] + self.assertEqual([0, 1, 3, 5, 7, 8, 9], list2) + + def _test_add_radd_iadd(self, builder): + """Run tests on __r/i/add__ of a list built with *builder*.""" + list1 = builder(range(5)) + list2 = builder(range(5, 10)) + self.assertEqual([0, 1, 2, 3, 4, 5, 6], list1 + [5, 6]) + self.assertEqual([0, 1, 2, 3, 4], list1) + self.assertEqual(list(range(10)), list1 + list2) + self.assertEqual([-2, -1, 0, 1, 2, 3, 4], [-2, -1] + list1) + self.assertEqual([0, 1, 2, 3, 4], list1) + list1 += ["foo", "bar", "baz"] + self.assertEqual([0, 1, 2, 3, 4, "foo", "bar", "baz"], list1) + + def _test_other_magic_methods(self, builder): + """Run tests on other magic methods of a list built with *builder*.""" + list1 = builder([0, 1, 2, 3, "one", "two"]) + list2 = builder([]) + list3 = builder([0, 2, 3, 4]) + list4 = builder([0, 1, 2]) + + if py3k: + 
self.assertEqual("[0, 1, 2, 3, 'one', 'two']", str(list1)) + self.assertEqual(b"\x00\x01\x02", bytes(list4)) + self.assertEqual("[0, 1, 2, 3, 'one', 'two']", repr(list1)) + else: + self.assertEqual("[0, 1, 2, 3, u'one', u'two']", unicode(list1)) + self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", str(list1)) + self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", repr(list1)) + + self.assertTrue(list1 < list3) + self.assertTrue(list1 <= list3) + self.assertFalse(list1 == list3) + self.assertTrue(list1 != list3) + self.assertFalse(list1 > list3) + self.assertFalse(list1 >= list3) + + other1 = [0, 2, 3, 4] + self.assertTrue(list1 < other1) + self.assertTrue(list1 <= other1) + self.assertFalse(list1 == other1) + self.assertTrue(list1 != other1) + self.assertFalse(list1 > other1) + self.assertFalse(list1 >= other1) + + other2 = [0, 0, 1, 2] + self.assertFalse(list1 < other2) + self.assertFalse(list1 <= other2) + self.assertFalse(list1 == other2) + self.assertTrue(list1 != other2) + self.assertTrue(list1 > other2) + self.assertTrue(list1 >= other2) + + other3 = [0, 1, 2, 3, "one", "two"] + self.assertFalse(list1 < other3) + self.assertTrue(list1 <= other3) + self.assertTrue(list1 == other3) + self.assertFalse(list1 != other3) + self.assertFalse(list1 > other3) + self.assertTrue(list1 >= other3) + + self.assertTrue(bool(list1)) + self.assertFalse(bool(list2)) + + self.assertEqual(6, len(list1)) + self.assertEqual(0, len(list2)) + + out = [] + for obj in list1: + out.append(obj) + self.assertEqual([0, 1, 2, 3, "one", "two"], out) + + out = [] + for ch in list2: + out.append(ch) + self.assertEqual([], out) + + gen1 = iter(list1) + out = [] + for i in range(len(list1)): + out.append(next(gen1)) + self.assertRaises(StopIteration, next, gen1) + self.assertEqual([0, 1, 2, 3, "one", "two"], out) + gen2 = iter(list2) + self.assertRaises(StopIteration, next, gen2) + + self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1))) + self.assertEqual([], list(reversed(list2))) + + 
self.assertTrue("one" in list1) + self.assertTrue(3 in list1) + self.assertFalse(10 in list1) + self.assertFalse(0 in list2) + + self.assertEqual([], list2 * 5) + self.assertEqual([], 5 * list2) + self.assertEqual([0, 1, 2, 0, 1, 2, 0, 1, 2], list4 * 3) + self.assertEqual([0, 1, 2, 0, 1, 2, 0, 1, 2], 3 * list4) + list4 *= 2 + self.assertEqual([0, 1, 2, 0, 1, 2], list4) + + def _test_list_methods(self, builder): + """Run tests on the public methods of a list built with *builder*.""" + list1 = builder(range(5)) + list2 = builder(["foo"]) + list3 = builder([("a", 5), ("d", 2), ("b", 8), ("c", 3)]) + + list1.append(5) + list1.append(1) + list1.append(2) + self.assertEqual([0, 1, 2, 3, 4, 5, 1, 2], list1) + + self.assertEqual(0, list1.count(6)) + self.assertEqual(2, list1.count(1)) + + list1.extend(range(5, 8)) + self.assertEqual([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + + self.assertEqual(1, list1.index(1)) + self.assertEqual(6, list1.index(1, 3)) + self.assertEqual(6, list1.index(1, 3, 7)) + self.assertRaises(ValueError, list1.index, 1, 3, 5) + + list1.insert(0, -1) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + list1.insert(-1, 6.5) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) + list1.insert(13, 8) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1) + + self.assertEqual(8, list1.pop()) + self.assertEqual(7, list1.pop()) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) + self.assertEqual(-1, list1.pop(0)) + self.assertEqual(5, list1.pop(5)) + self.assertEqual(6.5, list1.pop(-1)) + self.assertEqual([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) + self.assertEqual("foo", list2.pop()) + self.assertRaises(IndexError, list2.pop) + self.assertEqual([], list2) + + list1.remove(6) + self.assertEqual([0, 1, 2, 3, 4, 1, 2, 5], list1) + list1.remove(1) + self.assertEqual([0, 2, 3, 4, 1, 2, 5], list1) + list1.remove(1) + self.assertEqual([0, 2, 3, 4, 2, 5], list1) + self.assertRaises(ValueError, 
list1.remove, 1) + + list1.reverse() + self.assertEqual([5, 2, 4, 3, 2, 0], list1) + + list1.sort() + self.assertEqual([0, 2, 2, 3, 4, 5], list1) + list1.sort(reverse=True) + self.assertEqual([5, 4, 3, 2, 2, 0], list1) + if not py3k: + func = lambda x, y: abs(3 - x) - abs(3 - y) # Distance from 3 + list1.sort(cmp=func) + self.assertEqual([3, 4, 2, 2, 5, 0], list1) + list1.sort(cmp=func, reverse=True) + self.assertEqual([0, 5, 4, 2, 2, 3], list1) + list3.sort(key=lambda i: i[1]) + self.assertEqual([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) + list3.sort(key=lambda i: i[1], reverse=True) + self.assertEqual([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) + + def test_docs(self): + """make sure the methods of SmartList/_ListProxy have docstrings""" + methods = ["append", "count", "extend", "index", "insert", "pop", + "remove", "reverse", "sort"] + for meth in methods: + expected = getattr(list, meth).__doc__ + smartlist_doc = getattr(SmartList, meth).__doc__ + listproxy_doc = getattr(_ListProxy, meth).__doc__ + self.assertEqual(expected, smartlist_doc) + self.assertEqual(expected, listproxy_doc) + + def test_doctest(self): + """make sure the test embedded in SmartList's docstring passes""" + parent = SmartList([0, 1, 2, 3]) + self.assertEqual([0, 1, 2, 3], parent) + child = parent[2:] + self.assertEqual([2, 3], child) + child.append(4) + self.assertEqual([2, 3, 4], child) + self.assertEqual([0, 1, 2, 3, 4], parent) + + def test_parent_get_set_del(self): + """make sure SmartList's getitem/setitem/delitem work""" + self._test_get_set_del_item(SmartList) + + def test_parent_add(self): + """make sure SmartList's add/radd/iadd work""" + self._test_add_radd_iadd(SmartList) + + def test_parent_unaffected_magics(self): + """sanity checks against SmartList features that were not modified""" + self._test_other_magic_methods(SmartList) + + def test_parent_methods(self): + """make sure SmartList's non-magic methods work, like append()""" + self._test_list_methods(SmartList) 
+ + def test_child_get_set_del(self): + """make sure _ListProxy's getitem/setitem/delitem work""" + self._test_get_set_del_item(lambda L: SmartList(list(L))[:]) + self._test_get_set_del_item(lambda L: SmartList([999] + list(L))[1:]) + self._test_get_set_del_item(lambda L: SmartList(list(L) + [999])[:-1]) + builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] + self._test_get_set_del_item(builder) + + def test_child_add(self): + """make sure _ListProxy's add/radd/iadd work""" + self._test_add_radd_iadd(lambda L: SmartList(list(L))[:]) + self._test_add_radd_iadd(lambda L: SmartList([999] + list(L))[1:]) + self._test_add_radd_iadd(lambda L: SmartList(list(L) + [999])[:-1]) + builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] + self._test_add_radd_iadd(builder) + + def test_child_other_magics(self): + """make sure _ListProxy's other magically implemented features work""" + self._test_other_magic_methods(lambda L: SmartList(list(L))[:]) + self._test_other_magic_methods(lambda L: SmartList([999] + list(L))[1:]) + self._test_other_magic_methods(lambda L: SmartList(list(L) + [999])[:-1]) + builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] + self._test_other_magic_methods(builder) + + def test_child_methods(self): + """make sure _ListProxy's non-magic methods work, like append()""" + self._test_list_methods(lambda L: SmartList(list(L))[:]) + self._test_list_methods(lambda L: SmartList([999] + list(L))[1:]) + self._test_list_methods(lambda L: SmartList(list(L) + [999])[:-1]) + builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] + self._test_list_methods(builder) + + def test_influence(self): + """make sure changes are propagated from parents to children""" + parent = SmartList([0, 1, 2, 3, 4, 5]) + child1 = parent[2:] + child2 = parent[2:5] + + parent.append(6) + child1.append(7) + child2.append(4.5) + self.assertEqual([0, 1, 2, 3, 4, 4.5, 5, 6, 7], parent) + self.assertEqual([2, 3, 4, 4.5, 5, 6, 
7], child1) + self.assertEqual([2, 3, 4, 4.5], child2) + + parent.insert(0, -1) + parent.insert(4, 2.5) + parent.insert(10, 6.5) + self.assertEqual([-1, 0, 1, 2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], parent) + self.assertEqual([2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], child1) + self.assertEqual([2, 2.5, 3, 4, 4.5], child2) + + self.assertEqual(7, parent.pop()) + self.assertEqual(6.5, child1.pop()) + self.assertEqual(4.5, child2.pop()) + self.assertEqual([-1, 0, 1, 2, 2.5, 3, 4, 5, 6], parent) + self.assertEqual([2, 2.5, 3, 4, 5, 6], child1) + self.assertEqual([2, 2.5, 3, 4], child2) + + parent.remove(-1) + child1.remove(2.5) + self.assertEqual([0, 1, 2, 3, 4, 5, 6], parent) + self.assertEqual([2, 3, 4, 5, 6], child1) + self.assertEqual([2, 3, 4], child2) + + self.assertEqual(0, parent.pop(0)) + self.assertEqual([1, 2, 3, 4, 5, 6], parent) + self.assertEqual([2, 3, 4, 5, 6], child1) + self.assertEqual([2, 3, 4], child2) + + child2.reverse() + self.assertEqual([1, 4, 3, 2, 5, 6], parent) + self.assertEqual([4, 3, 2, 5, 6], child1) + self.assertEqual([4, 3, 2], child2) + + parent.extend([7, 8]) + child1.extend([8.1, 8.2]) + child2.extend([1.9, 1.8]) + self.assertEqual([1, 4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], parent) + self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) + self.assertEqual([4, 3, 2, 1.9, 1.8], child2) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py new file mode 100644 index 0000000..306f2fd --- /dev/null +++ b/tests/test_string_mixin.py @@ -0,0 +1,435 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and 
to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +from sys import getdefaultencoding +from types import GeneratorType +import unittest + +from mwparserfromhell.compat import bytes, py3k, str +from mwparserfromhell.string_mixin import StringMixIn + +from .compat import range + +class _FakeString(StringMixIn): + def __init__(self, data): + self._data = data + + def __unicode__(self): + return self._data + + +class TestStringMixIn(unittest.TestCase): + """Test cases for the StringMixIn class.""" + + def test_docs(self): + """make sure the various methods of StringMixIn have docstrings""" + methods = [ + "capitalize", "center", "count", "encode", "endswith", + "expandtabs", "find", "format", "index", "isalnum", "isalpha", + "isdecimal", "isdigit", "islower", "isnumeric", "isspace", + "istitle", "isupper", "join", "ljust", "lower", "lstrip", + "partition", "replace", "rfind", "rindex", "rjust", "rpartition", + "rsplit", "rstrip", "split", "splitlines", "startswith", "strip", + "swapcase", "title", "translate", "upper", "zfill"] + if py3k: + methods.extend(["casefold", "format_map", "isidentifier", + "isprintable", "maketrans"]) + else: + methods.append("decode") + for meth in methods: + expected = getattr(str, meth).__doc__ + actual = getattr(StringMixIn, 
meth).__doc__ + self.assertEqual(expected, actual) + + def test_types(self): + """make sure StringMixIns convert to different types correctly""" + fstr = _FakeString("fake string") + self.assertEqual(str(fstr), "fake string") + self.assertEqual(bytes(fstr), b"fake string") + if py3k: + self.assertEqual(repr(fstr), "'fake string'") + else: + self.assertEqual(repr(fstr), b"u'fake string'") + + self.assertIsInstance(str(fstr), str) + self.assertIsInstance(bytes(fstr), bytes) + if py3k: + self.assertIsInstance(repr(fstr), str) + else: + self.assertIsInstance(repr(fstr), bytes) + + def test_comparisons(self): + """make sure comparison operators work""" + str1 = _FakeString("this is a fake string") + str2 = _FakeString("this is a fake string") + str3 = _FakeString("fake string, this is") + str4 = "this is a fake string" + str5 = "fake string, this is" + + self.assertFalse(str1 > str2) + self.assertTrue(str1 >= str2) + self.assertTrue(str1 == str2) + self.assertFalse(str1 != str2) + self.assertFalse(str1 < str2) + self.assertTrue(str1 <= str2) + + self.assertTrue(str1 > str3) + self.assertTrue(str1 >= str3) + self.assertFalse(str1 == str3) + self.assertTrue(str1 != str3) + self.assertFalse(str1 < str3) + self.assertFalse(str1 <= str3) + + self.assertFalse(str1 > str4) + self.assertTrue(str1 >= str4) + self.assertTrue(str1 == str4) + self.assertFalse(str1 != str4) + self.assertFalse(str1 < str4) + self.assertTrue(str1 <= str4) + + self.assertTrue(str1 > str5) + self.assertTrue(str1 >= str5) + self.assertFalse(str1 == str5) + self.assertTrue(str1 != str5) + self.assertFalse(str1 < str5) + self.assertFalse(str1 <= str5) + + def test_other_magics(self): + """test other magically implemented features, like len() and iter()""" + str1 = _FakeString("fake string") + str2 = _FakeString("") + expected = ["f", "a", "k", "e", " ", "s", "t", "r", "i", "n", "g"] + + self.assertTrue(str1) + self.assertFalse(str2) + self.assertEqual(11, len(str1)) + self.assertEqual(0, len(str2)) + + out 
= [] + for ch in str1: + out.append(ch) + self.assertEqual(expected, out) + + out = [] + for ch in str2: + out.append(ch) + self.assertEqual([], out) + + gen1 = iter(str1) + gen2 = iter(str2) + self.assertIsInstance(gen1, GeneratorType) + self.assertIsInstance(gen2, GeneratorType) + + out = [] + for i in range(len(str1)): + out.append(next(gen1)) + self.assertRaises(StopIteration, next, gen1) + self.assertEqual(expected, out) + self.assertRaises(StopIteration, next, gen2) + + self.assertEqual("gnirts ekaf", "".join(list(reversed(str1)))) + self.assertEqual([], list(reversed(str2))) + + self.assertEqual("f", str1[0]) + self.assertEqual(" ", str1[4]) + self.assertEqual("g", str1[10]) + self.assertEqual("n", str1[-2]) + self.assertRaises(IndexError, lambda: str1[11]) + self.assertRaises(IndexError, lambda: str2[0]) + + self.assertTrue("k" in str1) + self.assertTrue("fake" in str1) + self.assertTrue("str" in str1) + self.assertTrue("" in str1) + self.assertTrue("" in str2) + self.assertFalse("real" in str1) + self.assertFalse("s" in str2) + + def test_other_methods(self): + """test the remaining non-magic methods of StringMixIn""" + str1 = _FakeString("fake string") + self.assertEqual("Fake string", str1.capitalize()) + + self.assertEqual(" fake string ", str1.center(15)) + self.assertEqual(" fake string ", str1.center(16)) + self.assertEqual("qqfake stringqq", str1.center(15, "q")) + + self.assertEqual(1, str1.count("e")) + self.assertEqual(0, str1.count("z")) + self.assertEqual(1, str1.count("r", 7)) + self.assertEqual(0, str1.count("r", 8)) + self.assertEqual(1, str1.count("r", 5, 9)) + self.assertEqual(0, str1.count("r", 5, 7)) + + if not py3k: + str2 = _FakeString("fo") + self.assertEqual(str1, str1.decode()) + actual = _FakeString("\\U00010332\\U0001033f\\U00010344") + self.assertEqual("𐌲𐌿𐍄", actual.decode("unicode_escape")) + self.assertRaises(UnicodeError, str2.decode, "punycode") + self.assertEqual("", str2.decode("punycode", "ignore")) + + str3 = 
_FakeString("𐌲𐌿𐍄") + actual = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84" + self.assertEqual(b"fake string", str1.encode()) + self.assertEqual(actual, str3.encode("utf-8")) + self.assertEqual(actual, str3.encode(encoding="utf-8")) + if getdefaultencoding() == "ascii": + self.assertRaises(UnicodeEncodeError, str3.encode) + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, str3.encode()) + self.assertRaises(UnicodeEncodeError, str3.encode, "ascii") + self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict") + if getdefaultencoding() == "ascii": + self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, str3.encode(errors="strict")) + self.assertEqual(b"", str3.encode("ascii", "ignore")) + if getdefaultencoding() == "ascii": + self.assertEqual(b"", str3.encode(errors="ignore")) + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, str3.encode(errors="ignore")) + + self.assertTrue(str1.endswith("ing")) + self.assertFalse(str1.endswith("ingh")) + + str4 = _FakeString("\tfoobar") + self.assertEqual("fake string", str1) + self.assertEqual(" foobar", str4.expandtabs()) + self.assertEqual(" foobar", str4.expandtabs(4)) + + self.assertEqual(3, str1.find("e")) + self.assertEqual(-1, str1.find("z")) + self.assertEqual(7, str1.find("r", 7)) + self.assertEqual(-1, str1.find("r", 8)) + self.assertEqual(7, str1.find("r", 5, 9)) + self.assertEqual(-1, str1.find("r", 5, 7)) + + str5 = _FakeString("foo{0}baz") + str6 = _FakeString("foo{abc}baz") + str7 = _FakeString("foo{0}{abc}buzz") + str8 = _FakeString("{0}{1}") + self.assertEqual("fake string", str1.format()) + self.assertEqual("foobarbaz", str5.format("bar")) + self.assertEqual("foobarbaz", str6.format(abc="bar")) + self.assertEqual("foobarbazbuzz", str7.format("bar", abc="baz")) + self.assertRaises(IndexError, str8.format, "abc") + + if py3k: + self.assertEqual("fake string", str1.format_map({})) + 
self.assertEqual("foobarbaz", str6.format_map({"abc": "bar"})) + self.assertRaises(ValueError, str5.format_map, {0: "abc"}) + + self.assertEqual(3, str1.index("e")) + self.assertRaises(ValueError, str1.index, "z") + self.assertEqual(7, str1.index("r", 7)) + self.assertRaises(ValueError, str1.index, "r", 8) + self.assertEqual(7, str1.index("r", 5, 9)) + self.assertRaises(ValueError, str1.index, "r", 5, 7) + + str9 = _FakeString("foobar") + str10 = _FakeString("foobar123") + str11 = _FakeString("foo bar") + self.assertTrue(str9.isalnum()) + self.assertTrue(str10.isalnum()) + self.assertFalse(str11.isalnum()) + + self.assertTrue(str9.isalpha()) + self.assertFalse(str10.isalpha()) + self.assertFalse(str11.isalpha()) + + str12 = _FakeString("123") + str13 = _FakeString("\u2155") + str14 = _FakeString("\u00B2") + self.assertFalse(str9.isdecimal()) + self.assertTrue(str12.isdecimal()) + self.assertFalse(str13.isdecimal()) + self.assertFalse(str14.isdecimal()) + + self.assertFalse(str9.isdigit()) + self.assertTrue(str12.isdigit()) + self.assertFalse(str13.isdigit()) + self.assertTrue(str14.isdigit()) + + if py3k: + self.assertTrue(str9.isidentifier()) + self.assertTrue(str10.isidentifier()) + self.assertFalse(str11.isidentifier()) + self.assertFalse(str12.isidentifier()) + + str15 = _FakeString("") + str16 = _FakeString("FooBar") + self.assertTrue(str9.islower()) + self.assertFalse(str15.islower()) + self.assertFalse(str16.islower()) + + self.assertFalse(str9.isnumeric()) + self.assertTrue(str12.isnumeric()) + self.assertTrue(str13.isnumeric()) + self.assertTrue(str14.isnumeric()) + + if py3k: + str16B = _FakeString("\x01\x02") + self.assertTrue(str9.isprintable()) + self.assertTrue(str13.isprintable()) + self.assertTrue(str14.isprintable()) + self.assertTrue(str15.isprintable()) + self.assertFalse(str16B.isprintable()) + + str17 = _FakeString(" ") + str18 = _FakeString("\t \t \r\n") + self.assertFalse(str1.isspace()) + self.assertFalse(str9.isspace()) + 
self.assertTrue(str17.isspace()) + self.assertTrue(str18.isspace()) + + str19 = _FakeString("This Sentence Looks Like A Title") + str20 = _FakeString("This sentence doesn't LookLikeATitle") + self.assertFalse(str15.istitle()) + self.assertTrue(str19.istitle()) + self.assertFalse(str20.istitle()) + + str21 = _FakeString("FOOBAR") + self.assertFalse(str9.isupper()) + self.assertFalse(str15.isupper()) + self.assertTrue(str21.isupper()) + + self.assertEqual("foobar", str15.join(["foo", "bar"])) + self.assertEqual("foo123bar123baz", str12.join(("foo", "bar", "baz"))) + + self.assertEqual("fake string ", str1.ljust(15)) + self.assertEqual("fake string ", str1.ljust(16)) + self.assertEqual("fake stringqqqq", str1.ljust(15, "q")) + + str22 = _FakeString("ß") + self.assertEqual("", str15.lower()) + self.assertEqual("foobar", str16.lower()) + self.assertEqual("ß", str22.lower()) + if py3k: + self.assertEqual("", str15.casefold()) + self.assertEqual("foobar", str16.casefold()) + self.assertEqual("ss", str22.casefold()) + + str23 = _FakeString(" fake string ") + self.assertEqual("fake string", str1.lstrip()) + self.assertEqual("fake string ", str23.lstrip()) + self.assertEqual("ke string", str1.lstrip("abcdef")) + + self.assertEqual(("fa", "ke", " string"), str1.partition("ke")) + self.assertEqual(("fake string", "", ""), str1.partition("asdf")) + + str24 = _FakeString("boo foo moo") + self.assertEqual("real string", str1.replace("fake", "real")) + self.assertEqual("bu fu moo", str24.replace("oo", "u", 2)) + + self.assertEqual(3, str1.rfind("e")) + self.assertEqual(-1, str1.rfind("z")) + self.assertEqual(7, str1.rfind("r", 7)) + self.assertEqual(-1, str1.rfind("r", 8)) + self.assertEqual(7, str1.rfind("r", 5, 9)) + self.assertEqual(-1, str1.rfind("r", 5, 7)) + + self.assertEqual(3, str1.rindex("e")) + self.assertRaises(ValueError, str1.rindex, "z") + self.assertEqual(7, str1.rindex("r", 7)) + self.assertRaises(ValueError, str1.rindex, "r", 8) + self.assertEqual(7, 
str1.rindex("r", 5, 9)) + self.assertRaises(ValueError, str1.rindex, "r", 5, 7) + + self.assertEqual(" fake string", str1.rjust(15)) + self.assertEqual(" fake string", str1.rjust(16)) + self.assertEqual("qqqqfake string", str1.rjust(15, "q")) + + self.assertEqual(("fa", "ke", " string"), str1.rpartition("ke")) + self.assertEqual(("", "", "fake string"), str1.rpartition("asdf")) + + str25 = _FakeString(" this is a sentence with whitespace ") + actual = ["this", "is", "a", "sentence", "with", "whitespace"] + self.assertEqual(actual, str25.rsplit()) + self.assertEqual(actual, str25.rsplit(None)) + actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", + "", "whitespace", ""] + self.assertEqual(actual, str25.rsplit(" ")) + actual = [" this is a", "sentence", "with", "whitespace"] + self.assertEqual(actual, str25.rsplit(None, 3)) + actual = [" this is a sentence with", "", "whitespace", ""] + self.assertEqual(actual, str25.rsplit(" ", 3)) + if py3k: + actual = [" this is a", "sentence", "with", "whitespace"] + self.assertEqual(actual, str25.rsplit(maxsplit=3)) + + self.assertEqual("fake string", str1.rstrip()) + self.assertEqual(" fake string", str23.rstrip()) + self.assertEqual("fake stri", str1.rstrip("ngr")) + + actual = ["this", "is", "a", "sentence", "with", "whitespace"] + self.assertEqual(actual, str25.split()) + self.assertEqual(actual, str25.split(None)) + actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", + "", "whitespace", ""] + self.assertEqual(actual, str25.split(" ")) + actual = ["this", "is", "a", "sentence with whitespace "] + self.assertEqual(actual, str25.split(None, 3)) + actual = ["", "", "", "this is a sentence with whitespace "] + self.assertEqual(actual, str25.split(" ", 3)) + if py3k: + actual = ["this", "is", "a", "sentence with whitespace "] + self.assertEqual(actual, str25.split(maxsplit=3)) + + str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") + self.assertEqual(["lines", "of", "text", "are", 
"presented", "here"], + str26.splitlines()) + self.assertEqual(["lines\n", "of\n", "text\r\n", "are\r\n", + "presented\n", "here"], str26.splitlines(True)) + + self.assertTrue(str1.startswith("fake")) + self.assertFalse(str1.startswith("faker")) + + self.assertEqual("fake string", str1.strip()) + self.assertEqual("fake string", str23.strip()) + self.assertEqual("ke stri", str1.strip("abcdefngr")) + + self.assertEqual("fOObAR", str16.swapcase()) + + self.assertEqual("Fake String", str1.title()) + + if py3k: + table1 = str.maketrans({97: "1", 101: "2", 105: "3", 111: "4", + 117: "5"}) + table2 = str.maketrans("aeiou", "12345") + table3 = str.maketrans("aeiou", "12345", "rts") + self.assertEqual("f1k2 str3ng", str1.translate(table1)) + self.assertEqual("f1k2 str3ng", str1.translate(table2)) + self.assertEqual("f1k2 3ng", str1.translate(table3)) + else: + table = {97: "1", 101: "2", 105: "3", 111: "4", 117: "5"} + self.assertEqual("f1k2 str3ng", str1.translate(table)) + + self.assertEqual("", str15.upper()) + self.assertEqual("FOOBAR", str16.upper()) + + self.assertEqual("123", str12.zfill(3)) + self.assertEqual("000123", str12.zfill(6)) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_template.py b/tests/test_template.py index b006033..28592df 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,87 +20,345 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from itertools import permutations +from __future__ import unicode_literals import unittest -from mwparserfromhell.parameter import Parameter -from mwparserfromhell.template import Template +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import HTMLEntity, Template, Text +from mwparserfromhell.nodes.extras import Parameter +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext -class TestTemplate(unittest.TestCase): - def setUp(self): - self.name = "foo" - self.bar = Parameter("1", "bar") - self.baz = Parameter("2", "baz") - self.eggs = Parameter("eggs", "spam") - self.params = [self.bar, self.baz, self.eggs] +pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True) +pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False) - def test_construct(self): - Template(self.name) - Template(self.name, self.params) - Template(name=self.name) - Template(name=self.name, params=self.params) +class TestTemplate(TreeEqualityTestCase): + """Test cases for the Template node.""" + + def test_unicode(self): + """test Template.__unicode__()""" + node = Template(wraptext("foobar")) + self.assertEqual("{{foobar}}", str(node)) + node2 = Template(wraptext("foo"), + [pgenh("1", "bar"), pgens("abc", "def")]) + self.assertEqual("{{foo|bar|abc=def}}", str(node2)) + + def test_iternodes(self): + """test Template.__iternodes__()""" + node1n1 = Text("foobar") + node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("abc") + node2n4, node2n5 = Text("def"), Text("ghi") + node2p1 = Parameter(wraptext("1"), wrap([node2n2]), showkey=False) + node2p2 = Parameter(wrap([node2n3]), wrap([node2n4, node2n5]), + showkey=True) + node1 = Template(wrap([node1n1])) + node2 = Template(wrap([node2n1]), [node2p1, node2p2]) + + gen1 = node1.__iternodes__(getnodes) + gen2 = node2.__iternodes__(getnodes) + self.assertEqual((None, node1), next(gen1)) + self.assertEqual((None, node2), next(gen2)) + self.assertEqual((node1.name, node1n1), 
next(gen1)) + self.assertEqual((node2.name, node2n1), next(gen2)) + self.assertEqual((node2.params[0].value, node2n2), next(gen2)) + self.assertEqual((node2.params[1].name, node2n3), next(gen2)) + self.assertEqual((node2.params[1].value, node2n4), next(gen2)) + self.assertEqual((node2.params[1].value, node2n5), next(gen2)) + self.assertRaises(StopIteration, next, gen1) + self.assertRaises(StopIteration, next, gen2) + + def test_strip(self): + """test Template.__strip__()""" + node1 = Template(wraptext("foobar")) + node2 = Template(wraptext("foo"), + [pgenh("1", "bar"), pgens("abc", "def")]) + for a in (True, False): + for b in (True, False): + self.assertEqual(None, node1.__strip__(a, b)) + self.assertEqual(None, node2.__strip__(a, b)) + + def test_showtree(self): + """test Template.__showtree__()""" + output = [] + getter, marker = object(), object() + get = lambda code: output.append((getter, code)) + mark = lambda: output.append(marker) + node1 = Template(wraptext("foobar")) + node2 = Template(wraptext("foo"), + [pgenh("1", "bar"), pgens("abc", "def")]) + node1.__showtree__(output.append, get, mark) + node2.__showtree__(output.append, get, mark) + valid = [ + "{{", (getter, node1.name), "}}", "{{", (getter, node2.name), + " | ", marker, (getter, node2.params[0].name), " = ", marker, + (getter, node2.params[0].value), " | ", marker, + (getter, node2.params[1].name), " = ", marker, + (getter, node2.params[1].value), "}}"] + self.assertEqual(valid, output) def test_name(self): - templates = [ - Template(self.name), - Template(self.name, self.params), - Template(name=self.name), - Template(name=self.name, params=self.params) - ] - for template in templates: - self.assertEqual(template.name, self.name) + """test getter/setter for the name attribute""" + name = wraptext("foobar") + node1 = Template(name) + node2 = Template(name, [pgenh("1", "bar")]) + self.assertIs(name, node1.name) + self.assertIs(name, node2.name) + node1.name = "asdf" + node2.name = "téstïng" + 
self.assertWikicodeEqual(wraptext("asdf"), node1.name) + self.assertWikicodeEqual(wraptext("téstïng"), node2.name) def test_params(self): - for template in (Template(self.name), Template(name=self.name)): - self.assertEqual(template.params, []) - for template in (Template(self.name, self.params), - Template(name=self.name, params=self.params)): - self.assertEqual(template.params, self.params) - - def test_getitem(self): - template = Template(name=self.name, params=self.params) - self.assertIs(template[0], self.bar) - self.assertIs(template[1], self.baz) - self.assertIs(template[2], self.eggs) - self.assertIs(template["1"], self.bar) - self.assertIs(template["2"], self.baz) - self.assertIs(template["eggs"], self.eggs) - - def test_render(self): - tests = [ - (Template(self.name), "{{foo}}"), - (Template(self.name, self.params), "{{foo|bar|baz|eggs=spam}}") - ] - for template, rendered in tests: - self.assertEqual(template.render(), rendered) - - def test_repr(self): - correct1= 'Template(name=foo, params={})' - correct2 = 'Template(name=foo, params={"1": "bar", "2": "baz", "eggs": "spam"})' - tests = [(Template(self.name), correct1), - (Template(self.name, self.params), correct2)] - for template, correct in tests: - self.assertEqual(repr(template), correct) - self.assertEqual(str(template), correct) - - def test_cmp(self): - tmp1 = Template(self.name) - tmp2 = Template(name=self.name) - tmp3 = Template(self.name, []) - tmp4 = Template(name=self.name, params=[]) - tmp5 = Template(self.name, self.params) - tmp6 = Template(name=self.name, params=self.params) - - for tmpA, tmpB in permutations((tmp1, tmp2, tmp3, tmp4), 2): - self.assertEqual(tmpA, tmpB) - - for tmpA, tmpB in permutations((tmp5, tmp6), 2): - self.assertEqual(tmpA, tmpB) - - for tmpA in (tmp5, tmp6): - for tmpB in (tmp1, tmp2, tmp3, tmp4): - self.assertNotEqual(tmpA, tmpB) - self.assertNotEqual(tmpB, tmpA) + """test getter for the params attribute""" + node1 = Template(wraptext("foobar")) + plist = 
[pgenh("1", "bar"), pgens("abc", "def")] + node2 = Template(wraptext("foo"), plist) + self.assertEqual([], node1.params) + self.assertIs(plist, node2.params) + + def test_has_param(self): + """test Template.has_param()""" + node1 = Template(wraptext("foobar")) + node2 = Template(wraptext("foo"), + [pgenh("1", "bar"), pgens("\nabc ", "def")]) + node3 = Template(wraptext("foo"), + [pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")]) + node4 = Template(wraptext("foo"), [pgenh("1", "a"), pgens("b", " ")]) + self.assertFalse(node1.has_param("foobar")) + self.assertTrue(node2.has_param(1)) + self.assertTrue(node2.has_param("abc")) + self.assertFalse(node2.has_param("def")) + self.assertTrue(node3.has_param("1")) + self.assertTrue(node3.has_param(" b ")) + self.assertFalse(node4.has_param("b")) + self.assertTrue(node3.has_param("b", False)) + self.assertTrue(node4.has_param("b", False)) + + def test_get(self): + """test Template.get()""" + node1 = Template(wraptext("foobar")) + node2p1 = pgenh("1", "bar") + node2p2 = pgens("abc", "def") + node2 = Template(wraptext("foo"), [node2p1, node2p2]) + node3p1 = pgens("b", "c") + node3p2 = pgens("1", "d") + node3 = Template(wraptext("foo"), [pgenh("1", "a"), node3p1, node3p2]) + node4p1 = pgens(" b", " ") + node4 = Template(wraptext("foo"), [pgenh("1", "a"), node4p1]) + self.assertRaises(ValueError, node1.get, "foobar") + self.assertIs(node2p1, node2.get(1)) + self.assertIs(node2p2, node2.get("abc")) + self.assertRaises(ValueError, node2.get, "def") + self.assertIs(node3p1, node3.get("b")) + self.assertIs(node3p2, node3.get("1")) + self.assertIs(node4p1, node4.get("b ")) + + def test_add(self): + """test Template.add()""" + node1 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node2 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node3 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node4 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node5 = Template(wraptext("a"), 
[pgens("b", "c"), + pgens(" d ", "e")]) + node6 = Template(wraptext("a"), [pgens("b", "c"), pgens("b", "d"), + pgens("b", "e")]) + node7 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node8p = pgenh("1", "d") + node8 = Template(wraptext("a"), [pgens("b", "c"), node8p]) + node9 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node10 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "e")]) + node11 = Template(wraptext("a"), [pgens("b", "c")]) + node12 = Template(wraptext("a"), [pgens("b", "c")]) + node13 = Template(wraptext("a"), [ + pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) + node14 = Template(wraptext("a\n"), [ + pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), + pgens("h ", " i\n")]) + node15 = Template(wraptext("a"), [ + pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) + node16 = Template(wraptext("a"), [ + pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) + node17 = Template(wraptext("a"), [ + pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) + node18 = Template(wraptext("a\n"), [ + pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), + pgens("h ", " i\n")]) + node19 = Template(wraptext("a"), [ + pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) + node20 = Template(wraptext("a"), [ + pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) + node21 = Template(wraptext("a"), [pgenh("1", "b")]) + node22 = Template(wraptext("a"), [pgenh("1", "b")]) + node23 = Template(wraptext("a"), [pgenh("1", "b")]) + node24 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), + pgenh("3", "d"), pgenh("4", "e")]) + node25 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), + pgens("4", "d"), pgens("5", "e")]) + node26 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), + pgens("4", "d"), pgens("5", "e")]) + node27 = Template(wraptext("a"), [pgenh("1", "b")]) + node28 = Template(wraptext("a"), [pgenh("1", "b")]) + 
node29 = Template(wraptext("a"), [pgens("b", "c")]) + node30 = Template(wraptext("a"), [pgenh("1", "b")]) + node31 = Template(wraptext("a"), [pgenh("1", "b")]) + node32 = Template(wraptext("a"), [pgens("1", "b")]) + node33 = Template(wraptext("a"), [ + pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) + node34 = Template(wraptext("a\n"), [ + pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), + pgens("h ", " i\n")]) + node35 = Template(wraptext("a"), [ + pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) + node36 = Template(wraptext("a"), [ + pgens("\nb ", " c "), pgens("\nd ", " e "), pgens("\nf ", " g ")]) + node37 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), + pgens("b", "f"), pgens("b", "h"), + pgens("i", "j")]) + node37 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), + pgens("b", "f"), pgens("b", "h"), + pgens("i", "j")]) + node38 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"), + pgens("1", "c"), pgens("2", "d")]) + node39 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"), + pgenh("1", "c"), pgenh("2", "d")]) + node40 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), + pgens("f", "g")]) + + node1.add("e", "f", showkey=True) + node2.add(2, "g", showkey=False) + node3.add("e", "foo|bar", showkey=True) + node4.add("e", "f", showkey=True, before="b") + node5.add("f", "g", showkey=True, before=" d ") + node6.add("f", "g", showkey=True, before="b") + self.assertRaises(ValueError, node7.add, "e", "f", showkey=True, + before="q") + node8.add("e", "f", showkey=True, before=node8p) + node9.add("e", "f", showkey=True, before=pgenh("1", "d")) + self.assertRaises(ValueError, node10.add, "e", "f", showkey=True, + before=pgenh("1", "d")) + node11.add("d", "foo=bar", showkey=True) + node12.add("1", "foo=bar", showkey=False) + node13.add("h", "i", showkey=True) + node14.add("j", "k", showkey=True) + node15.add("h", "i", showkey=True) + node16.add("h", "i", showkey=True, 
preserve_spacing=False) + node17.add("h", "i", showkey=False) + node18.add("j", "k", showkey=False) + node19.add("h", "i", showkey=False) + node20.add("h", "i", showkey=False, preserve_spacing=False) + node21.add("2", "c") + node22.add("3", "c") + node23.add("c", "d") + node24.add("5", "f") + node25.add("3", "f") + node26.add("6", "f") + node27.add("c", "foo=bar") + node28.add("2", "foo=bar") + node29.add("b", "d") + node30.add("1", "foo=bar") + node31.add("1", "foo=bar", showkey=True) + node32.add("1", "foo=bar", showkey=False) + node33.add("d", "foo") + node34.add("f", "foo") + node35.add("f", "foo") + node36.add("d", "foo", preserve_spacing=False) + node37.add("b", "k") + node38.add("1", "e") + node39.add("1", "e") + node40.add("d", "h", before="b") + + self.assertEqual("{{a|b=c|d|e=f}}", node1) + self.assertEqual("{{a|b=c|d|g}}", node2) + self.assertEqual("{{a|b=c|d|e=foo|bar}}", node3) + self.assertIsInstance(node3.params[2].value.get(1), HTMLEntity) + self.assertEqual("{{a|e=f|b=c|d}}", node4) + self.assertEqual("{{a|b=c|f=g| d =e}}", node5) + self.assertEqual("{{a|b=c|b=d|f=g|b=e}}", node6) + self.assertEqual("{{a|b=c|d}}", node7) + self.assertEqual("{{a|b=c|e=f|d}}", node8) + self.assertEqual("{{a|b=c|e=f|d}}", node9) + self.assertEqual("{{a|b=c|e}}", node10) + self.assertEqual("{{a|b=c|d=foo=bar}}", node11) + self.assertEqual("{{a|b=c|foo=bar}}", node12) + self.assertIsInstance(node12.params[1].value.get(1), HTMLEntity) + self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|\nh = i}}", node13) + self.assertEqual("{{a\n|b =c\n|d = e|f =g\n|h = i\n|j =k\n}}", node14) + self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |h =i}}", node15) + self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|h=i}}", node16) + self.assertEqual("{{a|\nb = c|\nd = e|\nf = g| i}}", node17) + self.assertEqual("{{a\n|b =c\n|d = e|f =g\n|h = i\n|k\n}}", node18) + self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |i}}", node19) + self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|i}}", node20) + 
self.assertEqual("{{a|b|c}}", node21) + self.assertEqual("{{a|b|3=c}}", node22) + self.assertEqual("{{a|b|c=d}}", node23) + self.assertEqual("{{a|b|c|d|e|f}}", node24) + self.assertEqual("{{a|b|c|4=d|5=e|f}}", node25) + self.assertEqual("{{a|b|c|4=d|5=e|6=f}}", node26) + self.assertEqual("{{a|b|c=foo=bar}}", node27) + self.assertEqual("{{a|b|foo=bar}}", node28) + self.assertIsInstance(node28.params[1].value.get(1), HTMLEntity) + self.assertEqual("{{a|b=d}}", node29) + self.assertEqual("{{a|foo=bar}}", node30) + self.assertIsInstance(node30.params[0].value.get(1), HTMLEntity) + self.assertEqual("{{a|1=foo=bar}}", node31) + self.assertEqual("{{a|foo=bar}}", node32) + self.assertIsInstance(node32.params[0].value.get(1), HTMLEntity) + self.assertEqual("{{a|\nb = c|\nd = foo|\nf = g}}", node33) + self.assertEqual("{{a\n|b =c\n|d = e|f =foo\n|h = i\n}}", node34) + self.assertEqual("{{a|b = c\n|\nd = e|\nf =foo }}", node35) + self.assertEqual("{{a|\nb = c |\nd =foo|\nf = g }}", node36) + self.assertEqual("{{a|b=k|d=e|i=j}}", node37) + self.assertEqual("{{a|1=e|x=y|2=d}}", node38) + self.assertEqual("{{a|x=y|e|d}}", node39) + self.assertEqual("{{a|b=c|d=h|f=g}}", node40) + + def test_remove(self): + """test Template.remove()""" + node1 = Template(wraptext("foobar")) + node2 = Template(wraptext("foo"), [pgenh("1", "bar"), + pgens("abc", "def")]) + node3 = Template(wraptext("foo"), [pgenh("1", "bar"), + pgens("abc", "def")]) + node4 = Template(wraptext("foo"), [pgenh("1", "bar"), + pgenh("2", "baz")]) + node5 = Template(wraptext("foo"), [ + pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) + node6 = Template(wraptext("foo"), [ + pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) + node7 = Template(wraptext("foo"), [ + pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")]) + node8 = Template(wraptext("foo"), [ + pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")]) + node9 = Template(wraptext("foo"), [ + pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]) + node10 = 
Template(wraptext("foo"), [ + pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]) + + node2.remove("1") + node2.remove("abc") + node3.remove(1, keep_field=True) + node3.remove("abc", keep_field=True) + node4.remove("1", keep_field=False) + node5.remove("a", keep_field=False) + node6.remove("a", keep_field=True) + node7.remove(1, keep_field=True) + node8.remove(1, keep_field=False) + node9.remove(1, keep_field=True) + node10.remove(1, keep_field=False) + + self.assertRaises(ValueError, node1.remove, 1) + self.assertRaises(ValueError, node1.remove, "a") + self.assertRaises(ValueError, node2.remove, "1") + self.assertEqual("{{foo}}", node2) + self.assertEqual("{{foo||abc=}}", node3) + self.assertEqual("{{foo||baz}}", node4) + self.assertEqual("{{foo|b=c}}", node5) + self.assertEqual("{{foo| a=|b=c}}", node6) + self.assertEqual("{{foo|1 =|2=c}}", node7) + self.assertEqual("{{foo|2=c}}", node8) + self.assertEqual("{{foo||c}}", node9) + self.assertEqual("{{foo||c}}", node10) if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_text.py b/tests/test_text.py new file mode 100644 index 0000000..35ac340 --- /dev/null +++ b/tests/test_text.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Text + +class TestText(unittest.TestCase): + """Test cases for the Text node.""" + + def test_unicode(self): + """test Text.__unicode__()""" + node = Text("foobar") + self.assertEqual("foobar", str(node)) + node2 = Text("fóóbar") + self.assertEqual("fóóbar", str(node2)) + + def test_iternodes(self): + """test Text.__iternodes__()""" + node = Text("foobar") + gen = node.__iternodes__(None) + self.assertEqual((None, node), next(gen)) + self.assertRaises(StopIteration, next, gen) + + def test_strip(self): + """test Text.__strip__()""" + node = Text("foobar") + for a in (True, False): + for b in (True, False): + self.assertIs(node, node.__strip__(a, b)) + + def test_showtree(self): + """test Text.__showtree__()""" + output = [] + node1 = Text("foobar") + node2 = Text("fóóbar") + node3 = Text("𐌲𐌿𐍄") + node1.__showtree__(output.append, None, None) + node2.__showtree__(output.append, None, None) + node3.__showtree__(output.append, None, None) + res = ["foobar", r"f\xf3\xf3bar", "\\U00010332\\U0001033f\\U00010344"] + self.assertEqual(res, output) + + def test_value(self): + """test getter/setter for the value attribute""" + node = Text("foobar") + self.assertEqual("foobar", node.value) + self.assertIsInstance(node.value, str) + node.value = "héhéhé" + self.assertEqual("héhéhé", node.value) + self.assertIsInstance(node.value, str) + +if __name__ == "__main__": + 
unittest.main(verbosity=2) diff --git a/tests/test_tokens.py b/tests/test_tokens.py new file mode 100644 index 0000000..4620982 --- /dev/null +++ b/tests/test_tokens.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import py3k +from mwparserfromhell.parser import tokens + +class TestTokens(unittest.TestCase): + """Test cases for the Token class and its subclasses.""" + + def test_issubclass(self): + """check that all classes within the tokens module are really Tokens""" + for name in tokens.__all__: + klass = getattr(tokens, name) + self.assertTrue(issubclass(klass, tokens.Token)) + self.assertIsInstance(klass(), klass) + self.assertIsInstance(klass(), tokens.Token) + + def test_attributes(self): + """check that Token attributes can be managed properly""" + token1 = tokens.Token() + token2 = tokens.Token(foo="bar", baz=123) + + self.assertEqual("bar", token2.foo) + self.assertEqual(123, token2.baz) + self.assertRaises(KeyError, lambda: token1.foo) + self.assertRaises(KeyError, lambda: token2.bar) + + token1.spam = "eggs" + token2.foo = "ham" + del token2.baz + + self.assertEqual("eggs", token1.spam) + self.assertEqual("ham", token2.foo) + self.assertRaises(KeyError, lambda: token2.baz) + self.assertRaises(KeyError, delattr, token2, "baz") + + def test_repr(self): + """check that repr() on a Token works as expected""" + token1 = tokens.Token() + token2 = tokens.Token(foo="bar", baz=123) + token3 = tokens.Text(text="earwig" * 100) + hundredchars = ("earwig" * 100)[:97] + "..." 
+ + self.assertEqual("Token()", repr(token1)) + if py3k: + token2repr1 = "Token(foo='bar', baz=123)" + token2repr2 = "Token(baz=123, foo='bar')" + token3repr = "Text(text='" + hundredchars + "')" + else: + token2repr1 = "Token(foo=u'bar', baz=123)" + token2repr2 = "Token(baz=123, foo=u'bar')" + token3repr = "Text(text=u'" + hundredchars + "')" + token2repr = repr(token2) + self.assertTrue(token2repr == token2repr1 or token2repr == token2repr2) + self.assertEqual(token3repr, repr(token3)) + + def test_equality(self): + """check that equivalent tokens are considered equal""" + token1 = tokens.Token() + token2 = tokens.Token() + token3 = tokens.Token(foo="bar", baz=123) + token4 = tokens.Text(text="asdf") + token5 = tokens.Text(text="asdf") + token6 = tokens.TemplateOpen(text="asdf") + + self.assertEqual(token1, token2) + self.assertEqual(token2, token1) + self.assertEqual(token4, token5) + self.assertEqual(token5, token4) + self.assertNotEqual(token1, token3) + self.assertNotEqual(token2, token3) + self.assertNotEqual(token4, token6) + self.assertNotEqual(token5, token6) + + def test_repr_equality(self): + "check that eval(repr(token)) == token" + tests = [ + tokens.Token(), + tokens.Token(foo="bar", baz=123), + tokens.Text(text="earwig") + ] + for token in tests: + self.assertEqual(token, eval(repr(token), vars(tokens))) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..80a0e5e --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to 
whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.nodes import Template, Text +from mwparserfromhell.utils import parse_anything + +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext + +class TestUtils(TreeEqualityTestCase): + """Tests for the utils module, which provides parse_anything().""" + + def test_parse_anything_valid(self): + """tests for valid input to utils.parse_anything()""" + tests = [ + (wraptext("foobar"), wraptext("foobar")), + (Template(wraptext("spam")), wrap([Template(wraptext("spam"))])), + ("fóóbar", wraptext("fóóbar")), + (b"foob\xc3\xa1r", wraptext("foobár")), + (123, wraptext("123")), + (True, wraptext("True")), + (None, wrap([])), + ([Text("foo"), Text("bar"), Text("baz")], + wraptext("foo", "bar", "baz")), + ([wraptext("foo"), Text("bar"), "baz", 123, 456], + wraptext("foo", "bar", "baz", "123", "456")), + ([[[([[((("foo",),),)], "bar"],)]]], wraptext("foo", "bar")) + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, parse_anything(test)) + + def test_parse_anything_invalid(self): + """tests for invalid input to utils.parse_anything()""" + self.assertRaises(ValueError, parse_anything, Ellipsis) + self.assertRaises(ValueError, parse_anything, object) + self.assertRaises(ValueError, 
parse_anything, object()) + self.assertRaises(ValueError, parse_anything, type) + self.assertRaises(ValueError, parse_anything, ["foo", [object]]) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py new file mode 100644 index 0000000..8dfa655 --- /dev/null +++ b/tests/test_wikicode.py @@ -0,0 +1,364 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import re +from types import GeneratorType +import unittest + +from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, + Node, Tag, Template, Text, Wikilink) +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode +from mwparserfromhell import parse +from mwparserfromhell.compat import py3k, str + +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext + +class TestWikicode(TreeEqualityTestCase): + """Tests for the Wikicode class, which manages a list of nodes.""" + + def test_unicode(self): + """test Wikicode.__unicode__()""" + code1 = parse("foobar") + code2 = parse("Have a {{template}} and a [[page|link]]") + self.assertEqual("foobar", str(code1)) + self.assertEqual("Have a {{template}} and a [[page|link]]", str(code2)) + + def test_nodes(self): + """test getter/setter for the nodes attribute""" + code = parse("Have a {{template}}") + self.assertEqual(["Have a ", "{{template}}"], code.nodes) + L1 = SmartList([Text("foobar"), Template(wraptext("abc"))]) + L2 = [Text("barfoo"), Template(wraptext("cba"))] + L3 = "abc{{def}}" + code.nodes = L1 + self.assertIs(L1, code.nodes) + code.nodes = L2 + self.assertIs(L2, code.nodes) + code.nodes = L3 + self.assertEqual(["abc", "{{def}}"], code.nodes) + self.assertRaises(ValueError, setattr, code, "nodes", object) + + def test_get(self): + """test Wikicode.get()""" + code = parse("Have a {{template}} and a [[page|link]]") + self.assertIs(code.nodes[0], code.get(0)) + self.assertIs(code.nodes[2], code.get(2)) + self.assertRaises(IndexError, code.get, 4) + + def test_set(self): + """test Wikicode.set()""" + code = parse("Have a {{template}} and a [[page|link]]") + code.set(1, "{{{argument}}}") + self.assertEqual("Have a {{{argument}}} and a [[page|link]]", code) + self.assertIsInstance(code.get(1), Argument) + code.set(2, None) + self.assertEqual("Have a {{{argument}}}[[page|link]]", code) + code.set(-3, 
"This is an ") + self.assertEqual("This is an {{{argument}}}[[page|link]]", code) + self.assertRaises(ValueError, code.set, 1, "foo {{bar}}") + self.assertRaises(IndexError, code.set, 3, "{{baz}}") + self.assertRaises(IndexError, code.set, -4, "{{baz}}") + + def test_index(self): + """test Wikicode.index()""" + code = parse("Have a {{template}} and a [[page|link]]") + self.assertEqual(0, code.index("Have a ")) + self.assertEqual(3, code.index("[[page|link]]")) + self.assertEqual(1, code.index(code.get(1))) + self.assertRaises(ValueError, code.index, "foo") + + code = parse("{{foo}}{{bar|{{baz}}}}") + self.assertEqual(1, code.index("{{bar|{{baz}}}}")) + self.assertEqual(1, code.index("{{baz}}", recursive=True)) + self.assertEqual(1, code.index(code.get(1).get(1).value, + recursive=True)) + self.assertRaises(ValueError, code.index, "{{baz}}", recursive=False) + self.assertRaises(ValueError, code.index, + code.get(1).get(1).value, recursive=False) + + def test_insert(self): + """test Wikicode.insert()""" + code = parse("Have a {{template}} and a [[page|link]]") + code.insert(1, "{{{argument}}}") + self.assertEqual( + "Have a {{{argument}}}{{template}} and a [[page|link]]", code) + self.assertIsInstance(code.get(1), Argument) + code.insert(2, None) + self.assertEqual( + "Have a {{{argument}}}{{template}} and a [[page|link]]", code) + code.insert(-3, Text("foo")) + self.assertEqual( + "Have a {{{argument}}}foo{{template}} and a [[page|link]]", code) + + code2 = parse("{{foo}}{{bar}}{{baz}}") + code2.insert(1, "abc{{def}}ghi[[jk]]") + self.assertEqual("{{foo}}abc{{def}}ghi[[jk]]{{bar}}{{baz}}", code2) + self.assertEqual(["{{foo}}", "abc", "{{def}}", "ghi", "[[jk]]", + "{{bar}}", "{{baz}}"], code2.nodes) + + code3 = parse("{{foo}}bar") + code3.insert(1000, "[[baz]]") + code3.insert(-1000, "derp") + self.assertEqual("derp{{foo}}bar[[baz]]", code3) + + def test_insert_before(self): + """test Wikicode.insert_before()""" + code = parse("{{a}}{{b}}{{c}}{{d}}") + 
code.insert_before("{{b}}", "x", recursive=True) + code.insert_before("{{d}}", "[[y]]", recursive=False) + self.assertEqual("{{a}}x{{b}}{{c}}[[y]]{{d}}", code) + code.insert_before(code.get(2), "z") + self.assertEqual("{{a}}xz{{b}}{{c}}[[y]]{{d}}", code) + self.assertRaises(ValueError, code.insert_before, "{{r}}", "n", + recursive=True) + self.assertRaises(ValueError, code.insert_before, "{{r}}", "n", + recursive=False) + + code2 = parse("{{a|{{b}}|{{c|d={{f}}}}}}") + code2.insert_before(code2.get(0).params[0].value.get(0), "x", + recursive=True) + code2.insert_before("{{f}}", "y", recursive=True) + self.assertEqual("{{a|x{{b}}|{{c|d=y{{f}}}}}}", code2) + self.assertRaises(ValueError, code2.insert_before, "{{f}}", "y", + recursive=False) + + def test_insert_after(self): + """test Wikicode.insert_after()""" + code = parse("{{a}}{{b}}{{c}}{{d}}") + code.insert_after("{{b}}", "x", recursive=True) + code.insert_after("{{d}}", "[[y]]", recursive=False) + self.assertEqual("{{a}}{{b}}x{{c}}{{d}}[[y]]", code) + code.insert_after(code.get(2), "z") + self.assertEqual("{{a}}{{b}}xz{{c}}{{d}}[[y]]", code) + self.assertRaises(ValueError, code.insert_after, "{{r}}", "n", + recursive=True) + self.assertRaises(ValueError, code.insert_after, "{{r}}", "n", + recursive=False) + + code2 = parse("{{a|{{b}}|{{c|d={{f}}}}}}") + code2.insert_after(code2.get(0).params[0].value.get(0), "x", + recursive=True) + code2.insert_after("{{f}}", "y", recursive=True) + self.assertEqual("{{a|{{b}}x|{{c|d={{f}}y}}}}", code2) + self.assertRaises(ValueError, code2.insert_after, "{{f}}", "y", + recursive=False) + + def test_replace(self): + """test Wikicode.replace()""" + code = parse("{{a}}{{b}}{{c}}{{d}}") + code.replace("{{b}}", "x", recursive=True) + code.replace("{{d}}", "[[y]]", recursive=False) + self.assertEqual("{{a}}x{{c}}[[y]]", code) + code.replace(code.get(1), "z") + self.assertEqual("{{a}}z{{c}}[[y]]", code) + self.assertRaises(ValueError, code.replace, "{{r}}", "n", + recursive=True) + 
self.assertRaises(ValueError, code.replace, "{{r}}", "n", + recursive=False) + + code2 = parse("{{a|{{b}}|{{c|d={{f}}}}}}") + code2.replace(code2.get(0).params[0].value.get(0), "x", recursive=True) + code2.replace("{{f}}", "y", recursive=True) + self.assertEqual("{{a|x|{{c|d=y}}}}", code2) + self.assertRaises(ValueError, code2.replace, "y", "z", recursive=False) + + def test_append(self): + """test Wikicode.append()""" + code = parse("Have a {{template}}") + code.append("{{{argument}}}") + self.assertEqual("Have a {{template}}{{{argument}}}", code) + self.assertIsInstance(code.get(2), Argument) + code.append(None) + self.assertEqual("Have a {{template}}{{{argument}}}", code) + code.append(Text(" foo")) + self.assertEqual("Have a {{template}}{{{argument}}} foo", code) + self.assertRaises(ValueError, code.append, slice(0, 1)) + + def test_remove(self): + """test Wikicode.remove()""" + code = parse("{{a}}{{b}}{{c}}{{d}}") + code.remove("{{b}}", recursive=True) + code.remove(code.get(1), recursive=True) + self.assertEqual("{{a}}{{d}}", code) + self.assertRaises(ValueError, code.remove, "{{r}}", recursive=True) + self.assertRaises(ValueError, code.remove, "{{r}}", recursive=False) + + code2 = parse("{{a|{{b}}|{{c|d={{f}}{{h}}}}}}") + code2.remove(code2.get(0).params[0].value.get(0), recursive=True) + code2.remove("{{f}}", recursive=True) + self.assertEqual("{{a||{{c|d={{h}}}}}}", code2) + self.assertRaises(ValueError, code2.remove, "{{h}}", recursive=False) + + def test_filter_family(self): + """test the Wikicode.i?filter() family of functions""" + def genlist(gen): + self.assertIsInstance(gen, GeneratorType) + return list(gen) + ifilter = lambda code: (lambda **kw: genlist(code.ifilter(**kw))) + + code = parse("a{{b}}c[[d]]{{{e}}}{{f}}[[g]]") + for func in (code.filter, ifilter(code)): + self.assertEqual(["a", "{{b}}", "c", "[[d]]", "{{{e}}}", "{{f}}", + "[[g]]"], func()) + self.assertEqual(["{{{e}}}"], func(forcetype=Argument)) + self.assertIs(code.get(4), 
func(forcetype=Argument)[0]) + self.assertEqual(["a", "c"], func(forcetype=Text)) + self.assertEqual([], func(forcetype=Heading)) + self.assertRaises(TypeError, func, forcetype=True) + + funcs = [ + lambda name, **kw: getattr(code, "filter_" + name)(**kw), + lambda name, **kw: genlist(getattr(code, "ifilter_" + name)(**kw)) + ] + for get_filter in funcs: + self.assertEqual(["{{{e}}}"], get_filter("arguments")) + self.assertIs(code.get(4), get_filter("arguments")[0]) + self.assertEqual([], get_filter("comments")) + self.assertEqual([], get_filter("headings")) + self.assertEqual([], get_filter("html_entities")) + self.assertEqual([], get_filter("tags")) + self.assertEqual(["{{b}}", "{{f}}"], get_filter("templates")) + self.assertEqual(["a", "c"], get_filter("text")) + self.assertEqual(["[[d]]", "[[g]]"], get_filter("wikilinks")) + + code2 = parse("{{a|{{b}}|{{c|d={{f}}{{h}}}}}}") + for func in (code2.filter, ifilter(code2)): + self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"], + func(recursive=False, forcetype=Template)) + self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}", + "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"], + func(recursive=True, forcetype=Template)) + + code3 = parse("{{foobar}}{{FOO}}{{baz}}{{bz}}") + for func in (code3.filter, ifilter(code3)): + self.assertEqual(["{{foobar}}", "{{FOO}}"], func(matches=r"foo")) + self.assertEqual(["{{foobar}}", "{{FOO}}"], + func(matches=r"^{{foo.*?}}")) + self.assertEqual(["{{foobar}}"], + func(matches=r"^{{foo.*?}}", flags=re.UNICODE)) + self.assertEqual(["{{baz}}", "{{bz}}"], func(matches=r"^{{b.*?z")) + self.assertEqual(["{{baz}}"], func(matches=r"^{{b.+?z}}")) + + self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"], + code2.filter_templates(recursive=False)) + self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}", + "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"], + code2.filter_templates(recursive=True)) + self.assertEqual(["{{baz}}", "{{bz}}"], + code3.filter_templates(matches=r"^{{b.*?z")) + 
self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z")) + self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z", flags=0)) + + self.assertRaises(TypeError, code.filter_templates, 100) + self.assertRaises(TypeError, code.filter_templates, a=42) + self.assertRaises(TypeError, code.filter_templates, forcetype=Template) + + def test_get_sections(self): + """test Wikicode.get_sections()""" + page1 = parse("") + page2 = parse("==Heading==") + page3 = parse("===Heading===\nFoo bar baz\n====Gnidaeh====\n") + + p4_lead = "This is a lead.\n" + p4_IA = "=== Section I.A ===\nSection I.A [[body]].\n" + p4_IB1 = "==== Section I.B.1 ====\nSection I.B.1 body.\n\n•Some content.\n\n" + p4_IB = "=== Section I.B ===\n" + p4_IB1 + p4_I = "== Section I ==\nSection I body. {{and a|template}}\n" + p4_IA + p4_IB + p4_II = "== Section II ==\nSection II body.\n\n" + p4_IIIA1a = "===== Section III.A.1.a =====\nMore text.\n" + p4_IIIA2ai1 = "======= Section III.A.2.a.i.1 =======\nAn invalid section!" + p4_IIIA2 = "==== Section III.A.2 ====\nEven more text.\n" + p4_IIIA2ai1 + p4_IIIA = "=== Section III.A ===\nText.\n" + p4_IIIA1a + p4_IIIA2 + p4_III = "== Section III ==\n" + p4_IIIA + page4 = parse(p4_lead + p4_I + p4_II + p4_III) + + self.assertEqual([], page1.get_sections()) + self.assertEqual(["", "==Heading=="], page2.get_sections()) + self.assertEqual(["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n", + "====Gnidaeh====\n"], page3.get_sections()) + self.assertEqual([p4_lead, p4_IA, p4_I, p4_IB, p4_IB1, p4_II, + p4_IIIA1a, p4_III, p4_IIIA, p4_IIIA2, p4_IIIA2ai1], + page4.get_sections()) + + self.assertEqual(["====Gnidaeh====\n"], page3.get_sections(levels=[4])) + self.assertEqual(["===Heading===\nFoo bar baz\n====Gnidaeh====\n"], + page3.get_sections(levels=(2, 3))) + self.assertEqual([], page3.get_sections(levels=[0])) + self.assertEqual(["", "====Gnidaeh====\n"], + page3.get_sections(levels=[4], include_lead=True)) + self.assertEqual(["===Heading===\nFoo bar 
baz\n====Gnidaeh====\n", + "====Gnidaeh====\n"], + page3.get_sections(include_lead=False)) + + self.assertEqual([p4_IB1, p4_IIIA2], page4.get_sections(levels=[4])) + self.assertEqual([""], page2.get_sections(include_headings=False)) + self.assertEqual(["\nSection I.B.1 body.\n\n•Some content.\n\n", + "\nEven more text.\n" + p4_IIIA2ai1], + page4.get_sections(levels=[4], + include_headings=False)) + + self.assertEqual([], page4.get_sections(matches=r"body")) + self.assertEqual([p4_IA, p4_I, p4_IB, p4_IB1], + page4.get_sections(matches=r"Section\sI[.\s].*?")) + self.assertEqual([p4_IA, p4_IIIA1a, p4_IIIA, p4_IIIA2, p4_IIIA2ai1], + page4.get_sections(matches=r".*?a.*?")) + self.assertEqual([p4_IIIA1a, p4_IIIA2ai1], + page4.get_sections(matches=r".*?a.*?", flags=re.U)) + self.assertEqual(["\nMore text.\n", "\nAn invalid section!"], + page4.get_sections(matches=r".*?a.*?", flags=re.U, + include_headings=False)) + + page5 = parse("X\n== Foo ==\nBar\n== Baz ==\nBuzz") + section = page5.get_sections(matches="Foo")[0] + section.replace("\nBar\n", "\nBarf ") + section.append("{{Haha}}\n") + self.assertEqual("== Foo ==\nBarf {{Haha}}\n", section) + self.assertEqual("X\n== Foo ==\nBarf {{Haha}}\n== Baz ==\nBuzz", page5) + + def test_strip_code(self): + """test Wikicode.strip_code()""" + # Since individual nodes have test cases for their __strip__ methods, + # we're only going to do an integration test: + code = parse("Foo [[bar]]\n\n{{baz}}\n\n[[a|b]] Σ") + self.assertEqual("Foo bar\n\nb Σ", + code.strip_code(normalize=True, collapse=True)) + self.assertEqual("Foo bar\n\n\n\nb Σ", + code.strip_code(normalize=True, collapse=False)) + self.assertEqual("Foo bar\n\nb Σ", + code.strip_code(normalize=False, collapse=True)) + self.assertEqual("Foo bar\n\n\n\nb Σ", + code.strip_code(normalize=False, collapse=False)) + + def test_get_tree(self): + """test Wikicode.get_tree()""" + # Since individual nodes have test cases for their __showtree__ + # methods, and the docstring covers all 
possibilities for the output of + # __showtree__, we'll test it only: + code = parse("Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}") + expected = "Lorem ipsum \n{{\n\t foo\n\t| 1\n\t= bar\n\t| 2\n\t= " + \ + "{{\n\t\t\tbaz\n\t }}\n\t| spam\n\t= eggs\n}}" + self.assertEqual(expected.expandtabs(4), code.get_tree()) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py new file mode 100644 index 0000000..7851032 --- /dev/null +++ b/tests/test_wikilink.py @@ -0,0 +1,107 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Text, Wikilink + +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext + +class TestWikilink(TreeEqualityTestCase): + """Test cases for the Wikilink node.""" + + def test_unicode(self): + """test Wikilink.__unicode__()""" + node = Wikilink(wraptext("foobar")) + self.assertEqual("[[foobar]]", str(node)) + node2 = Wikilink(wraptext("foo"), wraptext("bar")) + self.assertEqual("[[foo|bar]]", str(node2)) + + def test_iternodes(self): + """test Wikilink.__iternodes__()""" + node1n1 = Text("foobar") + node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("baz") + node1 = Wikilink(wrap([node1n1])) + node2 = Wikilink(wrap([node2n1]), wrap([node2n2, node2n3])) + gen1 = node1.__iternodes__(getnodes) + gen2 = node2.__iternodes__(getnodes) + self.assertEqual((None, node1), next(gen1)) + self.assertEqual((None, node2), next(gen2)) + self.assertEqual((node1.title, node1n1), next(gen1)) + self.assertEqual((node2.title, node2n1), next(gen2)) + self.assertEqual((node2.text, node2n2), next(gen2)) + self.assertEqual((node2.text, node2n3), next(gen2)) + self.assertRaises(StopIteration, next, gen1) + self.assertRaises(StopIteration, next, gen2) + + def test_strip(self): + """test Wikilink.__strip__()""" + node = Wikilink(wraptext("foobar")) + node2 = Wikilink(wraptext("foo"), wraptext("bar")) + for a in (True, False): + for b in (True, False): + self.assertEqual("foobar", node.__strip__(a, b)) + self.assertEqual("bar", node2.__strip__(a, b)) + + def test_showtree(self): + """test Wikilink.__showtree__()""" + output = [] + getter, marker = object(), object() + get = lambda code: output.append((getter, code)) + mark = lambda: output.append(marker) + node1 = Wikilink(wraptext("foobar")) + node2 = Wikilink(wraptext("foo"), wraptext("bar")) + node1.__showtree__(output.append, get, mark) + node2.__showtree__(output.append, 
get, mark) + valid = [ + "[[", (getter, node1.title), "]]", "[[", (getter, node2.title), + " | ", marker, (getter, node2.text), "]]"] + self.assertEqual(valid, output) + + def test_title(self): + """test getter/setter for the title attribute""" + title = wraptext("foobar") + node1 = Wikilink(title) + node2 = Wikilink(title, wraptext("baz")) + self.assertIs(title, node1.title) + self.assertIs(title, node2.title) + node1.title = "héhehé" + node2.title = "héhehé" + self.assertWikicodeEqual(wraptext("héhehé"), node1.title) + self.assertWikicodeEqual(wraptext("héhehé"), node2.title) + + def test_text(self): + """test getter/setter for the text attribute""" + text = wraptext("baz") + node1 = Wikilink(wraptext("foobar")) + node2 = Wikilink(wraptext("foobar"), text) + self.assertIs(None, node1.text) + self.assertIs(text, node2.text) + node1.text = "buzz" + node2.text = None + self.assertWikicodeEqual(wraptext("buzz"), node1.text) + self.assertIs(None, node2.text) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/tokenizer/arguments.mwtest b/tests/tokenizer/arguments.mwtest new file mode 100644 index 0000000..3270a96 --- /dev/null +++ b/tests/tokenizer/arguments.mwtest @@ -0,0 +1,130 @@ +name: blank +label: argument with no content +input: "{{{}}}" +output: [ArgumentOpen(), ArgumentClose()] + +--- + +name: blank_with_default +label: argument with no content but a pipe +input: "{{{|}}}" +output: [ArgumentOpen(), ArgumentSeparator(), ArgumentClose()] + +--- + +name: basic +label: simplest type of argument +input: "{{{argument}}}" +output: [ArgumentOpen(), Text(text="argument"), ArgumentClose()] + +--- + +name: default +label: argument with a default value +input: "{{{foo|bar}}}" +output: [ArgumentOpen(), Text(text="foo"), ArgumentSeparator(), Text(text="bar"), ArgumentClose()] + +--- + +name: blank_with_multiple_defaults +label: no content, multiple pipes +input: "{{{|||}}}" +output: [ArgumentOpen(), ArgumentSeparator(), Text(text="||"), 
ArgumentClose()] + +--- + +name: multiple_defaults +label: multiple values separated by pipes +input: "{{{foo|bar|baz}}}" +output: [ArgumentOpen(), Text(text="foo"), ArgumentSeparator(), Text(text="bar|baz"), ArgumentClose()] + +--- + +name: newline +label: newline as only content +input: "{{{\n}}}" +output: [ArgumentOpen(), Text(text="\n"), ArgumentClose()] + +--- + +name: right_braces +label: multiple } scattered throughout text +input: "{{{foo}b}a}r}}}" +output: [ArgumentOpen(), Text(text="foo}b}a}r"), ArgumentClose()] + +--- + +name: right_braces_default +label: multiple } scattered throughout text, with a default value +input: "{{{foo}b}|}a}r}}}" +output: [ArgumentOpen(), Text(text="foo}b}"), ArgumentSeparator(), Text(text="}a}r"), ArgumentClose()] + +--- + +name: nested +label: an argument nested within another argument +input: "{{{{{{foo}}}|{{{bar}}}}}}" +output: [ArgumentOpen(), ArgumentOpen(), Text(text="foo"), ArgumentClose(), ArgumentSeparator(), ArgumentOpen(), Text(text="bar"), ArgumentClose(), ArgumentClose()] + +--- + +name: invalid_braces +label: invalid argument: multiple braces that are not part of a template or argument +input: "{{{foo{{[a}}}}}" +output: [Text(text="{{{foo{{[a}}}}}")] + +--- + +name: incomplete_open_only +label: incomplete arguments: just an open +input: "{{{" +output: [Text(text="{{{")] + +--- + +name: incomplete_open_text +label: incomplete arguments: an open with some text +input: "{{{foo" +output: [Text(text="{{{foo")] + +--- + +name: incomplete_open_text_pipe +label: incomplete arguments: an open, text, then a pipe +input: "{{{foo|" +output: [Text(text="{{{foo|")] + +--- + +name: incomplete_open_pipe +label: incomplete arguments: an open, then a pipe +input: "{{{|" +output: [Text(text="{{{|")] + +--- + +name: incomplete_open_pipe_text +label: incomplete arguments: an open, then a pipe, then text +input: "{{{|foo" +output: [Text(text="{{{|foo")] + +--- + +name: incomplete_open_pipes_text +label: incomplete arguments: a pipe, 
then text then two pipes +input: "{{{|f||" +output: [Text(text="{{{|f||")] + +--- + +name: incomplete_open_partial_close +label: incomplete arguments: an open, then one right brace +input: "{{{{}" +output: [Text(text="{{{{}")] + +--- + +name: incomplete_preserve_previous +label: incomplete arguments: a valid argument followed by an invalid one +input: "{{{foo}}} {{{bar" +output: [ArgumentOpen(), Text(text="foo"), ArgumentClose(), Text(text=" {{{bar")] diff --git a/tests/tokenizer/comments.mwtest b/tests/tokenizer/comments.mwtest new file mode 100644 index 0000000..ea2e89f --- /dev/null +++ b/tests/tokenizer/comments.mwtest @@ -0,0 +1,39 @@ +name: blank +label: a blank comment +input: "<!---->" +output: [CommentStart(), CommentEnd()] + +--- + +name: basic +label: a basic comment +input: "<!-- comment -->" +output: [CommentStart(), Text(text=" comment "), CommentEnd()] + +--- + +name: tons_of_nonsense +label: a comment with tons of ignorable garbage in it +input: "" +output: [CommentStart(), Text(text=" foo{{bar}}[[basé\n\n]{}{}{}{}]{{{{{{haha{{--a>aabsp;" +output: [Text(text="&n"), CommentStart(), Text(text="foo"), CommentEnd(), Text(text="bsp;")] + +--- + +name: wildcard +label: a wildcard assortment of various things +input: "{{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}" +output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), Text(text="biz"), TemplateClose(), Text(text="buzz"), TemplateClose(), Text(text="usr"), TemplateParamSeparator(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] + +--- + +name: wildcard_redux +label: an even wilder assortment of various things +input: "{{a|b|{{c|[[d]]{{{e}}}}}}}[[f|{{{g}}}<!--h-->]]{{i|j=&nbsp;}}" +output: [TemplateOpen(), Text(text="a"), TemplateParamSeparator(), Text(text="b"), TemplateParamSeparator(), TemplateOpen(), Text(text="c"), TemplateParamSeparator(), WikilinkOpen(), Text(text="d"), 
WikilinkClose(), ArgumentOpen(), Text(text="e"), ArgumentClose(), TemplateClose(), TemplateClose(), WikilinkOpen(), Text(text="f"), WikilinkSeparator(), ArgumentOpen(), Text(text="g"), ArgumentClose(), CommentStart(), Text(text="h"), CommentEnd(), WikilinkClose(), TemplateOpen(), Text(text="i"), TemplateParamSeparator(), Text(text="j"), TemplateParamEquals(), HTMLEntityStart(), Text(text="nbsp"), HTMLEntityEnd(), TemplateClose()] diff --git a/tests/tokenizer/templates.mwtest b/tests/tokenizer/templates.mwtest new file mode 100644 index 0000000..78d7883 --- /dev/null +++ b/tests/tokenizer/templates.mwtest @@ -0,0 +1,641 @@ +name: blank +label: template with no content +input: "{{}}" +output: [TemplateOpen(), TemplateClose()] + +--- + +name: blank_with_params +label: template with no content, but pipes and equal signs +input: "{{||=|}}" +output: [TemplateOpen(), TemplateParamSeparator(), TemplateParamSeparator(), TemplateParamEquals(), TemplateParamSeparator(), TemplateClose()] + +--- + +name: no_params +label: simplest type of template +input: "{{template}}" +output: [TemplateOpen(), Text(text="template"), TemplateClose()] + +--- + +name: one_param_unnamed +label: basic template with one unnamed parameter +input: "{{foo|bar}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateClose()] + +--- + +name: one_param_named +label: basic template with one named parameter +input: "{{foo|bar=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: multiple_unnamed_params +label: basic template with multiple unnamed parameters +input: "{{foo|bar|baz|biz|buzz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateClose()] + +--- + +name: 
multiple_named_params +label: basic template with multiple named parameters +input: "{{foo|bar=baz|biz=buzz|buff=baff|usr=bin}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), Text(text="buzz"), TemplateParamSeparator(), Text(text="buff"), TemplateParamEquals(), Text(text="baff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamEquals(), Text(text="bin"), TemplateClose()] + +--- + +name: multiple_mixed_params +label: basic template with multiple unnamed/named parameters +input: "{{foo|bar=baz|biz|buzz=buff|usr|bin}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateParamEquals(), Text(text="buff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamSeparator(), Text(text="bin"), TemplateClose()] + +--- + +name: multiple_mixed_params2 +label: basic template with multiple unnamed/named parameters in another order +input: "{{foo|bar|baz|biz=buzz|buff=baff|usr=bin}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), Text(text="buzz"), TemplateParamSeparator(), Text(text="buff"), TemplateParamEquals(), Text(text="baff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamEquals(), Text(text="bin"), TemplateClose()] + +--- + +name: nested_unnamed_param +label: nested template as an unnamed parameter +input: "{{foo|{{bar}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()] + +--- + +name: nested_named_param_value +label: nested template as a parameter value with a named parameter +input: 
"{{foo|bar={{baz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_named_param_name_and_value +label: nested templates as a parameter name and value +input: "{{foo|{{bar}}={{baz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start +label: nested template at the beginning of a template name +input: "{{{{foo}}bar}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose()] + +--- + +name: nested_name_start_unnamed_param +label: nested template at the beginning of a template name and as an unnamed parameter +input: "{{{{foo}}bar|{{baz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_named_param_value +label: nested template at the beginning of a template name and as a parameter value with a named parameter +input: "{{{{foo}}bar|baz={{biz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_named_param_name_and_value +label: nested template at the beginning of a template name and as a parameter name and value +input: "{{{{foo}}bar|{{baz}}={{biz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), 
TemplateClose(), TemplateClose()] + +--- + +name: nested_name_end +label: nested template at the end of a template name +input: "{{foo{{bar}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_end_unnamed_param +label: nested template at the end of a template name and as an unnamed parameter +input: "{{foo{{bar}}|{{baz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_end_named_param_value +label: nested template at the end of a template name and as a parameter value with a named parameter +input: "{{foo{{bar}}|baz={{biz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_end_named_param_name_and_value +label: nested template at the end of a template name and as a parameter name and value +input: "{{foo{{bar}}|{{baz}}={{biz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_mid +label: nested template in the middle of a template name +input: "{{foo{{bar}}baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose()] + +--- + +name: nested_name_mid_unnamed_param +label: nested template in the middle of a template name and as an unnamed parameter +input: "{{foo{{bar}}baz|{{biz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), 
Text(text="baz"), TemplateParamSeparator(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_mid_named_param_value +label: nested template in the middle of a template name and as a parameter value with a named parameter +input: "{{foo{{bar}}baz|biz={{buzz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_mid_named_param_name_and_value +label: nested template in the middle of a template name and as a parameter name and value +input: "{{foo{{bar}}baz|{{biz}}={{buzz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_end +label: nested template at the beginning and end of a template name +input: "{{{{foo}}{{bar}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_end_unnamed_param +label: nested template at the beginning and end of a template name and as an unnamed parameter +input: "{{{{foo}}{{bar}}|{{baz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_end_named_param_value +label: nested template at the beginning and end of a template name and as a parameter value with a named parameter +input: "{{{{foo}}{{bar}}|baz={{biz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), 
TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_end_named_param_name_and_value +label: nested template at the beginning and end of a template name and as a parameter name and value +input: "{{{{foo}}{{bar}}|{{baz}}={{biz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_names_multiple +label: multiple nested templates within nested templates +input: "{{{{{{{{foo}}bar}}baz}}biz}}" +output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateClose()] + +--- + +name: nested_names_multiple_unnamed_param +label: multiple nested templates within nested templates with a nested unnamed parameter +input: "{{{{{{{{foo}}bar}}baz}}biz|{{buzz}}}}" +output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_names_multiple_named_param_value +label: multiple nested templates within nested templates with a nested parameter value in a named parameter +input: "{{{{{{{{foo}}bar}}baz}}biz|buzz={{bin}}}}" +output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), 
Text(text="buzz"), TemplateParamEquals(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] + +--- + +name: nested_names_multiple_named_param_name_and_value +label: multiple nested templates within nested templates with a nested parameter name and value +input: "{{{{{{{{foo}}bar}}baz}}biz|{{buzz}}={{bin}}}}" +output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] + +--- + +name: mixed_nested_templates +label: mixed assortment of nested templates within template names, parameter names, and values +input: "{{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}" +output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), Text(text="biz"), TemplateClose(), Text(text="buzz"), TemplateClose(), Text(text="usr"), TemplateParamSeparator(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] + +--- + +name: newlines_start +label: a newline at the start of a template name +input: "{{\nfoobar}}" +output: [TemplateOpen(), Text(text="\nfoobar"), TemplateClose()] + +--- + +name: newlines_end +label: a newline at the end of a template name +input: "{{foobar\n}}" +output: [TemplateOpen(), Text(text="foobar\n"), TemplateClose()] + +--- + +name: newlines_start_end +label: a newline at the start and end of a template name +input: "{{\nfoobar\n}}" +output: [TemplateOpen(), Text(text="\nfoobar\n"), TemplateClose()] + +--- + +name: newlines_mid +label: a newline at the middle of a template name +input: "{{foo\nbar}}" +output: [Text(text="{{foo\nbar}}")] + +--- + +name: newlines_start_mid +label: a newline at the start and 
middle of a template name +input: "{{\nfoo\nbar}}" +output: [Text(text="{{\nfoo\nbar}}")] + +--- + +name: newlines_mid_end +label: a newline at the middle and end of a template name +input: "{{foo\nbar\n}}" +output: [Text(text="{{foo\nbar\n}}")] + +--- + +name: newlines_start_mid_end +label: a newline at the start, middle, and end of a template name +input: "{{\nfoo\nbar\n}}" +output: [Text(text="{{\nfoo\nbar\n}}")] + +--- + +name: newlines_unnamed_param +label: newlines within an unnamed template parameter +input: "{{foo|\nb\nar\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] + +--- + +name: newlines_enclose_template_name_unnamed_param +label: newlines enclosing a template name and within an unnamed template parameter +input: "{{\nfoo\n|\nb\nar\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] + +--- + +name: newlines_within_template_name_unnamed_param +label: newlines within a template name and within an unnamed template parameter +input: "{{\nfo\no\n|\nb\nar\n}}" +output: [Text(text="{{\nfo\no\n|\nb\nar\n}}")] + +--- + +name: newlines_enclose_template_name_named_param_value +label: newlines enclosing a template name and within a named parameter value +input: "{{\nfoo\n|1=\nb\nar\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nb\nar\n"), TemplateClose()] + +--- + +name: newlines_within_template_name_named_param_value +label: newlines within a template name and within a named parameter value +input: "{{\nf\noo\n|1=\nb\nar\n}}" +output: [Text(text="{{\nf\noo\n|1=\nb\nar\n}}")] + +--- + +name: newlines_named_param_name +label: newlines within a parameter name +input: "{{foo|\nb\nar\n=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + 
+name: newlines_named_param_name_param_value +label: newlines within a parameter name and within a parameter value +input: "{{foo|\nb\nar\n=\nba\nz\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nba\nz\n"), TemplateClose()] + +--- + +name: newlines_enclose_template_name_named_param_name +label: newlines enclosing a template name and within a parameter name +input: "{{\nfoo\n|\nb\nar\n=baz}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: newlines_enclose_template_name_named_param_name_param_value +label: newlines enclosing a template name and within a parameter name and within a parameter value +input: "{{\nfoo\n|\nb\nar\n=\nba\nz\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nba\nz\n"), TemplateClose()] + +--- + +name: newlines_within_template_name_named_param_name +label: newlines within a template name and within a parameter name +input: "{{\nfo\no\n|\nb\nar\n=baz}}" +output: [Text(text="{{\nfo\no\n|\nb\nar\n=baz}}")] + +--- + +name: newlines_within_template_name_named_param_name_param_value +label: newlines within a template name and within a parameter name and within a parameter value +input: "{{\nf\noo\n|\nb\nar\n=\nba\nz\n}}" +output: [Text(text="{{\nf\noo\n|\nb\nar\n=\nba\nz\n}}")] + +--- + +name: newlines_wildcard +label: a random, complex assortment of templates and newlines +input: "{{\nfoo\n|\nb\nar\n=\nb\naz\n|\nb\nuz\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nb\naz\n"), TemplateParamSeparator(), Text(text="\nb\nuz\n"), TemplateClose()] + +--- + +name: newlines_wildcard_redux +label: an even more random and complex assortment of templates and newlines +input: 
"{{\nfoo\n|\n{{\nbar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\n"), TemplateOpen(), Text(text="\nbar\n"), TemplateParamSeparator(), Text(text="\nb\naz\n"), TemplateParamEquals(), Text(text="\nb\niz\n"), TemplateClose(), Text(text="\n"), TemplateParamEquals(), Text(text="\nb\nuzz\n"), TemplateClose()] + +--- + +name: newlines_wildcard_redux_invalid +label: a variation of the newlines_wildcard_redux test that is invalid +input: "{{\nfoo\n|\n{{\nb\nar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}" +output: [Text(text="{{\nfoo\n|\n{{\nb\nar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}")] + +--- + +name: invalid_name_left_brace_middle +label: invalid characters in template name: left brace in middle +input: "{{foo{bar}}" +output: [Text(text="{{foo{bar}}")] + +--- + +name: invalid_name_right_brace_middle +label: invalid characters in template name: right brace in middle +input: "{{foo}bar}}" +output: [Text(text="{{foo}bar}}")] + +--- + +name: invalid_name_left_braces +label: invalid characters in template name: two left braces in middle +input: "{{foo{b{ar}}" +output: [Text(text="{{foo{b{ar}}")] + +--- + +name: invalid_name_left_bracket_middle +label: invalid characters in template name: left bracket in middle +input: "{{foo[bar}}" +output: [Text(text="{{foo[bar}}")] + +--- + +name: invalid_name_right_bracket_middle +label: invalid characters in template name: right bracket in middle +input: "{{foo]bar}}" +output: [Text(text="{{foo]bar}}")] + +--- + +name: invalid_name_left_bracket_start +label: invalid characters in template name: left bracket at start +input: "{{[foobar}}" +output: [Text(text="{{[foobar}}")] + +--- + +name: invalid_name_right_bracket_start +label: invalid characters in template name: right bracket at end +input: "{{foobar]}}" +output: [Text(text="{{foobar]}}")] + +--- + +name: valid_name_left_brace_start +label: valid characters in template name: left brace at start +input: 
"{{{foobar}}" +output: [Text(text="{"), TemplateOpen(), Text(text="foobar"), TemplateClose()] + +--- + +name: valid_unnamed_param_left_brace +label: valid characters in unnamed template parameter: left brace +input: "{{foo|ba{r}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r"), TemplateClose()] + +--- + +name: valid_unnamed_param_braces +label: valid characters in unnamed template parameter: left and right braces +input: "{{foo|ba{r}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r"), TemplateClose(), Text(text="}")] + +--- + +name: valid_param_name_braces +label: valid characters in template parameter name: left and right braces +input: "{{foo|ba{r}=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r}"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: valid_param_name_brackets +label: valid characters in template parameter name: left and right brackets +input: "{{foo|ba[r]=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba[r]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: valid_param_name_double_left_brackets +label: valid characters in template parameter name: double left brackets +input: "{{foo|bar[[in\nvalid=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar[[in\nvalid"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: valid_param_name_double_right_brackets +label: valid characters in template parameter name: double right brackets +input: "{{foo|bar]]=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar]]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: valid_param_name_double_brackets +label: valid characters in template parameter name: double left and right brackets +input: 
"{{foo|bar[[in\nvalid]]=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar[[in\nvalid]]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: invalid_param_name_double_left_braces +label: invalid characters in template parameter name: double left braces +input: "{{foo|bar{{in\nvalid=baz}}" +output: [Text(text="{{foo|bar{{in\nvalid=baz}}")] + +--- + +name: invalid_param_name_double_braces +label: invalid characters in template parameter name: double left and right braces +input: "{{foo|bar{{in\nvalid}}=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar{{in\nvalid"), TemplateClose(), Text(text="=baz}}")] + +--- + +name: incomplete_stub +label: incomplete templates that should fail gracefully: just an opening +input: "{{" +output: [Text(text="{{")] + +--- + +name: incomplete_plain +label: incomplete templates that should fail gracefully: no close whatsoever +input: "{{stuff}} {{foobar" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar")] + +--- + +name: incomplete_right_brace +label: incomplete templates that should fail gracefully: only one right brace +input: "{{stuff}} {{foobar}" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar}")] + +--- + +name: incomplete_pipe +label: incomplete templates that should fail gracefully: a pipe +input: "{{stuff}} {{foobar|" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar|")] + +--- + +name: incomplete_unnamed_param +label: incomplete templates that should fail gracefully: an unnamed parameter +input: "{{stuff}} {{foo|bar" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar")] + +--- + +name: incomplete_unnamed_param_pipe +label: incomplete templates that should fail gracefully: an unnamed parameter, then a pipe +input: "{{stuff}} {{foo|bar|" +output: [TemplateOpen(), Text(text="stuff"), 
TemplateClose(), Text(text=" {{foo|bar|")] + +--- + +name: incomplete_valueless_param +label: incomplete templates that should fail gracefully: a named parameter with no value +input: "{{stuff}} {{foo|bar=" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=")] + +--- + +name: incomplete_valueless_param_pipe +label: incomplete templates that should fail gracefully: a named parameter with no value, then a pipe +input: "{{stuff}} {{foo|bar=|" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=|")] + +--- + +name: incomplete_named_param +label: incomplete templates that should fail gracefully: a named parameter with a value +input: "{{stuff}} {{foo|bar=baz" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz")] + +--- + +name: incomplete_named_param_pipe +label: incomplete templates that should fail gracefully: a named parameter with a value, then a pipe +input: "{{stuff}} {{foo|bar=baz|" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|")] + +--- + +name: incomplete_two_unnamed_params +label: incomplete templates that should fail gracefully: two unnamed parameters +input: "{{stuff}} {{foo|bar|baz" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz")] + +--- + +name: incomplete_unnamed_param_valueless_param +label: incomplete templates that should fail gracefully: an unnamed parameter, then a named parameter with no value +input: "{{stuff}} {{foo|bar|baz=" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz=")] + +--- + +name: incomplete_unnamed_param_named_param +label: incomplete templates that should fail gracefully: an unnamed parameter, then a named parameter with a value +input: "{{stuff}} {{foo|bar|baz=biz" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz=biz")] + +--- + +name: 
incomplete_named_param_unnamed_param +label: incomplete templates that should fail gracefully: a named parameter with a value, then an unnamed parameter +input: "{{stuff}} {{foo|bar=baz|biz" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz")] + +--- + +name: incomplete_named_param_valueless_param +label: incomplete templates that should fail gracefully: a named parameter with a value, then a named parameter with no value +input: "{{stuff}} {{foo|bar=baz|biz=" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz=")] + +--- + +name: incomplete_two_named_params +label: incomplete templates that should fail gracefully: two named parameters with values +input: "{{stuff}} {{foo|bar=baz|biz=buzz" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz=buzz")] + +--- + +name: incomplete_nested_template_as_unnamed_param +label: incomplete templates that should fail gracefully: a valid nested template as an unnamed parameter +input: "{{stuff}} {{foo|{{bar}}" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|"), TemplateOpen(), Text(text="bar"), TemplateClose()] + +--- + +name: incomplete_nested_template_as_param_value +label: incomplete templates that should fail gracefully: a valid nested template as a parameter value +input: "{{stuff}} {{foo|bar={{baz}}" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar="), TemplateOpen(), Text(text="baz"), TemplateClose()] + +--- + +name: recursion_five_hundred_opens +label: test potentially dangerous recursion: five hundred template openings, without spaces +input: 
"{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{" +output: [Text(text="{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{")] + +--- + +name: recursion_one_hundred_opens +label: test potentially dangerous recursion: one hundred template openings, with spaces +input: "{{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{" +output: [Text(text="{{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{")] + +--- + +name: recursion_opens_and_closes +label: test potentially dangerous recursion: template openings and closings +input: 
"{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}" +output: [Text(text="{{|"), TemplateOpen(), TemplateClose(), Text(text="{{|"), TemplateOpen(), TemplateClose(), TemplateOpen(), TemplateParamSeparator(), TemplateOpen(), TemplateClose(), Text(text="{{"), TemplateParamSeparator(), Text(text="{{"), TemplateClose(), Text(text="{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}")] diff --git a/tests/tokenizer/text.mwtest b/tests/tokenizer/text.mwtest new file mode 100644 index 0000000..77d5f50 --- /dev/null +++ b/tests/tokenizer/text.mwtest @@ -0,0 +1,25 @@ +name: basic +label: sanity check for basic text parsing, no gimmicks +input: "foobar" +output: [Text(text="foobar")] + +--- + +name: newlines +label: slightly more complex text parsing, with newlines +input: "This is a line of text.\nThis is another line of text.\nThis is another." +output: [Text(text="This is a line of text.\nThis is another line of text.\nThis is another.")] + +--- + +name: unicode +label: ensure unicode data is handled properly +input: "Thís ís å sëñtënce with diœcritiçs." 
+output: [Text(text="Thís ís å sëñtënce with diœcritiçs.")] + +--- + +name: unicode2 +label: additional unicode check for non-BMP codepoints +input: "𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰" +output: [Text(text="𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰")] diff --git a/tests/tokenizer/wikilinks.mwtest b/tests/tokenizer/wikilinks.mwtest new file mode 100644 index 0000000..0682ef1 --- /dev/null +++ b/tests/tokenizer/wikilinks.mwtest @@ -0,0 +1,158 @@ +name: blank +label: wikilink with no content +input: "[[]]" +output: [WikilinkOpen(), WikilinkClose()] + +--- + +name: blank_with_text +label: wikilink with no content but a pipe +input: "[[|]]" +output: [WikilinkOpen(), WikilinkSeparator(), WikilinkClose()] + +--- + +name: basic +label: simplest type of wikilink +input: "[[wikilink]]" +output: [WikilinkOpen(), Text(text="wikilink"), WikilinkClose()] + +--- + +name: with_text +label: wikilink with a text value +input: "[[foo|bar]]" +output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="bar"), WikilinkClose()] + +--- + +name: blank_with_multiple_texts +label: no content, multiple pipes +input: "[[|||]]" +output: [WikilinkOpen(), WikilinkSeparator(), Text(text="||"), WikilinkClose()] + +--- + +name: multiple_texts +label: multiple text values separated by pipes +input: "[[foo|bar|baz]]" +output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="bar|baz"), WikilinkClose()] + +--- + +name: nested +label: a wikilink nested within the value of another +input: "[[foo|[[bar]]]]" +output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), WikilinkOpen(), Text(text="bar"), WikilinkClose(), WikilinkClose()] + +--- + +name: nested_with_text +label: a wikilink nested within the value of another, separated by other data +input: "[[foo|a[[b]]c]]" +output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="a"), WikilinkOpen(), Text(text="b"), WikilinkClose(), Text(text="c"), WikilinkClose()] + +--- + +name: invalid_newline +label: invalid wikilink: newline as only content +input: "[[\n]]" 
+output: [Text(text="[[\n]]")] + +--- + +name: invalid_right_brace +label: invalid wikilink: right brace +input: "[[foo}b}a}r]]" +output: [Text(text="[[foo}b}a}r]]")] + +--- + +name: invalid_left_brace +label: invalid wikilink: left brace +input: "[[foo{{[a}}]]" +output: [Text(text="[[foo{{[a}}]]")] + +--- + +name: invalid_right_bracket +label: invalid wikilink: right bracket +input: "[[foo]bar]]" +output: [Text(text="[[foo]bar]]")] + +--- + +name: invalid_left_bracket +label: invalid wikilink: left bracket +input: "[[foo[bar]]" +output: [Text(text="[[foo[bar]]")] + +--- + +name: invalid_nested +label: invalid wikilink: trying to nest in the wrong context +input: "[[foo[[bar]]]]" +output: [Text(text="[[foo"), WikilinkOpen(), Text(text="bar"), WikilinkClose(), Text(text="]]")] + +--- + +name: invalid_nested_text +label: invalid wikilink: trying to nest in the wrong context, with a text param +input: "[[foo[[bar]]|baz]]" +output: [Text(text="[[foo"), WikilinkOpen(), Text(text="bar"), WikilinkClose(), Text(text="|baz]]")] + +--- + +name: incomplete_open_only +label: incomplete wikilinks: just an open +input: "[[" +output: [Text(text="[[")] + +--- + +name: incomplete_open_text +label: incomplete wikilinks: an open with some text +input: "[[foo" +output: [Text(text="[[foo")] + +--- + +name: incomplete_open_text_pipe +label: incomplete wikilinks: an open, text, then a pipe +input: "[[foo|" +output: [Text(text="[[foo|")] + +--- + +name: incomplete_open_pipe +label: incomplete wikilinks: an open, then a pipe +input: "[[|" +output: [Text(text="[[|")] + +--- + +name: incomplete_open_pipe_text +label: incomplete wikilinks: an open, then a pipe, then text +input: "[[|foo" +output: [Text(text="[[|foo")] + +--- + +name: incomplete_open_pipes_text +label: incomplete wikilinks: a pipe, then text then two pipes +input: "[[|f||" +output: [Text(text="[[|f||")] + +--- + +name: incomplete_open_partial_close +label: incomplete wikilinks: an open, then one right brace +input: "[[{}" 
+output: [Text(text="[[{}")] + +--- + +name: incomplete_preserve_previous +label: incomplete wikilinks: a valid wikilink followed by an invalid one +input: "[[foo]] [[bar" +output: [WikilinkOpen(), Text(text="foo"), WikilinkClose(), Text(text=" [[bar")]