Conflicts: mwparserfromhell/parser/contexts.py mwparserfromhell/parser/tokenizer.pytags/v0.3
@@ -1,4 +1,5 @@ | |||||
*.pyc | *.pyc | ||||
*.so | |||||
*.egg | *.egg | ||||
*.egg-info | *.egg-info | ||||
.DS_Store | .DS_Store | ||||
@@ -1,4 +1,4 @@ | |||||
Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy | Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
of this software and associated documentation files (the "Software"), to deal | of this software and associated documentation files (the "Software"), to deal | ||||
@@ -18,7 +18,13 @@ so you can install the latest release with ``pip install mwparserfromhell`` | |||||
cd mwparserfromhell | cd mwparserfromhell | ||||
python setup.py install | python setup.py install | ||||
You can run the comprehensive unit testing suite with ``python setup.py test``. | |||||
If you get ``error: Unable to find vcvarsall.bat`` while installing, this is | |||||
because Windows can't find the compiler for C extensions. Consult this | |||||
`StackOverflow question`_ for help. You can also set ``ext_modules`` in | |||||
``setup.py`` to an empty list to prevent the extension from building. | |||||
You can run the comprehensive unit testing suite with | |||||
``python setup.py test -q``. | |||||
Usage | Usage | ||||
----- | ----- | ||||
@@ -106,12 +112,12 @@ Integration | |||||
``Page`` objects have a ``parse`` method that essentially calls | ``Page`` objects have a ``parse`` method that essentially calls | ||||
``mwparserfromhell.parse()`` on ``page.get()``. | ``mwparserfromhell.parse()`` on ``page.get()``. | ||||
If you're using PyWikipedia_, your code might look like this:: | |||||
If you're using Pywikipedia_, your code might look like this:: | |||||
import mwparserfromhell | import mwparserfromhell | ||||
import wikipedia as pywikibot | import wikipedia as pywikibot | ||||
def parse(title): | def parse(title): | ||||
site = pywikibot.get_site() | |||||
site = pywikibot.getSite() | |||||
page = pywikibot.Page(site, title) | page = pywikibot.Page(site, title) | ||||
text = page.get() | text = page.get() | ||||
return mwparserfromhell.parse(text) | return mwparserfromhell.parse(text) | ||||
@@ -124,16 +130,19 @@ following code (via the API_):: | |||||
import mwparserfromhell | import mwparserfromhell | ||||
API_URL = "http://en.wikipedia.org/w/api.php" | API_URL = "http://en.wikipedia.org/w/api.php" | ||||
def parse(title): | def parse(title): | ||||
raw = urllib.urlopen(API_URL, data).read() | |||||
data = {"action": "query", "prop": "revisions", "rvlimit": 1, | |||||
"rvprop": "content", "format": "json", "titles": title} | |||||
raw = urllib.urlopen(API_URL, urllib.urlencode(data)).read() | |||||
res = json.loads(raw) | res = json.loads(raw) | ||||
text = res["query"]["pages"].values()[0]["revisions"][0]["*"] | text = res["query"]["pages"].values()[0]["revisions"][0]["*"] | ||||
return mwparserfromhell.parse(text) | return mwparserfromhell.parse(text) | ||||
.. _MediaWiki: http://mediawiki.org | |||||
.. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig | |||||
.. _Σ: http://en.wikipedia.org/wiki/User:%CE%A3 | |||||
.. _Python Package Index: http://pypi.python.org | |||||
.. _get pip: http://pypi.python.org/pypi/pip | |||||
.. _EarwigBot: https://github.com/earwig/earwigbot | |||||
.. _PyWikipedia: http://pywikipediabot.sourceforge.net/ | |||||
.. _API: http://mediawiki.org/wiki/API | |||||
.. _MediaWiki: http://mediawiki.org | |||||
.. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig | |||||
.. _Σ: http://en.wikipedia.org/wiki/User:%CE%A3 | |||||
.. _Python Package Index: http://pypi.python.org | |||||
.. _StackOverflow question: http://stackoverflow.com/questions/2817869/error-unable-to-find-vcvarsall-bat | |||||
.. _get pip: http://pypi.python.org/pypi/pip | |||||
.. _EarwigBot: https://github.com/earwig/earwigbot | |||||
.. _Pywikipedia: https://www.mediawiki.org/wiki/Manual:Pywikipediabot | |||||
.. _API: http://mediawiki.org/wiki/API |
@@ -42,7 +42,7 @@ master_doc = 'index' | |||||
# General information about the project. | # General information about the project. | ||||
project = u'mwparserfromhell' | project = u'mwparserfromhell' | ||||
copyright = u'2012 Ben Kurtovic' | |||||
copyright = u'2012, 2013 Ben Kurtovic' | |||||
# The version info for the project you're documenting, acts as replacement for | # The version info for the project you're documenting, acts as replacement for | ||||
# |version| and |release|, also used in various other places throughout the | # |version| and |release|, also used in various other places throughout the | ||||
@@ -22,10 +22,16 @@ so you can install the latest release with ``pip install mwparserfromhell`` | |||||
cd mwparserfromhell | cd mwparserfromhell | ||||
python setup.py install | python setup.py install | ||||
If you get ``error: Unable to find vcvarsall.bat`` while installing, this is | |||||
because Windows can't find the compiler for C extensions. Consult this | |||||
`StackOverflow question`_ for help. You can also set ``ext_modules`` in | |||||
``setup.py`` to an empty list to prevent the extension from building. | |||||
You can run the comprehensive unit testing suite with ``python setup.py test``. | You can run the comprehensive unit testing suite with ``python setup.py test``. | ||||
.. _Python Package Index: http://pypi.python.org | |||||
.. _get pip: http://pypi.python.org/pypi/pip | |||||
.. _Python Package Index: http://pypi.python.org | |||||
.. _get pip: http://pypi.python.org/pypi/pip | |||||
.. _StackOverflow question: http://stackoverflow.com/questions/2817869/error-unable-to-find-vcvarsall-bat | |||||
Contents | Contents | ||||
-------- | -------- | ||||
@@ -7,12 +7,12 @@ Integration | |||||
:py:func:`mwparserfromhell.parse() <mwparserfromhell.__init__.parse>` on | :py:func:`mwparserfromhell.parse() <mwparserfromhell.__init__.parse>` on | ||||
:py:meth:`~earwigbot.wiki.page.Page.get`. | :py:meth:`~earwigbot.wiki.page.Page.get`. | ||||
If you're using PyWikipedia_, your code might look like this:: | |||||
If you're using Pywikipedia_, your code might look like this:: | |||||
import mwparserfromhell | import mwparserfromhell | ||||
import wikipedia as pywikibot | import wikipedia as pywikibot | ||||
def parse(title): | def parse(title): | ||||
site = pywikibot.get_site() | |||||
site = pywikibot.getSite() | |||||
page = pywikibot.Page(site, title) | page = pywikibot.Page(site, title) | ||||
text = page.get() | text = page.get() | ||||
return mwparserfromhell.parse(text) | return mwparserfromhell.parse(text) | ||||
@@ -31,5 +31,5 @@ following code (via the API_):: | |||||
return mwparserfromhell.parse(text) | return mwparserfromhell.parse(text) | ||||
.. _EarwigBot: https://github.com/earwig/earwigbot | .. _EarwigBot: https://github.com/earwig/earwigbot | ||||
.. _PyWikipedia: http://pywikipediabot.sourceforge.net/ | |||||
.. _Pywikipedia: https://www.mediawiki.org/wiki/Manual:Pywikipediabot | |||||
.. _API: http://mediawiki.org/wiki/API | .. _API: http://mediawiki.org/wiki/API |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -29,12 +29,11 @@ outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode. | |||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
__author__ = "Ben Kurtovic" | __author__ = "Ben Kurtovic" | ||||
__copyright__ = "Copyright (C) 2012 Ben Kurtovic" | |||||
__copyright__ = "Copyright (C) 2012, 2013 Ben Kurtovic" | |||||
__license__ = "MIT License" | __license__ = "MIT License" | ||||
__version__ = "0.2.dev" | __version__ = "0.2.dev" | ||||
__email__ = "ben.kurtovic@verizon.net" | __email__ = "ben.kurtovic@verizon.net" | ||||
from . import nodes, parser, smart_list, string_mixin, wikicode | |||||
from . import compat, nodes, parser, smart_list, string_mixin, utils, wikicode | |||||
parse = lambda text: parser.Parser(text).parse() | |||||
parse.__doc__ = "Short for :py:meth:`.Parser.parse`." | |||||
parse = utils.parse_anything |
@@ -1,29 +1,29 @@ | |||||
# -*- coding: utf-8 -*- | |||||
""" | |||||
Implements support for both Python 2 and Python 3 by defining common types in | |||||
terms of their Python 2/3 variants. For example, :py:class:`str` is set to | |||||
:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, | |||||
:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These | |||||
types are meant to be imported directly from within the parser's modules. | |||||
""" | |||||
import sys | |||||
py3k = sys.version_info[0] == 3 | |||||
if py3k: | |||||
bytes = bytes | |||||
str = str | |||||
basestring = str | |||||
maxsize = sys.maxsize | |||||
import html.entities as htmlentities | |||||
else: | |||||
bytes = str | |||||
str = unicode | |||||
basestring = basestring | |||||
maxsize = sys.maxint | |||||
import htmlentitydefs as htmlentities | |||||
del sys | |||||
# -*- coding: utf-8 -*- | |||||
""" | |||||
Implements support for both Python 2 and Python 3 by defining common types in | |||||
terms of their Python 2/3 variants. For example, :py:class:`str` is set to | |||||
:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, | |||||
:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These | |||||
types are meant to be imported directly from within the parser's modules. | |||||
""" | |||||
import sys | |||||
py3k = sys.version_info[0] == 3 | |||||
if py3k: | |||||
bytes = bytes | |||||
str = str | |||||
basestring = str | |||||
maxsize = sys.maxsize | |||||
import html.entities as htmlentities | |||||
else: | |||||
bytes = str | |||||
str = unicode | |||||
basestring = basestring | |||||
maxsize = sys.maxint | |||||
import htmlentitydefs as htmlentities | |||||
del sys |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -30,6 +30,7 @@ __all__ = ["Argument"] | |||||
class Argument(Node): | class Argument(Node): | ||||
"""Represents a template argument substitution, like ``{{{foo}}}``.""" | """Represents a template argument substitution, like ``{{{foo}}}``.""" | ||||
def __init__(self, name, default=None): | def __init__(self, name, default=None): | ||||
super(Argument, self).__init__() | super(Argument, self).__init__() | ||||
self._name = name | self._name = name | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -29,6 +29,7 @@ __all__ = ["Comment"] | |||||
class Comment(Node): | class Comment(Node): | ||||
"""Represents a hidden HTML comment, like ``<!-- foobar -->``.""" | """Represents a hidden HTML comment, like ``<!-- foobar -->``.""" | ||||
def __init__(self, contents): | def __init__(self, contents): | ||||
super(Comment, self).__init__() | super(Comment, self).__init__() | ||||
self._contents = contents | self._contents = contents | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -23,7 +23,7 @@ | |||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
from . import Node | from . import Node | ||||
from ..compat import htmlentities, str | |||||
from ..compat import htmlentities, py3k, str | |||||
__all__ = ["HTMLEntity"] | __all__ = ["HTMLEntity"] | ||||
@@ -63,28 +63,31 @@ class HTMLEntity(Node): | |||||
return self.normalize() | return self.normalize() | ||||
return self | return self | ||||
def _unichr(self, value): | |||||
"""Implement the builtin unichr() with support for non-BMP code points. | |||||
if not py3k: | |||||
@staticmethod | |||||
def _unichr(value): | |||||
"""Implement builtin unichr() with support for non-BMP code points. | |||||
On wide Python builds, this functions like the normal unichr(). On | |||||
narrow builds, this returns the value's corresponding surrogate pair. | |||||
""" | |||||
try: | |||||
return unichr(value) | |||||
except ValueError: | |||||
# Test whether we're on the wide or narrow Python build. Check the | |||||
# length of a non-BMP code point (U+1F64A, SPEAK-NO-EVIL MONKEY): | |||||
if len("\U0001F64A") == 2: | |||||
# Ensure this is within the range we can encode: | |||||
if value > 0x10FFFF: | |||||
raise ValueError("unichr() arg not in range(0x110000)") | |||||
code = value - 0x10000 | |||||
if value < 0: # Invalid code point | |||||
raise | |||||
lead = 0xD800 + (code >> 10) | |||||
trail = 0xDC00 + (code % (1 << 10)) | |||||
return unichr(lead) + unichr(trail) | |||||
raise | |||||
On wide Python builds, this functions like the normal unichr(). On | |||||
narrow builds, this returns the value's encoded surrogate pair. | |||||
""" | |||||
try: | |||||
return unichr(value) | |||||
except ValueError: | |||||
# Test whether we're on the wide or narrow Python build. Check | |||||
# the length of a non-BMP code point | |||||
# (U+1F64A, SPEAK-NO-EVIL MONKEY): | |||||
if len("\U0001F64A") == 2: | |||||
# Ensure this is within the range we can encode: | |||||
if value > 0x10FFFF: | |||||
raise ValueError("unichr() arg not in range(0x110000)") | |||||
code = value - 0x10000 | |||||
if value < 0: # Invalid code point | |||||
raise | |||||
lead = 0xD800 + (code >> 10) | |||||
trail = 0xDC00 + (code % (1 << 10)) | |||||
return unichr(lead) + unichr(trail) | |||||
raise | |||||
@property | @property | ||||
def value(self): | def value(self): | ||||
@@ -119,28 +122,60 @@ class HTMLEntity(Node): | |||||
@value.setter | @value.setter | ||||
def value(self, newval): | def value(self, newval): | ||||
newval = str(newval) | newval = str(newval) | ||||
if newval not in htmlentities.entitydefs: | |||||
test = int(self.value, 16) | |||||
if test < 0 or (test > 0x10FFFF and int(self.value) > 0x10FFFF): | |||||
raise ValueError(newval) | |||||
try: | |||||
int(newval) | |||||
except ValueError: | |||||
try: | |||||
int(newval, 16) | |||||
except ValueError: | |||||
if newval not in htmlentities.entitydefs: | |||||
raise ValueError("entity value is not a valid name") | |||||
self._named = True | |||||
self._hexadecimal = False | |||||
else: | |||||
if int(newval, 16) < 0 or int(newval, 16) > 0x10FFFF: | |||||
raise ValueError("entity value is not in range(0x110000)") | |||||
self._named = False | |||||
self._hexadecimal = True | |||||
else: | |||||
test = int(newval, 16 if self.hexadecimal else 10) | |||||
if test < 0 or test > 0x10FFFF: | |||||
raise ValueError("entity value is not in range(0x110000)") | |||||
self._named = False | |||||
self._value = newval | self._value = newval | ||||
@named.setter | @named.setter | ||||
def named(self, newval): | def named(self, newval): | ||||
self._named = bool(newval) | |||||
newval = bool(newval) | |||||
if newval and self.value not in htmlentities.entitydefs: | |||||
raise ValueError("entity value is not a valid name") | |||||
if not newval: | |||||
try: | |||||
int(self.value, 16) | |||||
except ValueError: | |||||
err = "current entity value is not a valid Unicode codepoint" | |||||
raise ValueError(err) | |||||
self._named = newval | |||||
@hexadecimal.setter | @hexadecimal.setter | ||||
def hexadecimal(self, newval): | def hexadecimal(self, newval): | ||||
self._hexadecimal = bool(newval) | |||||
newval = bool(newval) | |||||
if newval and self.named: | |||||
raise ValueError("a named entity cannot be hexadecimal") | |||||
self._hexadecimal = newval | |||||
@hex_char.setter | @hex_char.setter | ||||
def hex_char(self, newval): | def hex_char(self, newval): | ||||
self._hex_char = bool(newval) | |||||
newval = str(newval) | |||||
if newval not in ("x", "X"): | |||||
raise ValueError(newval) | |||||
self._hex_char = newval | |||||
def normalize(self): | def normalize(self): | ||||
"""Return the unicode character represented by the HTML entity.""" | """Return the unicode character represented by the HTML entity.""" | ||||
chrfunc = chr if py3k else HTMLEntity._unichr | |||||
if self.named: | if self.named: | ||||
return unichr(htmlentities.name2codepoint[self.value]) | |||||
return chrfunc(htmlentities.name2codepoint[self.value]) | |||||
if self.hexadecimal: | if self.hexadecimal: | ||||
return self._unichr(int(self.value, 16)) | |||||
return self._unichr(int(self.value)) | |||||
return chrfunc(int(self.value, 16)) | |||||
return chrfunc(int(self.value)) |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -81,7 +81,7 @@ class Template(Node): | |||||
in parameter names or values so they are not mistaken for new | in parameter names or values so they are not mistaken for new | ||||
parameters. | parameters. | ||||
""" | """ | ||||
replacement = HTMLEntity(value=ord(char)) | |||||
replacement = str(HTMLEntity(value=ord(char))) | |||||
for node in code.filter_text(recursive=False): | for node in code.filter_text(recursive=False): | ||||
if char in node: | if char in node: | ||||
code.replace(node, node.replace(char, replacement)) | code.replace(node, node.replace(char, replacement)) | ||||
@@ -107,7 +107,7 @@ class Template(Node): | |||||
values = tuple(theories.values()) | values = tuple(theories.values()) | ||||
best = max(values) | best = max(values) | ||||
confidence = float(best) / sum(values) | confidence = float(best) / sum(values) | ||||
if confidence > 0.75: | |||||
if confidence >= 0.75: | |||||
return tuple(theories.keys())[values.index(best)] | return tuple(theories.keys())[values.index(best)] | ||||
def _get_spacing_conventions(self, use_names): | def _get_spacing_conventions(self, use_names): | ||||
@@ -142,9 +142,9 @@ class Template(Node): | |||||
return False | return False | ||||
return True | return True | ||||
def _remove_without_field(self, param, i, force_no_field): | |||||
def _remove_without_field(self, param, i): | |||||
"""Return False if a parameter name should be kept, otherwise True.""" | """Return False if a parameter name should be kept, otherwise True.""" | ||||
if not param.showkey and not force_no_field: | |||||
if not param.showkey: | |||||
dependents = [not after.showkey for after in self.params[i+1:]] | dependents = [not after.showkey for after in self.params[i+1:]] | ||||
if any(dependents): | if any(dependents): | ||||
return False | return False | ||||
@@ -183,11 +183,10 @@ class Template(Node): | |||||
def get(self, name): | def get(self, name): | ||||
"""Get the parameter whose name is *name*. | """Get the parameter whose name is *name*. | ||||
The returned object is a | |||||
:py:class:`~.Parameter` instance. Raises :py:exc:`ValueError` if no | |||||
parameter has this name. Since multiple parameters can have the same | |||||
name, we'll return the last match, since the last parameter is the only | |||||
one read by the MediaWiki parser. | |||||
The returned object is a :py:class:`~.Parameter` instance. Raises | |||||
:py:exc:`ValueError` if no parameter has this name. Since multiple | |||||
parameters can have the same name, we'll return the last match, since | |||||
the last parameter is the only one read by the MediaWiki parser. | |||||
""" | """ | ||||
name = name.strip() if isinstance(name, basestring) else str(name) | name = name.strip() if isinstance(name, basestring) else str(name) | ||||
for param in reversed(self.params): | for param in reversed(self.params): | ||||
@@ -195,20 +194,34 @@ class Template(Node): | |||||
return param | return param | ||||
raise ValueError(name) | raise ValueError(name) | ||||
def add(self, name, value, showkey=None, force_nonconformity=False): | |||||
def add(self, name, value, showkey=None, before=None, | |||||
preserve_spacing=True): | |||||
"""Add a parameter to the template with a given *name* and *value*. | """Add a parameter to the template with a given *name* and *value*. | ||||
*name* and *value* can be anything parasable by | *name* and *value* can be anything parasable by | ||||
:py:func:`.utils.parse_anything`; pipes (and equal signs, if | |||||
appropriate) are automatically escaped from *value* where applicable. | |||||
:py:func:`.utils.parse_anything`; pipes and equal signs are | |||||
automatically escaped from *value* when appropriate. | |||||
If *showkey* is given, this will determine whether or not to show the | If *showkey* is given, this will determine whether or not to show the | ||||
parameter's name (e.g., ``{{foo|bar}}``'s parameter has a name of | parameter's name (e.g., ``{{foo|bar}}``'s parameter has a name of | ||||
``"1"`` but it is hidden); otherwise, we'll make a safe and intelligent | ``"1"`` but it is hidden); otherwise, we'll make a safe and intelligent | ||||
guess. If *name* is already a parameter, we'll replace its value while | |||||
keeping the same spacing rules unless *force_nonconformity* is | |||||
``True``. We will also try to guess the dominant spacing convention | |||||
when adding a new parameter using :py:meth:`_get_spacing_conventions` | |||||
unless *force_nonconformity* is ``True``. | |||||
guess. | |||||
If *name* is already a parameter in the template, we'll replace its | |||||
value while keeping the same whitespace around it. We will also try to | |||||
guess the dominant spacing convention when adding a new parameter using | |||||
:py:meth:`_get_spacing_conventions`. | |||||
If *before* is given (either a :py:class:`~.Parameter` object or a | |||||
name), then we will place the parameter immediately before this one. | |||||
Otherwise, it will be added at the end. If *before* is a name and | |||||
exists multiple times in the template, we will place it before the last | |||||
occurance. If *before* is not in the template, :py:exc:`ValueError` is | |||||
raised. The argument is ignored if the new parameter already exists. | |||||
If *preserve_spacing* is ``False``, we will avoid preserving spacing | |||||
conventions when changing the value of an existing parameter or when | |||||
adding a new one. | |||||
""" | """ | ||||
name, value = parse_anything(name), parse_anything(value) | name, value = parse_anything(name), parse_anything(value) | ||||
self._surface_escape(value, "|") | self._surface_escape(value, "|") | ||||
@@ -217,14 +230,17 @@ class Template(Node): | |||||
self.remove(name, keep_field=True) | self.remove(name, keep_field=True) | ||||
existing = self.get(name) | existing = self.get(name) | ||||
if showkey is not None: | if showkey is not None: | ||||
if not showkey: | |||||
self._surface_escape(value, "=") | |||||
existing.showkey = showkey | existing.showkey = showkey | ||||
if not existing.showkey: | |||||
self._surface_escape(value, "=") | |||||
nodes = existing.value.nodes | nodes = existing.value.nodes | ||||
if force_nonconformity: | |||||
existing.value = value | |||||
else: | |||||
if preserve_spacing: | |||||
for i in range(2): # Ignore empty text nodes | |||||
if not nodes[i]: | |||||
nodes[i] = None | |||||
existing.value = parse_anything([nodes[0], value, nodes[1]]) | existing.value = parse_anything([nodes[0], value, nodes[1]]) | ||||
else: | |||||
existing.value = value | |||||
return existing | return existing | ||||
if showkey is None: | if showkey is None: | ||||
@@ -246,43 +262,38 @@ class Template(Node): | |||||
if not showkey: | if not showkey: | ||||
self._surface_escape(value, "=") | self._surface_escape(value, "=") | ||||
if not force_nonconformity: | |||||
if preserve_spacing: | |||||
before_n, after_n = self._get_spacing_conventions(use_names=True) | before_n, after_n = self._get_spacing_conventions(use_names=True) | ||||
if before_n and after_n: | |||||
name = parse_anything([before_n, name, after_n]) | |||||
elif before_n: | |||||
name = parse_anything([before_n, name]) | |||||
elif after_n: | |||||
name = parse_anything([name, after_n]) | |||||
before_v, after_v = self._get_spacing_conventions(use_names=False) | before_v, after_v = self._get_spacing_conventions(use_names=False) | ||||
if before_v and after_v: | |||||
value = parse_anything([before_v, value, after_v]) | |||||
elif before_v: | |||||
value = parse_anything([before_v, value]) | |||||
elif after_v: | |||||
value = parse_anything([value, after_v]) | |||||
name = parse_anything([before_n, name, after_n]) | |||||
value = parse_anything([before_v, value, after_v]) | |||||
param = Parameter(name, value, showkey) | param = Parameter(name, value, showkey) | ||||
self.params.append(param) | |||||
if before: | |||||
if not isinstance(before, Parameter): | |||||
before = self.get(before) | |||||
self.params.insert(self.params.index(before), param) | |||||
else: | |||||
self.params.append(param) | |||||
return param | return param | ||||
def remove(self, name, keep_field=False, force_no_field=False): | |||||
def remove(self, name, keep_field=False): | |||||
"""Remove a parameter from the template whose name is *name*. | """Remove a parameter from the template whose name is *name*. | ||||
If *keep_field* is ``True``, we will keep the parameter's name, but | If *keep_field* is ``True``, we will keep the parameter's name, but | ||||
blank its value. Otherwise, we will remove the parameter completely | blank its value. Otherwise, we will remove the parameter completely | ||||
*unless* other parameters are dependent on it (e.g. removing ``bar`` | *unless* other parameters are dependent on it (e.g. removing ``bar`` | ||||
from ``{{foo|bar|baz}}`` is unsafe because ``{{foo|baz}}`` is not what | from ``{{foo|bar|baz}}`` is unsafe because ``{{foo|baz}}`` is not what | ||||
we expected, so ``{{foo||baz}}`` will be produced instead), unless | |||||
*force_no_field* is also ``True``. If the parameter shows up multiple | |||||
times in the template, we will remove all instances of it (and keep | |||||
one if *keep_field* is ``True`` - that being the first instance if | |||||
none of the instances have dependents, otherwise that instance will be | |||||
kept). | |||||
we expected, so ``{{foo||baz}}`` will be produced instead). | |||||
If the parameter shows up multiple times in the template, we will | |||||
remove all instances of it (and keep one if *keep_field* is ``True`` - | |||||
the first instance if none have dependents, otherwise the one with | |||||
dependents will be kept). | |||||
""" | """ | ||||
name = name.strip() if isinstance(name, basestring) else str(name) | name = name.strip() if isinstance(name, basestring) else str(name) | ||||
removed = False | removed = False | ||||
to_remove =[] | |||||
for i, param in enumerate(self.params): | for i, param in enumerate(self.params): | ||||
if param.name.strip() == name: | if param.name.strip() == name: | ||||
if keep_field: | if keep_field: | ||||
@@ -290,13 +301,15 @@ class Template(Node): | |||||
self._blank_param_value(param.value) | self._blank_param_value(param.value) | ||||
keep_field = False | keep_field = False | ||||
else: | else: | ||||
self.params.remove(param) | |||||
to_remove.append(param) | |||||
else: | else: | ||||
if self._remove_without_field(param, i, force_no_field): | |||||
self.params.remove(param) | |||||
if self._remove_without_field(param, i): | |||||
to_remove.append(param) | |||||
else: | else: | ||||
self._blank_param_value(param.value) | self._blank_param_value(param.value) | ||||
if not removed: | if not removed: | ||||
removed = True | removed = True | ||||
if not removed: | if not removed: | ||||
raise ValueError(name) | raise ValueError(name) | ||||
for param in to_remove: | |||||
self.params.remove(param) |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -29,6 +29,7 @@ __all__ = ["Text"] | |||||
class Text(Node): | class Text(Node): | ||||
"""Represents ordinary, unformatted text with no special properties.""" | """Represents ordinary, unformatted text with no special properties.""" | ||||
def __init__(self, value): | def __init__(self, value): | ||||
super(Text, self).__init__() | super(Text, self).__init__() | ||||
self._value = value | self._value = value | ||||
@@ -39,6 +40,9 @@ class Text(Node): | |||||
def __strip__(self, normalize, collapse): | def __strip__(self, normalize, collapse): | ||||
return self | return self | ||||
def __showtree__(self, write, get, mark): | |||||
write(str(self).encode("unicode_escape").decode("utf8")) | |||||
@property | @property | ||||
def value(self): | def value(self): | ||||
"""The actual text itself.""" | """The actual text itself.""" | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -30,6 +30,7 @@ __all__ = ["Wikilink"] | |||||
class Wikilink(Node): | class Wikilink(Node): | ||||
"""Represents an internal wikilink, like ``[[Foo|Bar]]``.""" | """Represents an internal wikilink, like ``[[Foo|Bar]]``.""" | ||||
def __init__(self, title, text=None): | def __init__(self, title, text=None): | ||||
super(Wikilink, self).__init__() | super(Wikilink, self).__init__() | ||||
self._title = title | self._title = title | ||||
@@ -78,4 +79,7 @@ class Wikilink(Node): | |||||
@text.setter | @text.setter | ||||
def text(self, value): | def text(self, value): | ||||
self._text = parse_anything(value) | |||||
if value is None: | |||||
self._text = None | |||||
else: | |||||
self._text = parse_anything(value) |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -77,6 +77,15 @@ Local (stack-specific) contexts: | |||||
* :py:const:`TAG_BODY` | * :py:const:`TAG_BODY` | ||||
* :py:const:`TAG_CLOSE` | * :py:const:`TAG_CLOSE` | ||||
* :py:const:`SAFETY_CHECK` | |||||
* :py:const:`HAS_TEXT` | |||||
* :py:const:`FAIL_ON_TEXT` | |||||
* :py:const:`FAIL_NEXT` | |||||
* :py:const:`FAIL_ON_LBRACE` | |||||
* :py:const:`FAIL_ON_RBRACE` | |||||
* :py:const:`FAIL_ON_EQUALS` | |||||
Global contexts: | Global contexts: | ||||
* :py:const:`GL_HEADING` | * :py:const:`GL_HEADING` | ||||
@@ -84,40 +93,47 @@ Global contexts: | |||||
# Local contexts: | # Local contexts: | ||||
TEMPLATE = 0b000000000000000000111 | |||||
TEMPLATE_NAME = 0b000000000000000000001 | |||||
TEMPLATE_PARAM_KEY = 0b000000000000000000010 | |||||
TEMPLATE_PARAM_VALUE = 0b000000000000000000100 | |||||
ARGUMENT = 0b000000000000000011000 | |||||
ARGUMENT_NAME = 0b000000000000000001000 | |||||
ARGUMENT_DEFAULT = 0b000000000000000010000 | |||||
WIKILINK = 0b000000000000001100000 | |||||
WIKILINK_TITLE = 0b000000000000000100000 | |||||
WIKILINK_TEXT = 0b000000000000001000000 | |||||
HEADING = 0b000000001111110000000 | |||||
HEADING_LEVEL_1 = 0b000000000000010000000 | |||||
HEADING_LEVEL_2 = 0b000000000000100000000 | |||||
HEADING_LEVEL_3 = 0b000000000001000000000 | |||||
HEADING_LEVEL_4 = 0b000000000010000000000 | |||||
HEADING_LEVEL_5 = 0b000000000100000000000 | |||||
HEADING_LEVEL_6 = 0b000000001000000000000 | |||||
COMMENT = 0b000000010000000000000 | |||||
TAG = 0b111111100000000000000 | |||||
TAG_OPEN = 0b001111100000000000000 | |||||
TAG_OPEN_NAME = 0b000000100000000000000 | |||||
TAG_OPEN_ATTR = 0b001111000000000000000 | |||||
TAG_OPEN_ATTR_NAME = 0b000001000000000000000 | |||||
TAG_OPEN_ATTR_BODY = 0b000010000000000000000 | |||||
TAG_OPEN_ATTR_QUOTED = 0b000100000000000000000 | |||||
TAG_OPEN_ATTR_IGNORE = 0b001000000000000000000 | |||||
TAG_BODY = 0b010000000000000000000 | |||||
TAG_CLOSE = 0b100000000000000000000 | |||||
TEMPLATE = 0b000000000000000000000000111 | |||||
TEMPLATE_NAME = 0b000000000000000000000000001 | |||||
TEMPLATE_PARAM_KEY = 0b000000000000000000000000010 | |||||
TEMPLATE_PARAM_VALUE = 0b000000000000000000000000100 | |||||
ARGUMENT = 0b000000000000000000000011000 | |||||
ARGUMENT_NAME = 0b000000000000000000000001000 | |||||
ARGUMENT_DEFAULT = 0b000000000000000000000010000 | |||||
WIKILINK = 0b000000000000000000001100000 | |||||
WIKILINK_TITLE = 0b000000000000000000000100000 | |||||
WIKILINK_TEXT = 0b000000000000000000001000000 | |||||
HEADING = 0b000000000000001111110000000 | |||||
HEADING_LEVEL_1 = 0b000000000000000000010000000 | |||||
HEADING_LEVEL_2 = 0b000000000000000000100000000 | |||||
HEADING_LEVEL_3 = 0b000000000000000001000000000 | |||||
HEADING_LEVEL_4 = 0b000000000000000010000000000 | |||||
HEADING_LEVEL_5 = 0b000000000000000100000000000 | |||||
HEADING_LEVEL_6 = 0b000000000000001000000000000 | |||||
COMMENT = 0b000000000000010000000000000 | |||||
TAG = 0b000000111111100000000000000 | |||||
TAG_OPEN = 0b000000001111100000000000000 | |||||
TAG_OPEN_NAME = 0b000000000000100000000000000 | |||||
TAG_OPEN_ATTR = 0b000000001111000000000000000 | |||||
TAG_OPEN_ATTR_NAME = 0b000000000001000000000000000 | |||||
TAG_OPEN_ATTR_BODY = 0b000000000010000000000000000 | |||||
TAG_OPEN_ATTR_QUOTED = 0b000000000100000000000000000 | |||||
TAG_OPEN_ATTR_IGNORE = 0b000000001000000000000000000 | |||||
TAG_BODY = 0b000000010000000000000000000 | |||||
TAG_CLOSE = 0b000000100000000000000000000 | |||||
SAFETY_CHECK = 0b111111000000000000000000000 | |||||
HAS_TEXT = 0b000001000000000000000000000 | |||||
FAIL_ON_TEXT = 0b000010000000000000000000000 | |||||
FAIL_NEXT = 0b000100000000000000000000000 | |||||
FAIL_ON_LBRACE = 0b001000000000000000000000000 | |||||
FAIL_ON_RBRACE = 0b010000000000000000000000000 | |||||
FAIL_ON_EQUALS = 0b100000000000000000000000000 | |||||
# Global contexts: | # Global contexts: | ||||
@@ -1,6 +1,6 @@ | |||||
/* | /* | ||||
Tokenizer for MWParserFromHell | Tokenizer for MWParserFromHell | ||||
Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | Permission is hereby granted, free of charge, to any person obtaining a copy of | ||||
this software and associated documentation files (the "Software"), to deal in | this software and associated documentation files (the "Software"), to deal in | ||||
@@ -23,6 +23,11 @@ SOFTWARE. | |||||
#include "tokenizer.h" | #include "tokenizer.h" | ||||
double log2(double n) | |||||
{ | |||||
return log(n) / log(2); | |||||
} | |||||
static PyObject* | static PyObject* | ||||
Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds) | Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds) | ||||
{ | { | ||||
@@ -52,8 +57,9 @@ Textbuffer_new(void) | |||||
static void | static void | ||||
Tokenizer_dealloc(Tokenizer* self) | Tokenizer_dealloc(Tokenizer* self) | ||||
{ | { | ||||
Py_XDECREF(self->text); | |||||
struct Stack *this = self->topstack, *next; | struct Stack *this = self->topstack, *next; | ||||
Py_XDECREF(self->text); | |||||
while (this) { | while (this) { | ||||
Py_DECREF(this->stack); | Py_DECREF(this->stack); | ||||
Textbuffer_dealloc(this->textbuffer); | Textbuffer_dealloc(this->textbuffer); | ||||
@@ -109,6 +115,8 @@ Tokenizer_push(Tokenizer* self, int context) | |||||
return -1; | return -1; | ||||
top->next = self->topstack; | top->next = self->topstack; | ||||
self->topstack = top; | self->topstack = top; | ||||
self->depth++; | |||||
self->cycles++; | |||||
return 0; | return 0; | ||||
} | } | ||||
@@ -137,20 +145,21 @@ Textbuffer_render(struct Textbuffer* self) | |||||
static int | static int | ||||
Tokenizer_push_textbuffer(Tokenizer* self) | Tokenizer_push_textbuffer(Tokenizer* self) | ||||
{ | { | ||||
PyObject *text, *kwargs, *token; | |||||
struct Textbuffer* buffer = self->topstack->textbuffer; | struct Textbuffer* buffer = self->topstack->textbuffer; | ||||
if (buffer->size == 0 && !buffer->next) | if (buffer->size == 0 && !buffer->next) | ||||
return 0; | return 0; | ||||
PyObject* text = Textbuffer_render(buffer); | |||||
text = Textbuffer_render(buffer); | |||||
if (!text) | if (!text) | ||||
return -1; | return -1; | ||||
PyObject* kwargs = PyDict_New(); | |||||
kwargs = PyDict_New(); | |||||
if (!kwargs) { | if (!kwargs) { | ||||
Py_DECREF(text); | Py_DECREF(text); | ||||
return -1; | return -1; | ||||
} | } | ||||
PyDict_SetItemString(kwargs, "text", text); | PyDict_SetItemString(kwargs, "text", text); | ||||
Py_DECREF(text); | Py_DECREF(text); | ||||
PyObject* token = PyObject_Call(Text, NOARGS, kwargs); | |||||
token = PyObject_Call(Text, NOARGS, kwargs); | |||||
Py_DECREF(kwargs); | Py_DECREF(kwargs); | ||||
if (!token) | if (!token) | ||||
return -1; | return -1; | ||||
@@ -174,6 +183,7 @@ Tokenizer_delete_top_of_stack(Tokenizer* self) | |||||
Textbuffer_dealloc(top->textbuffer); | Textbuffer_dealloc(top->textbuffer); | ||||
self->topstack = top->next; | self->topstack = top->next; | ||||
free(top); | free(top); | ||||
self->depth--; | |||||
} | } | ||||
/* | /* | ||||
@@ -182,9 +192,10 @@ Tokenizer_delete_top_of_stack(Tokenizer* self) | |||||
static PyObject* | static PyObject* | ||||
Tokenizer_pop(Tokenizer* self) | Tokenizer_pop(Tokenizer* self) | ||||
{ | { | ||||
PyObject* stack; | |||||
if (Tokenizer_push_textbuffer(self)) | if (Tokenizer_push_textbuffer(self)) | ||||
return NULL; | return NULL; | ||||
PyObject* stack = self->topstack->stack; | |||||
stack = self->topstack->stack; | |||||
Py_INCREF(stack); | Py_INCREF(stack); | ||||
Tokenizer_delete_top_of_stack(self); | Tokenizer_delete_top_of_stack(self); | ||||
return stack; | return stack; | ||||
@@ -197,11 +208,13 @@ Tokenizer_pop(Tokenizer* self) | |||||
static PyObject* | static PyObject* | ||||
Tokenizer_pop_keeping_context(Tokenizer* self) | Tokenizer_pop_keeping_context(Tokenizer* self) | ||||
{ | { | ||||
PyObject* stack; | |||||
int context; | |||||
if (Tokenizer_push_textbuffer(self)) | if (Tokenizer_push_textbuffer(self)) | ||||
return NULL; | return NULL; | ||||
PyObject* stack = self->topstack->stack; | |||||
stack = self->topstack->stack; | |||||
Py_INCREF(stack); | Py_INCREF(stack); | ||||
int context = self->topstack->context; | |||||
context = self->topstack->context; | |||||
Tokenizer_delete_top_of_stack(self); | Tokenizer_delete_top_of_stack(self); | ||||
self->topstack->context = context; | self->topstack->context = context; | ||||
return stack; | return stack; | ||||
@@ -373,9 +386,10 @@ Tokenizer_read(Tokenizer* self, Py_ssize_t delta) | |||||
static PyObject* | static PyObject* | ||||
Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) | Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) | ||||
{ | { | ||||
Py_ssize_t index; | |||||
if (delta > self->head) | if (delta > self->head) | ||||
return EMPTY; | return EMPTY; | ||||
Py_ssize_t index = self->head - delta; | |||||
index = self->head - delta; | |||||
return PyList_GET_ITEM(self->text, index); | return PyList_GET_ITEM(self->text, index); | ||||
} | } | ||||
@@ -389,7 +403,7 @@ Tokenizer_parse_template_or_argument(Tokenizer* self) | |||||
PyObject *tokenlist; | PyObject *tokenlist; | ||||
self->head += 2; | self->head += 2; | ||||
while (Tokenizer_READ(self, 0) == *"{") { | |||||
while (Tokenizer_READ(self, 0) == *"{" && braces < MAX_BRACES) { | |||||
self->head++; | self->head++; | ||||
braces++; | braces++; | ||||
} | } | ||||
@@ -420,8 +434,8 @@ Tokenizer_parse_template_or_argument(Tokenizer* self) | |||||
if (Tokenizer_parse_template(self)) | if (Tokenizer_parse_template(self)) | ||||
return -1; | return -1; | ||||
if (BAD_ROUTE) { | if (BAD_ROUTE) { | ||||
char text[MAX_BRACES + 1]; | |||||
RESET_ROUTE(); | RESET_ROUTE(); | ||||
char text[braces + 1]; | |||||
for (i = 0; i < braces; i++) text[i] = *"{"; | for (i = 0; i < braces; i++) text[i] = *"{"; | ||||
text[braces] = *""; | text[braces] = *""; | ||||
if (Tokenizer_write_text_then_stack(self, text)) { | if (Tokenizer_write_text_then_stack(self, text)) { | ||||
@@ -632,9 +646,10 @@ Tokenizer_handle_template_end(Tokenizer* self) | |||||
static int | static int | ||||
Tokenizer_handle_argument_separator(Tokenizer* self) | Tokenizer_handle_argument_separator(Tokenizer* self) | ||||
{ | { | ||||
PyObject* token; | |||||
self->topstack->context ^= LC_ARGUMENT_NAME; | self->topstack->context ^= LC_ARGUMENT_NAME; | ||||
self->topstack->context |= LC_ARGUMENT_DEFAULT; | self->topstack->context |= LC_ARGUMENT_DEFAULT; | ||||
PyObject* token = PyObject_CallObject(ArgumentSeparator, NULL); | |||||
token = PyObject_CallObject(ArgumentSeparator, NULL); | |||||
if (!token) | if (!token) | ||||
return -1; | return -1; | ||||
if (Tokenizer_write(self, token)) { | if (Tokenizer_write(self, token)) { | ||||
@@ -651,8 +666,8 @@ Tokenizer_handle_argument_separator(Tokenizer* self) | |||||
static PyObject* | static PyObject* | ||||
Tokenizer_handle_argument_end(Tokenizer* self) | Tokenizer_handle_argument_end(Tokenizer* self) | ||||
{ | { | ||||
self->head += 2; | |||||
PyObject* stack = Tokenizer_pop(self); | PyObject* stack = Tokenizer_pop(self); | ||||
self->head += 2; | |||||
return stack; | return stack; | ||||
} | } | ||||
@@ -713,9 +728,10 @@ Tokenizer_parse_wikilink(Tokenizer* self) | |||||
static int | static int | ||||
Tokenizer_handle_wikilink_separator(Tokenizer* self) | Tokenizer_handle_wikilink_separator(Tokenizer* self) | ||||
{ | { | ||||
PyObject* token; | |||||
self->topstack->context ^= LC_WIKILINK_TITLE; | self->topstack->context ^= LC_WIKILINK_TITLE; | ||||
self->topstack->context |= LC_WIKILINK_TEXT; | self->topstack->context |= LC_WIKILINK_TEXT; | ||||
PyObject* token = PyObject_CallObject(WikilinkSeparator, NULL); | |||||
token = PyObject_CallObject(WikilinkSeparator, NULL); | |||||
if (!token) | if (!token) | ||||
return -1; | return -1; | ||||
if (Tokenizer_write(self, token)) { | if (Tokenizer_write(self, token)) { | ||||
@@ -732,8 +748,8 @@ Tokenizer_handle_wikilink_separator(Tokenizer* self) | |||||
static PyObject* | static PyObject* | ||||
Tokenizer_handle_wikilink_end(Tokenizer* self) | Tokenizer_handle_wikilink_end(Tokenizer* self) | ||||
{ | { | ||||
self->head += 1; | |||||
PyObject* stack = Tokenizer_pop(self); | PyObject* stack = Tokenizer_pop(self); | ||||
self->head += 1; | |||||
return stack; | return stack; | ||||
} | } | ||||
@@ -759,11 +775,10 @@ Tokenizer_parse_heading(Tokenizer* self) | |||||
if (BAD_ROUTE) { | if (BAD_ROUTE) { | ||||
RESET_ROUTE(); | RESET_ROUTE(); | ||||
self->head = reset + best - 1; | self->head = reset + best - 1; | ||||
char text[best + 1]; | |||||
for (i = 0; i < best; i++) text[i] = *"="; | |||||
text[best] = *""; | |||||
if (Tokenizer_write_text_then_stack(self, text)) | |||||
return -1; | |||||
for (i = 0; i < best; i++) { | |||||
if (Tokenizer_write_text(self, *"=")) | |||||
return -1; | |||||
} | |||||
self->global ^= GL_HEADING; | self->global ^= GL_HEADING; | ||||
return 0; | return 0; | ||||
} | } | ||||
@@ -798,13 +813,12 @@ Tokenizer_parse_heading(Tokenizer* self) | |||||
Py_DECREF(token); | Py_DECREF(token); | ||||
if (heading->level < best) { | if (heading->level < best) { | ||||
diff = best - heading->level; | diff = best - heading->level; | ||||
char difftext[diff + 1]; | |||||
for (i = 0; i < diff; i++) difftext[i] = *"="; | |||||
difftext[diff] = *""; | |||||
if (Tokenizer_write_text_then_stack(self, difftext)) { | |||||
Py_DECREF(heading->title); | |||||
free(heading); | |||||
return -1; | |||||
for (i = 0; i < diff; i++) { | |||||
if (Tokenizer_write_text(self, *"=")) { | |||||
Py_DECREF(heading->title); | |||||
free(heading); | |||||
return -1; | |||||
} | |||||
} | } | ||||
} | } | ||||
if (Tokenizer_write_all(self, heading->title)) { | if (Tokenizer_write_all(self, heading->title)) { | ||||
@@ -844,28 +858,27 @@ Tokenizer_handle_heading_end(Tokenizer* self) | |||||
self->head++; | self->head++; | ||||
} | } | ||||
current = log2(self->topstack->context / LC_HEADING_LEVEL_1) + 1; | current = log2(self->topstack->context / LC_HEADING_LEVEL_1) + 1; | ||||
level = current > best ? (best > 6 ? 6 : best) : (current > 6 ? 6 : current); | |||||
level = current > best ? (best > 6 ? 6 : best) : | |||||
(current > 6 ? 6 : current); | |||||
after = (HeadingData*) Tokenizer_parse(self, self->topstack->context); | after = (HeadingData*) Tokenizer_parse(self, self->topstack->context); | ||||
if (BAD_ROUTE) { | if (BAD_ROUTE) { | ||||
RESET_ROUTE(); | RESET_ROUTE(); | ||||
if (level < best) { | if (level < best) { | ||||
diff = best - level; | diff = best - level; | ||||
char difftext[diff + 1]; | |||||
for (i = 0; i < diff; i++) difftext[i] = *"="; | |||||
difftext[diff] = *""; | |||||
if (Tokenizer_write_text_then_stack(self, difftext)) | |||||
return NULL; | |||||
for (i = 0; i < diff; i++) { | |||||
if (Tokenizer_write_text(self, *"=")) | |||||
return NULL; | |||||
} | |||||
} | } | ||||
self->head = reset + best - 1; | self->head = reset + best - 1; | ||||
} | } | ||||
else { | else { | ||||
char text[best + 1]; | |||||
for (i = 0; i < best; i++) text[i] = *"="; | |||||
text[best] = *""; | |||||
if (Tokenizer_write_text_then_stack(self, text)) { | |||||
Py_DECREF(after->title); | |||||
free(after); | |||||
return NULL; | |||||
for (i = 0; i < best; i++) { | |||||
if (Tokenizer_write_text(self, *"=")) { | |||||
Py_DECREF(after->title); | |||||
free(after); | |||||
return NULL; | |||||
} | |||||
} | } | ||||
if (Tokenizer_write_all(self, after->title)) { | if (Tokenizer_write_all(self, after->title)) { | ||||
Py_DECREF(after->title); | Py_DECREF(after->title); | ||||
@@ -897,8 +910,8 @@ Tokenizer_really_parse_entity(Tokenizer* self) | |||||
{ | { | ||||
PyObject *token, *kwargs, *textobj; | PyObject *token, *kwargs, *textobj; | ||||
Py_UNICODE this; | Py_UNICODE this; | ||||
int numeric, hexadecimal, i, j, test; | |||||
char *valid, *text, *def; | |||||
int numeric, hexadecimal, i, j, zeroes, test; | |||||
char *valid, *text, *buffer, *def; | |||||
#define FAIL_ROUTE_AND_EXIT() { \ | #define FAIL_ROUTE_AND_EXIT() { \ | ||||
Tokenizer_fail_route(self); \ | Tokenizer_fail_route(self); \ | ||||
@@ -959,17 +972,18 @@ Tokenizer_really_parse_entity(Tokenizer* self) | |||||
else | else | ||||
numeric = hexadecimal = 0; | numeric = hexadecimal = 0; | ||||
if (hexadecimal) | if (hexadecimal) | ||||
valid = "0123456789abcdefABCDEF"; | |||||
valid = HEXDIGITS; | |||||
else if (numeric) | else if (numeric) | ||||
valid = "0123456789"; | |||||
valid = DIGITS; | |||||
else | else | ||||
valid = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; | |||||
valid = ALPHANUM; | |||||
text = calloc(MAX_ENTITY_SIZE, sizeof(char)); | text = calloc(MAX_ENTITY_SIZE, sizeof(char)); | ||||
if (!text) { | if (!text) { | ||||
PyErr_NoMemory(); | PyErr_NoMemory(); | ||||
return -1; | return -1; | ||||
} | } | ||||
i = 0; | i = 0; | ||||
zeroes = 0; | |||||
while (1) { | while (1) { | ||||
this = Tokenizer_READ(self, 0); | this = Tokenizer_READ(self, 0); | ||||
if (this == *";") { | if (this == *";") { | ||||
@@ -978,6 +992,7 @@ Tokenizer_really_parse_entity(Tokenizer* self) | |||||
break; | break; | ||||
} | } | ||||
if (i == 0 && this == *"0") { | if (i == 0 && this == *"0") { | ||||
zeroes++; | |||||
self->head++; | self->head++; | ||||
continue; | continue; | ||||
} | } | ||||
@@ -1008,13 +1023,26 @@ Tokenizer_really_parse_entity(Tokenizer* self) | |||||
i = 0; | i = 0; | ||||
while (1) { | while (1) { | ||||
def = entitydefs[i]; | def = entitydefs[i]; | ||||
if (!def) // We've reached the end of the def list without finding it | |||||
if (!def) // We've reached the end of the defs without finding it | |||||
FAIL_ROUTE_AND_EXIT() | FAIL_ROUTE_AND_EXIT() | ||||
if (strcmp(text, def) == 0) | if (strcmp(text, def) == 0) | ||||
break; | break; | ||||
i++; | i++; | ||||
} | } | ||||
} | } | ||||
if (zeroes) { | |||||
buffer = calloc(strlen(text) + zeroes + 1, sizeof(char)); | |||||
if (!buffer) { | |||||
free(text); | |||||
PyErr_NoMemory(); | |||||
return -1; | |||||
} | |||||
for (i = 0; i < zeroes; i++) | |||||
strcat(buffer, "0"); | |||||
strcat(buffer, text); | |||||
free(text); | |||||
text = buffer; | |||||
} | |||||
textobj = PyUnicode_FromString(text); | textobj = PyUnicode_FromString(text); | ||||
if (!textobj) { | if (!textobj) { | ||||
free(text); | free(text); | ||||
@@ -1092,9 +1120,9 @@ Tokenizer_parse_comment(Tokenizer* self) | |||||
self->head += 4; | self->head += 4; | ||||
comment = Tokenizer_parse(self, LC_COMMENT); | comment = Tokenizer_parse(self, LC_COMMENT); | ||||
if (BAD_ROUTE) { | if (BAD_ROUTE) { | ||||
const char* text = "<!--"; | |||||
RESET_ROUTE(); | RESET_ROUTE(); | ||||
self->head = reset; | self->head = reset; | ||||
const char* text = "<!--"; | |||||
i = 0; | i = 0; | ||||
while (1) { | while (1) { | ||||
if (!text[i]) | if (!text[i]) | ||||
@@ -1138,41 +1166,59 @@ Tokenizer_parse_comment(Tokenizer* self) | |||||
} | } | ||||
/* | /* | ||||
Make sure we are not trying to write an invalid character. | |||||
Make sure we are not trying to write an invalid character. Return 0 if | |||||
everything is safe, or -1 if the route must be failed. | |||||
*/ | */ | ||||
static void | |||||
static int | |||||
Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | ||||
{ | { | ||||
if (context & LC_FAIL_NEXT) { | if (context & LC_FAIL_NEXT) { | ||||
Tokenizer_fail_route(self); | |||||
return; | |||||
return -1; | |||||
} | } | ||||
if (context & (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE)) { | |||||
if (data == *"{" || data == *"}" || data == *"[" || data == *"]") { | |||||
if (context & LC_WIKILINK_TITLE) { | |||||
if (data == *"]" || data == *"{") | |||||
self->topstack->context |= LC_FAIL_NEXT; | self->topstack->context |= LC_FAIL_NEXT; | ||||
return; | |||||
else if (data == *"\n" || data == *"[" || data == *"}") | |||||
return -1; | |||||
return 0; | |||||
} | |||||
if (context & LC_TEMPLATE_NAME) { | |||||
if (data == *"{" || data == *"}" || data == *"[") { | |||||
self->topstack->context |= LC_FAIL_NEXT; | |||||
return 0; | |||||
} | } | ||||
if (data == *"|") { | |||||
if (data == *"]") { | |||||
return -1; | |||||
} | |||||
if (data == *"|") | |||||
return 0; | |||||
if (context & LC_HAS_TEXT) { | |||||
if (context & LC_FAIL_ON_TEXT) { | if (context & LC_FAIL_ON_TEXT) { | ||||
self->topstack->context ^= LC_FAIL_ON_TEXT; | |||||
return; | |||||
if (!Py_UNICODE_ISSPACE(data)) | |||||
return -1; | |||||
} | |||||
else { | |||||
if (data == *"\n") | |||||
self->topstack->context |= LC_FAIL_ON_TEXT; | |||||
} | } | ||||
} | } | ||||
else if (!Py_UNICODE_ISSPACE(data)) | |||||
self->topstack->context |= LC_HAS_TEXT; | |||||
} | } | ||||
else if (context & (LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)) { | |||||
else { | |||||
if (context & LC_FAIL_ON_EQUALS) { | if (context & LC_FAIL_ON_EQUALS) { | ||||
if (data == *"=") { | if (data == *"=") { | ||||
Tokenizer_fail_route(self); | |||||
return; | |||||
return -1; | |||||
} | } | ||||
} | } | ||||
else if (context & LC_FAIL_ON_LBRACE) { | else if (context & LC_FAIL_ON_LBRACE) { | ||||
if (data == *"{") { | |||||
if (data == *"{" || (Tokenizer_READ(self, -1) == *"{" && | |||||
Tokenizer_READ(self, -2) == *"{")) { | |||||
if (context & LC_TEMPLATE) | if (context & LC_TEMPLATE) | ||||
self->topstack->context |= LC_FAIL_ON_EQUALS; | self->topstack->context |= LC_FAIL_ON_EQUALS; | ||||
else | else | ||||
self->topstack->context |= LC_FAIL_NEXT; | self->topstack->context |= LC_FAIL_NEXT; | ||||
return; | |||||
return 0; | |||||
} | } | ||||
self->topstack->context ^= LC_FAIL_ON_LBRACE; | self->topstack->context ^= LC_FAIL_ON_LBRACE; | ||||
} | } | ||||
@@ -1182,7 +1228,7 @@ Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | |||||
self->topstack->context |= LC_FAIL_ON_EQUALS; | self->topstack->context |= LC_FAIL_ON_EQUALS; | ||||
else | else | ||||
self->topstack->context |= LC_FAIL_NEXT; | self->topstack->context |= LC_FAIL_NEXT; | ||||
return; | |||||
return 0; | |||||
} | } | ||||
self->topstack->context ^= LC_FAIL_ON_RBRACE; | self->topstack->context ^= LC_FAIL_ON_RBRACE; | ||||
} | } | ||||
@@ -1191,25 +1237,7 @@ Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | |||||
else if (data == *"}") | else if (data == *"}") | ||||
self->topstack->context |= LC_FAIL_ON_RBRACE; | self->topstack->context |= LC_FAIL_ON_RBRACE; | ||||
} | } | ||||
if (context & LC_HAS_TEXT) { | |||||
if (context & LC_FAIL_ON_TEXT) { | |||||
if (!Py_UNICODE_ISSPACE(data)) { | |||||
if (context & LC_TEMPLATE_PARAM_KEY) { | |||||
self->topstack->context ^= LC_FAIL_ON_TEXT; | |||||
self->topstack->context |= LC_FAIL_ON_EQUALS; | |||||
} | |||||
else | |||||
Tokenizer_fail_route(self); | |||||
return; | |||||
} | |||||
} | |||||
else { | |||||
if (data == *"\n") | |||||
self->topstack->context |= LC_FAIL_ON_TEXT; | |||||
} | |||||
} | |||||
else if (!Py_UNICODE_ISSPACE(data)) | |||||
self->topstack->context |= LC_HAS_TEXT; | |||||
return 0; | |||||
} | } | ||||
/* | /* | ||||
@@ -1232,12 +1260,12 @@ Tokenizer_parse(Tokenizer* self, int context) | |||||
this = Tokenizer_READ(self, 0); | this = Tokenizer_READ(self, 0); | ||||
this_context = self->topstack->context; | this_context = self->topstack->context; | ||||
if (this_context & unsafe_contexts) { | if (this_context & unsafe_contexts) { | ||||
Tokenizer_verify_safe(self, this_context, this); | |||||
if (BAD_ROUTE) { | |||||
if (Tokenizer_verify_safe(self, this_context, this) < 0) { | |||||
if (this_context & LC_TEMPLATE_PARAM_KEY) { | if (this_context & LC_TEMPLATE_PARAM_KEY) { | ||||
trash = Tokenizer_pop(self); | trash = Tokenizer_pop(self); | ||||
Py_XDECREF(trash); | Py_XDECREF(trash); | ||||
} | } | ||||
Tokenizer_fail_route(self); | |||||
return NULL; | return NULL; | ||||
} | } | ||||
} | } | ||||
@@ -1271,10 +1299,14 @@ Tokenizer_parse(Tokenizer* self, int context) | |||||
Tokenizer_write_text(self, this); | Tokenizer_write_text(self, this); | ||||
} | } | ||||
else if (this == next && next == *"{") { | else if (this == next && next == *"{") { | ||||
if (Tokenizer_parse_template_or_argument(self)) | |||||
return NULL; | |||||
if (self->topstack->context & LC_FAIL_NEXT) | |||||
self->topstack->context ^= LC_FAIL_NEXT; | |||||
if (Tokenizer_CAN_RECURSE(self)) { | |||||
if (Tokenizer_parse_template_or_argument(self)) | |||||
return NULL; | |||||
if (self->topstack->context & LC_FAIL_NEXT) | |||||
self->topstack->context ^= LC_FAIL_NEXT; | |||||
} | |||||
else | |||||
Tokenizer_write_text(self, this); | |||||
} | } | ||||
else if (this == *"|" && this_context & LC_TEMPLATE) { | else if (this == *"|" && this_context & LC_TEMPLATE) { | ||||
if (Tokenizer_handle_template_param(self)) | if (Tokenizer_handle_template_param(self)) | ||||
@@ -1297,15 +1329,15 @@ Tokenizer_parse(Tokenizer* self, int context) | |||||
Tokenizer_write_text(self, this); | Tokenizer_write_text(self, this); | ||||
} | } | ||||
else if (this == next && next == *"[") { | else if (this == next && next == *"[") { | ||||
if (!(this_context & LC_WIKILINK_TITLE)) { | |||||
if (!(this_context & LC_WIKILINK_TITLE) && | |||||
Tokenizer_CAN_RECURSE(self)) { | |||||
if (Tokenizer_parse_wikilink(self)) | if (Tokenizer_parse_wikilink(self)) | ||||
return NULL; | return NULL; | ||||
if (self->topstack->context & LC_FAIL_NEXT) | if (self->topstack->context & LC_FAIL_NEXT) | ||||
self->topstack->context ^= LC_FAIL_NEXT; | self->topstack->context ^= LC_FAIL_NEXT; | ||||
} | } | ||||
else { | |||||
else | |||||
Tokenizer_write_text(self, this); | Tokenizer_write_text(self, this); | ||||
} | |||||
} | } | ||||
else if (this == *"|" && this_context & LC_WIKILINK_TITLE) { | else if (this == *"|" && this_context & LC_WIKILINK_TITLE) { | ||||
if (Tokenizer_handle_wikilink_separator(self)) | if (Tokenizer_handle_wikilink_separator(self)) | ||||
@@ -1354,10 +1386,10 @@ Tokenizer_tokenize(Tokenizer* self, PyObject* args) | |||||
PyObject *text, *temp; | PyObject *text, *temp; | ||||
if (!PyArg_ParseTuple(args, "U", &text)) { | if (!PyArg_ParseTuple(args, "U", &text)) { | ||||
/* Failed to parse a Unicode object; try a string instead. */ | |||||
PyErr_Clear(); | |||||
const char* encoded; | const char* encoded; | ||||
Py_ssize_t size; | Py_ssize_t size; | ||||
/* Failed to parse a Unicode object; try a string instead. */ | |||||
PyErr_Clear(); | |||||
if (!PyArg_ParseTuple(args, "s#", &encoded, &size)) | if (!PyArg_ParseTuple(args, "s#", &encoded, &size)) | ||||
return NULL; | return NULL; | ||||
temp = PyUnicode_FromStringAndSize(encoded, size); | temp = PyUnicode_FromStringAndSize(encoded, size); | ||||
@@ -1379,7 +1411,8 @@ Tokenizer_tokenize(Tokenizer* self, PyObject* args) | |||||
PyMODINIT_FUNC | PyMODINIT_FUNC | ||||
init_tokenizer(void) | init_tokenizer(void) | ||||
{ | { | ||||
PyObject *module, *tempmodule, *defmap, *deflist, *globals, *locals, *fromlist, *modname; | |||||
PyObject *module, *tempmod, *defmap, *deflist, *globals, *locals, | |||||
*fromlist, *modname; | |||||
unsigned numdefs, i; | unsigned numdefs, i; | ||||
char *name; | char *name; | ||||
@@ -1389,14 +1422,16 @@ init_tokenizer(void) | |||||
module = Py_InitModule("_tokenizer", module_methods); | module = Py_InitModule("_tokenizer", module_methods); | ||||
Py_INCREF(&TokenizerType); | Py_INCREF(&TokenizerType); | ||||
PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); | PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); | ||||
Py_INCREF(Py_True); | |||||
PyDict_SetItemString(TokenizerType.tp_dict, "USES_C", Py_True); | |||||
tempmodule = PyImport_ImportModule("htmlentitydefs"); | |||||
if (!tempmodule) | |||||
tempmod = PyImport_ImportModule("htmlentitydefs"); | |||||
if (!tempmod) | |||||
return; | return; | ||||
defmap = PyObject_GetAttrString(tempmodule, "entitydefs"); | |||||
defmap = PyObject_GetAttrString(tempmod, "entitydefs"); | |||||
if (!defmap) | if (!defmap) | ||||
return; | return; | ||||
Py_DECREF(tempmodule); | |||||
Py_DECREF(tempmod); | |||||
deflist = PyDict_Keys(defmap); | deflist = PyDict_Keys(defmap); | ||||
if (!deflist) | if (!deflist) | ||||
return; | return; | ||||
@@ -1420,18 +1455,20 @@ init_tokenizer(void) | |||||
if (!modname) | if (!modname) | ||||
return; | return; | ||||
PyList_SET_ITEM(fromlist, 0, modname); | PyList_SET_ITEM(fromlist, 0, modname); | ||||
tempmodule = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); | |||||
tempmod = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); | |||||
Py_DECREF(fromlist); | Py_DECREF(fromlist); | ||||
if (!tempmodule) | |||||
if (!tempmod) | |||||
return; | return; | ||||
tokens = PyObject_GetAttrString(tempmodule, "tokens"); | |||||
Py_DECREF(tempmodule); | |||||
tokens = PyObject_GetAttrString(tempmod, "tokens"); | |||||
Py_DECREF(tempmod); | |||||
Text = PyObject_GetAttrString(tokens, "Text"); | Text = PyObject_GetAttrString(tokens, "Text"); | ||||
TemplateOpen = PyObject_GetAttrString(tokens, "TemplateOpen"); | TemplateOpen = PyObject_GetAttrString(tokens, "TemplateOpen"); | ||||
TemplateParamSeparator = PyObject_GetAttrString(tokens, "TemplateParamSeparator"); | |||||
TemplateParamEquals = PyObject_GetAttrString(tokens, "TemplateParamEquals"); | |||||
TemplateParamSeparator = PyObject_GetAttrString(tokens, | |||||
"TemplateParamSeparator"); | |||||
TemplateParamEquals = PyObject_GetAttrString(tokens, | |||||
"TemplateParamEquals"); | |||||
TemplateClose = PyObject_GetAttrString(tokens, "TemplateClose"); | TemplateClose = PyObject_GetAttrString(tokens, "TemplateClose"); | ||||
ArgumentOpen = PyObject_GetAttrString(tokens, "ArgumentOpen"); | ArgumentOpen = PyObject_GetAttrString(tokens, "ArgumentOpen"); | ||||
@@ -1,6 +1,6 @@ | |||||
/* | /* | ||||
Tokenizer Header File for MWParserFromHell | Tokenizer Header File for MWParserFromHell | ||||
Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | Permission is hereby granted, free of charge, to any person obtaining a copy of | ||||
this software and associated documentation files (the "Software"), to deal in | this software and associated documentation files (the "Software"), to deal in | ||||
@@ -36,12 +36,19 @@ SOFTWARE. | |||||
#define malloc PyObject_Malloc | #define malloc PyObject_Malloc | ||||
#define free PyObject_Free | #define free PyObject_Free | ||||
#define DIGITS "0123456789" | |||||
#define HEXDIGITS "0123456789abcdefABCDEF" | |||||
#define ALPHANUM "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |||||
static const char* MARKERS[] = { | static const char* MARKERS[] = { | ||||
"{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", "/", "-", | "{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", "/", "-", | ||||
"!", "\n", ""}; | "!", "\n", ""}; | ||||
#define NUM_MARKERS 18 | #define NUM_MARKERS 18 | ||||
#define TEXTBUFFER_BLOCKSIZE 1024 | #define TEXTBUFFER_BLOCKSIZE 1024 | ||||
#define MAX_DEPTH 40 | |||||
#define MAX_CYCLES 100000 | |||||
#define MAX_BRACES 255 | |||||
#define MAX_ENTITY_SIZE 8 | #define MAX_ENTITY_SIZE 8 | ||||
static int route_state = 0; | static int route_state = 0; | ||||
@@ -118,6 +125,7 @@ static PyObject* TagCloseClose; | |||||
#define LC_COMMENT 0x02000 | #define LC_COMMENT 0x02000 | ||||
#define LC_SAFETY_CHECK 0xFC000 | |||||
#define LC_HAS_TEXT 0x04000 | #define LC_HAS_TEXT 0x04000 | ||||
#define LC_FAIL_ON_TEXT 0x08000 | #define LC_FAIL_ON_TEXT 0x08000 | ||||
#define LC_FAIL_NEXT 0x10000 | #define LC_FAIL_NEXT 0x10000 | ||||
@@ -160,12 +168,15 @@ typedef struct { | |||||
Py_ssize_t head; /* current position in text */ | Py_ssize_t head; /* current position in text */ | ||||
Py_ssize_t length; /* length of text */ | Py_ssize_t length; /* length of text */ | ||||
int global; /* global context */ | int global; /* global context */ | ||||
int depth; /* stack recursion depth */ | |||||
int cycles; /* total number of stack recursions */ | |||||
} Tokenizer; | } Tokenizer; | ||||
/* Macros for accessing Tokenizer data: */ | /* Macros for accessing Tokenizer data: */ | ||||
#define Tokenizer_READ(self, delta) (*PyUnicode_AS_UNICODE(Tokenizer_read(self, delta))) | #define Tokenizer_READ(self, delta) (*PyUnicode_AS_UNICODE(Tokenizer_read(self, delta))) | ||||
#define Tokenizer_CAN_RECURSE(self) (self->depth < MAX_DEPTH && self->cycles < MAX_CYCLES) | |||||
/* Function prototypes: */ | /* Function prototypes: */ | ||||
@@ -205,7 +216,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer*); | |||||
static int Tokenizer_really_parse_entity(Tokenizer*); | static int Tokenizer_really_parse_entity(Tokenizer*); | ||||
static int Tokenizer_parse_entity(Tokenizer*); | static int Tokenizer_parse_entity(Tokenizer*); | ||||
static int Tokenizer_parse_comment(Tokenizer*); | static int Tokenizer_parse_comment(Tokenizer*); | ||||
static void Tokenizer_verify_safe(Tokenizer*, int, Py_UNICODE); | |||||
static int Tokenizer_verify_safe(Tokenizer*, int, Py_UNICODE); | |||||
static PyObject* Tokenizer_parse(Tokenizer*, int); | static PyObject* Tokenizer_parse(Tokenizer*, int); | ||||
static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); | static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -23,7 +23,6 @@ | |||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
from math import log | from math import log | ||||
import re | import re | ||||
import string | |||||
from . import contexts | from . import contexts | ||||
from . import tokens | from . import tokens | ||||
@@ -39,10 +38,13 @@ class BadRoute(Exception): | |||||
class Tokenizer(object): | class Tokenizer(object): | ||||
"""Creates a list of tokens from a string of wikicode.""" | """Creates a list of tokens from a string of wikicode.""" | ||||
USES_C = False | |||||
START = object() | START = object() | ||||
END = object() | END = object() | ||||
MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", | MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", | ||||
"/", "-", "!", "\n", END] | "/", "-", "!", "\n", END] | ||||
MAX_DEPTH = 40 | |||||
MAX_CYCLES = 100000 | |||||
regex = re.compile(r"([{}\[\]<>|=&#*;:/\-!\n])", flags=re.IGNORECASE) | regex = re.compile(r"([{}\[\]<>|=&#*;:/\-!\n])", flags=re.IGNORECASE) | ||||
def __init__(self): | def __init__(self): | ||||
@@ -50,6 +52,8 @@ class Tokenizer(object): | |||||
self._head = 0 | self._head = 0 | ||||
self._stacks = [] | self._stacks = [] | ||||
self._global = 0 | self._global = 0 | ||||
self._depth = 0 | |||||
self._cycles = 0 | |||||
@property | @property | ||||
def _stack(self): | def _stack(self): | ||||
@@ -77,6 +81,8 @@ class Tokenizer(object): | |||||
def _push(self, context=0): | def _push(self, context=0): | ||||
"""Add a new token stack, context, and textbuffer to the list.""" | """Add a new token stack, context, and textbuffer to the list.""" | ||||
self._stacks.append([[], context, []]) | self._stacks.append([[], context, []]) | ||||
self._depth += 1 | |||||
self._cycles += 1 | |||||
def _push_textbuffer(self): | def _push_textbuffer(self): | ||||
"""Push the textbuffer onto the stack as a Text node and clear it.""" | """Push the textbuffer onto the stack as a Text node and clear it.""" | ||||
@@ -91,6 +97,7 @@ class Tokenizer(object): | |||||
stack's context with the current stack's. | stack's context with the current stack's. | ||||
""" | """ | ||||
self._push_textbuffer() | self._push_textbuffer() | ||||
self._depth -= 1 | |||||
if keep_context: | if keep_context: | ||||
context = self._context | context = self._context | ||||
stack = self._stacks.pop()[0] | stack = self._stacks.pop()[0] | ||||
@@ -98,6 +105,10 @@ class Tokenizer(object): | |||||
return stack | return stack | ||||
return self._stacks.pop()[0] | return self._stacks.pop()[0] | ||||
def _can_recurse(self): | |||||
"""Return whether or not our max recursion depth has been exceeded.""" | |||||
return self._depth < self.MAX_DEPTH and self._cycles < self.MAX_CYCLES | |||||
def _fail_route(self): | def _fail_route(self): | ||||
"""Fail the current tokenization route. | """Fail the current tokenization route. | ||||
@@ -214,24 +225,9 @@ class Tokenizer(object): | |||||
self._write_all(argument) | self._write_all(argument) | ||||
self._write(tokens.ArgumentClose()) | self._write(tokens.ArgumentClose()) | ||||
def _verify_safe(self, unsafes): | |||||
"""Verify that there are no unsafe characters in the current stack. | |||||
The route will be failed if the name contains any element of *unsafes* | |||||
in it (not merely at the beginning or end). This is used when parsing a | |||||
template name or parameter key, which cannot contain newlines. | |||||
""" | |||||
self._push_textbuffer() | |||||
if self._stack: | |||||
text = [tok for tok in self._stack if isinstance(tok, tokens.Text)] | |||||
text = "".join([token.text for token in text]).strip() | |||||
if text and any([unsafe in text for unsafe in unsafes]): | |||||
self._fail_route() | |||||
def _handle_template_param(self): | def _handle_template_param(self): | ||||
"""Handle a template parameter at the head of the string.""" | """Handle a template parameter at the head of the string.""" | ||||
if self._context & contexts.TEMPLATE_NAME: | if self._context & contexts.TEMPLATE_NAME: | ||||
self._verify_safe(["\n", "{", "}", "[", "]"]) | |||||
self._context ^= contexts.TEMPLATE_NAME | self._context ^= contexts.TEMPLATE_NAME | ||||
elif self._context & contexts.TEMPLATE_PARAM_VALUE: | elif self._context & contexts.TEMPLATE_PARAM_VALUE: | ||||
self._context ^= contexts.TEMPLATE_PARAM_VALUE | self._context ^= contexts.TEMPLATE_PARAM_VALUE | ||||
@@ -243,11 +239,6 @@ class Tokenizer(object): | |||||
def _handle_template_param_value(self): | def _handle_template_param_value(self): | ||||
"""Handle a template parameter's value at the head of the string.""" | """Handle a template parameter's value at the head of the string.""" | ||||
try: | |||||
self._verify_safe(["\n", "{{", "}}"]) | |||||
except BadRoute: | |||||
self._pop() | |||||
raise | |||||
self._write_all(self._pop(keep_context=True)) | self._write_all(self._pop(keep_context=True)) | ||||
self._context ^= contexts.TEMPLATE_PARAM_KEY | self._context ^= contexts.TEMPLATE_PARAM_KEY | ||||
self._context |= contexts.TEMPLATE_PARAM_VALUE | self._context |= contexts.TEMPLATE_PARAM_VALUE | ||||
@@ -255,24 +246,19 @@ class Tokenizer(object): | |||||
def _handle_template_end(self): | def _handle_template_end(self): | ||||
"""Handle the end of a template at the head of the string.""" | """Handle the end of a template at the head of the string.""" | ||||
if self._context & contexts.TEMPLATE_NAME: | |||||
self._verify_safe(["\n", "{", "}", "[", "]"]) | |||||
elif self._context & contexts.TEMPLATE_PARAM_KEY: | |||||
if self._context & contexts.TEMPLATE_PARAM_KEY: | |||||
self._write_all(self._pop(keep_context=True)) | self._write_all(self._pop(keep_context=True)) | ||||
self._head += 1 | self._head += 1 | ||||
return self._pop() | return self._pop() | ||||
def _handle_argument_separator(self): | def _handle_argument_separator(self): | ||||
"""Handle the separator between an argument's name and default.""" | """Handle the separator between an argument's name and default.""" | ||||
self._verify_safe(["\n", "{{", "}}"]) | |||||
self._context ^= contexts.ARGUMENT_NAME | self._context ^= contexts.ARGUMENT_NAME | ||||
self._context |= contexts.ARGUMENT_DEFAULT | self._context |= contexts.ARGUMENT_DEFAULT | ||||
self._write(tokens.ArgumentSeparator()) | self._write(tokens.ArgumentSeparator()) | ||||
def _handle_argument_end(self): | def _handle_argument_end(self): | ||||
"""Handle the end of an argument at the head of the string.""" | """Handle the end of an argument at the head of the string.""" | ||||
if self._context & contexts.ARGUMENT_NAME: | |||||
self._verify_safe(["\n", "{{", "}}"]) | |||||
self._head += 2 | self._head += 2 | ||||
return self._pop() | return self._pop() | ||||
@@ -292,15 +278,12 @@ class Tokenizer(object): | |||||
def _handle_wikilink_separator(self): | def _handle_wikilink_separator(self): | ||||
"""Handle the separator between a wikilink's title and its text.""" | """Handle the separator between a wikilink's title and its text.""" | ||||
self._verify_safe(["\n", "{", "}", "[", "]"]) | |||||
self._context ^= contexts.WIKILINK_TITLE | self._context ^= contexts.WIKILINK_TITLE | ||||
self._context |= contexts.WIKILINK_TEXT | self._context |= contexts.WIKILINK_TEXT | ||||
self._write(tokens.WikilinkSeparator()) | self._write(tokens.WikilinkSeparator()) | ||||
def _handle_wikilink_end(self): | def _handle_wikilink_end(self): | ||||
"""Handle the end of a wikilink at the head of the string.""" | """Handle the end of a wikilink at the head of the string.""" | ||||
if self._context & contexts.WIKILINK_TITLE: | |||||
self._verify_safe(["\n", "{", "}", "[", "]"]) | |||||
self._head += 1 | self._head += 1 | ||||
return self._pop() | return self._pop() | ||||
@@ -340,14 +323,14 @@ class Tokenizer(object): | |||||
current = int(log(self._context / contexts.HEADING_LEVEL_1, 2)) + 1 | current = int(log(self._context / contexts.HEADING_LEVEL_1, 2)) + 1 | ||||
level = min(current, min(best, 6)) | level = min(current, min(best, 6)) | ||||
try: | |||||
try: # Try to check for a heading closure after this one | |||||
after, after_level = self._parse(self._context) | after, after_level = self._parse(self._context) | ||||
except BadRoute: | except BadRoute: | ||||
if level < best: | if level < best: | ||||
self._write_text("=" * (best - level)) | self._write_text("=" * (best - level)) | ||||
self._head = reset + best - 1 | self._head = reset + best - 1 | ||||
return self._pop(), level | return self._pop(), level | ||||
else: | |||||
else: # Found another closure | |||||
self._write_text("=" * best) | self._write_text("=" * best) | ||||
self._write_all(after) | self._write_all(after) | ||||
return self._pop(), after_level | return self._pop(), after_level | ||||
@@ -374,9 +357,9 @@ class Tokenizer(object): | |||||
else: | else: | ||||
numeric = hexadecimal = False | numeric = hexadecimal = False | ||||
valid = string.hexdigits if hexadecimal else string.digits | |||||
valid = "0123456789abcdefABCDEF" if hexadecimal else "0123456789" | |||||
if not numeric and not hexadecimal: | if not numeric and not hexadecimal: | ||||
valid += string.ascii_letters | |||||
valid += "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |||||
if not all([char in valid for char in this]): | if not all([char in valid for char in this]): | ||||
self._fail_route() | self._fail_route() | ||||
@@ -608,11 +591,73 @@ class Tokenizer(object): | |||||
self._write(tokens.TagCloseClose()) | self._write(tokens.TagCloseClose()) | ||||
return self._pop() | return self._pop() | ||||
def _verify_safe(self, this): | |||||
"""Make sure we are not trying to write an invalid character.""" | |||||
context = self._context | |||||
if context & contexts.FAIL_NEXT: | |||||
return False | |||||
if context & contexts.WIKILINK_TITLE: | |||||
if this == "]" or this == "{": | |||||
self._context |= contexts.FAIL_NEXT | |||||
elif this == "\n" or this == "[" or this == "}": | |||||
return False | |||||
return True | |||||
if context & contexts.TEMPLATE_NAME: | |||||
if this == "{" or this == "}" or this == "[": | |||||
self._context |= contexts.FAIL_NEXT | |||||
return True | |||||
if this == "]": | |||||
return False | |||||
if this == "|": | |||||
return True | |||||
if context & contexts.HAS_TEXT: | |||||
if context & contexts.FAIL_ON_TEXT: | |||||
if this is self.END or not this.isspace(): | |||||
return False | |||||
else: | |||||
if this == "\n": | |||||
self._context |= contexts.FAIL_ON_TEXT | |||||
elif this is self.END or not this.isspace(): | |||||
self._context |= contexts.HAS_TEXT | |||||
return True | |||||
else: | |||||
if context & contexts.FAIL_ON_EQUALS: | |||||
if this == "=": | |||||
return False | |||||
elif context & contexts.FAIL_ON_LBRACE: | |||||
if this == "{" or (self._read(-1) == self._read(-2) == "{"): | |||||
if context & contexts.TEMPLATE: | |||||
self._context |= contexts.FAIL_ON_EQUALS | |||||
else: | |||||
self._context |= contexts.FAIL_NEXT | |||||
return True | |||||
self._context ^= contexts.FAIL_ON_LBRACE | |||||
elif context & contexts.FAIL_ON_RBRACE: | |||||
if this == "}": | |||||
if context & contexts.TEMPLATE: | |||||
self._context |= contexts.FAIL_ON_EQUALS | |||||
else: | |||||
self._context |= contexts.FAIL_NEXT | |||||
return True | |||||
self._context ^= contexts.FAIL_ON_RBRACE | |||||
elif this == "{": | |||||
self._context |= contexts.FAIL_ON_LBRACE | |||||
elif this == "}": | |||||
self._context |= contexts.FAIL_ON_RBRACE | |||||
return True | |||||
def _parse(self, context=0): | def _parse(self, context=0): | ||||
"""Parse the wikicode string, using *context* for when to stop.""" | """Parse the wikicode string, using *context* for when to stop.""" | ||||
self._push(context) | self._push(context) | ||||
while True: | while True: | ||||
this = self._read() | this = self._read() | ||||
unsafe = (contexts.TEMPLATE_NAME | contexts.WIKILINK_TITLE | | |||||
contexts.TEMPLATE_PARAM_KEY | contexts.ARGUMENT_NAME) | |||||
if self._context & unsafe: | |||||
if not self._verify_safe(this): | |||||
if self._context & contexts.TEMPLATE_PARAM_KEY: | |||||
self._pop() | |||||
self._fail_route() | |||||
if this not in self.MARKERS: | if this not in self.MARKERS: | ||||
if self._context & contexts.TAG_OPEN: | if self._context & contexts.TAG_OPEN: | ||||
should_exit = self._handle_tag_chunk(this) | should_exit = self._handle_tag_chunk(this) | ||||
@@ -641,7 +686,12 @@ class Tokenizer(object): | |||||
else: | else: | ||||
self._write_text(this) | self._write_text(this) | ||||
elif this == next == "{": | elif this == next == "{": | ||||
self._parse_template_or_argument() | |||||
if self._can_recurse(): | |||||
self._parse_template_or_argument() | |||||
if self._context & contexts.FAIL_NEXT: | |||||
self._context ^= contexts.FAIL_NEXT | |||||
else: | |||||
self._write_text("{") | |||||
elif this == "|" and self._context & contexts.TEMPLATE: | elif this == "|" and self._context & contexts.TEMPLATE: | ||||
self._handle_template_param() | self._handle_template_param() | ||||
elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY: | elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY: | ||||
@@ -656,8 +706,10 @@ class Tokenizer(object): | |||||
else: | else: | ||||
self._write_text("}") | self._write_text("}") | ||||
elif this == next == "[": | elif this == next == "[": | ||||
if not self._context & contexts.WIKILINK_TITLE: | |||||
if not self._context & contexts.WIKILINK_TITLE and self._can_recurse(): | |||||
self._parse_wikilink() | self._parse_wikilink() | ||||
if self._context & contexts.FAIL_NEXT: | |||||
self._context ^= contexts.FAIL_NEXT | |||||
else: | else: | ||||
self._write_text("[") | self._write_text("[") | ||||
elif this == "|" and self._context & contexts.WIKILINK_TITLE: | elif this == "|" and self._context & contexts.WIKILINK_TITLE: | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -41,8 +41,23 @@ def inheritdoc(method): | |||||
method.__doc__ = getattr(list, method.__name__).__doc__ | method.__doc__ = getattr(list, method.__name__).__doc__ | ||||
return method | return method | ||||
class _SliceNormalizerMixIn(object): | |||||
"""MixIn that provides a private method to normalize slices.""" | |||||
class SmartList(list): | |||||
def _normalize_slice(self, key): | |||||
"""Return a slice equivalent to the input *key*, standardized.""" | |||||
if key.start is not None: | |||||
start = (len(self) + key.start) if key.start < 0 else key.start | |||||
else: | |||||
start = 0 | |||||
if key.stop is not None: | |||||
stop = (len(self) + key.stop) if key.stop < 0 else key.stop | |||||
else: | |||||
stop = maxsize | |||||
return slice(start, stop, key.step or 1) | |||||
class SmartList(_SliceNormalizerMixIn, list): | |||||
"""Implements the ``list`` interface with special handling of sublists. | """Implements the ``list`` interface with special handling of sublists. | ||||
When a sublist is created (by ``list[i:j]``), any changes made to this | When a sublist is created (by ``list[i:j]``), any changes made to this | ||||
@@ -76,7 +91,8 @@ class SmartList(list): | |||||
def __getitem__(self, key): | def __getitem__(self, key): | ||||
if not isinstance(key, slice): | if not isinstance(key, slice): | ||||
return super(SmartList, self).__getitem__(key) | return super(SmartList, self).__getitem__(key) | ||||
sliceinfo = [key.start, key.stop, 1 if not key.step else key.step] | |||||
key = self._normalize_slice(key) | |||||
sliceinfo = [key.start, key.stop, key.step] | |||||
child = _ListProxy(self, sliceinfo) | child = _ListProxy(self, sliceinfo) | ||||
self._children[id(child)] = (child, sliceinfo) | self._children[id(child)] = (child, sliceinfo) | ||||
return child | return child | ||||
@@ -86,25 +102,28 @@ class SmartList(list): | |||||
return super(SmartList, self).__setitem__(key, item) | return super(SmartList, self).__setitem__(key, item) | ||||
item = list(item) | item = list(item) | ||||
super(SmartList, self).__setitem__(key, item) | super(SmartList, self).__setitem__(key, item) | ||||
diff = len(item) - key.stop + key.start | |||||
key = self._normalize_slice(key) | |||||
diff = len(item) + (key.start - key.stop) // key.step | |||||
values = self._children.values if py3k else self._children.itervalues | values = self._children.values if py3k else self._children.itervalues | ||||
if diff: | if diff: | ||||
for child, (start, stop, step) in values(): | for child, (start, stop, step) in values(): | ||||
if start >= key.stop: | |||||
if start > key.stop: | |||||
self._children[id(child)][1][0] += diff | self._children[id(child)][1][0] += diff | ||||
if stop >= key.stop and stop != maxsize: | if stop >= key.stop and stop != maxsize: | ||||
self._children[id(child)][1][1] += diff | self._children[id(child)][1][1] += diff | ||||
def __delitem__(self, key): | def __delitem__(self, key): | ||||
super(SmartList, self).__delitem__(key) | super(SmartList, self).__delitem__(key) | ||||
if not isinstance(key, slice): | |||||
key = slice(key, key + 1) | |||||
diff = key.stop - key.start | |||||
if isinstance(key, slice): | |||||
key = self._normalize_slice(key) | |||||
else: | |||||
key = slice(key, key + 1, 1) | |||||
diff = (key.stop - key.start) // key.step | |||||
values = self._children.values if py3k else self._children.itervalues | values = self._children.values if py3k else self._children.itervalues | ||||
for child, (start, stop, step) in values(): | for child, (start, stop, step) in values(): | ||||
if start > key.start: | if start > key.start: | ||||
self._children[id(child)][1][0] -= diff | self._children[id(child)][1][0] -= diff | ||||
if stop >= key.stop: | |||||
if stop >= key.stop and stop != maxsize: | |||||
self._children[id(child)][1][1] -= diff | self._children[id(child)][1][1] -= diff | ||||
if not py3k: | if not py3k: | ||||
@@ -160,24 +179,35 @@ class SmartList(list): | |||||
child._parent = copy | child._parent = copy | ||||
super(SmartList, self).reverse() | super(SmartList, self).reverse() | ||||
@inheritdoc | |||||
def sort(self, cmp=None, key=None, reverse=None): | |||||
copy = list(self) | |||||
for child in self._children: | |||||
child._parent = copy | |||||
if cmp is not None: | |||||
if py3k: | |||||
@inheritdoc | |||||
def sort(self, key=None, reverse=None): | |||||
copy = list(self) | |||||
for child in self._children: | |||||
child._parent = copy | |||||
kwargs = {} | |||||
if key is not None: | if key is not None: | ||||
if reverse is not None: | |||||
super(SmartList, self).sort(cmp, key, reverse) | |||||
else: | |||||
super(SmartList, self).sort(cmp, key) | |||||
else: | |||||
super(SmartList, self).sort(cmp) | |||||
else: | |||||
super(SmartList, self).sort() | |||||
kwargs["key"] = key | |||||
if reverse is not None: | |||||
kwargs["reverse"] = reverse | |||||
super(SmartList, self).sort(**kwargs) | |||||
else: | |||||
@inheritdoc | |||||
def sort(self, cmp=None, key=None, reverse=None): | |||||
copy = list(self) | |||||
for child in self._children: | |||||
child._parent = copy | |||||
kwargs = {} | |||||
if cmp is not None: | |||||
kwargs["cmp"] = cmp | |||||
if key is not None: | |||||
kwargs["key"] = key | |||||
if reverse is not None: | |||||
kwargs["reverse"] = reverse | |||||
super(SmartList, self).sort(**kwargs) | |||||
class _ListProxy(list): | |||||
class _ListProxy(_SliceNormalizerMixIn, list): | |||||
"""Implement the ``list`` interface by getting elements from a parent. | """Implement the ``list`` interface by getting elements from a parent. | ||||
This is created by a :py:class:`~.SmartList` object when slicing. It does | This is created by a :py:class:`~.SmartList` object when slicing. It does | ||||
@@ -231,25 +261,52 @@ class _ListProxy(list): | |||||
return bool(self._render()) | return bool(self._render()) | ||||
def __len__(self): | def __len__(self): | ||||
return (self._stop - self._start) / self._step | |||||
return (self._stop - self._start) // self._step | |||||
def __getitem__(self, key): | def __getitem__(self, key): | ||||
return self._render()[key] | |||||
if isinstance(key, slice): | |||||
key = self._normalize_slice(key) | |||||
if key.stop == maxsize: | |||||
keystop = self._stop | |||||
else: | |||||
keystop = key.stop + self._start | |||||
adjusted = slice(key.start + self._start, keystop, key.step) | |||||
return self._parent[adjusted] | |||||
else: | |||||
return self._render()[key] | |||||
def __setitem__(self, key, item): | def __setitem__(self, key, item): | ||||
if isinstance(key, slice): | if isinstance(key, slice): | ||||
adjusted = slice(key.start + self._start, key.stop + self._stop, | |||||
key.step) | |||||
key = self._normalize_slice(key) | |||||
if key.stop == maxsize: | |||||
keystop = self._stop | |||||
else: | |||||
keystop = key.stop + self._start | |||||
adjusted = slice(key.start + self._start, keystop, key.step) | |||||
self._parent[adjusted] = item | self._parent[adjusted] = item | ||||
else: | else: | ||||
length = len(self) | |||||
if key < 0: | |||||
key = length + key | |||||
if key < 0 or key >= length: | |||||
raise IndexError("list assignment index out of range") | |||||
self._parent[self._start + key] = item | self._parent[self._start + key] = item | ||||
def __delitem__(self, key): | def __delitem__(self, key): | ||||
if isinstance(key, slice): | if isinstance(key, slice): | ||||
adjusted = slice(key.start + self._start, key.stop + self._stop, | |||||
key.step) | |||||
key = self._normalize_slice(key) | |||||
if key.stop == maxsize: | |||||
keystop = self._stop | |||||
else: | |||||
keystop = key.stop + self._start | |||||
adjusted = slice(key.start + self._start, keystop, key.step) | |||||
del self._parent[adjusted] | del self._parent[adjusted] | ||||
else: | else: | ||||
length = len(self) | |||||
if key < 0: | |||||
key = length + key | |||||
if key < 0 or key >= length: | |||||
raise IndexError("list assignment index out of range") | |||||
del self._parent[self._start + key] | del self._parent[self._start + key] | ||||
def __iter__(self): | def __iter__(self): | ||||
@@ -287,6 +344,16 @@ class _ListProxy(list): | |||||
self.extend(other) | self.extend(other) | ||||
return self | return self | ||||
def __mul__(self, other): | |||||
return SmartList(list(self) * other) | |||||
def __rmul__(self, other): | |||||
return SmartList(other * list(self)) | |||||
def __imul__(self, other): | |||||
self.extend(list(self) * (other - 1)) | |||||
return self | |||||
@property | @property | ||||
def _start(self): | def _start(self): | ||||
"""The starting index of this list, inclusive.""" | """The starting index of this list, inclusive.""" | ||||
@@ -295,6 +362,8 @@ class _ListProxy(list): | |||||
@property | @property | ||||
def _stop(self): | def _stop(self): | ||||
"""The ending index of this list, exclusive.""" | """The ending index of this list, exclusive.""" | ||||
if self._sliceinfo[1] == maxsize: | |||||
return len(self._parent) | |||||
return self._sliceinfo[1] | return self._sliceinfo[1] | ||||
@property | @property | ||||
@@ -328,18 +397,25 @@ class _ListProxy(list): | |||||
@inheritdoc | @inheritdoc | ||||
def insert(self, index, item): | def insert(self, index, item): | ||||
if index < 0: | |||||
index = len(self) + index | |||||
self._parent.insert(self._start + index, item) | self._parent.insert(self._start + index, item) | ||||
@inheritdoc | @inheritdoc | ||||
def pop(self, index=None): | def pop(self, index=None): | ||||
length = len(self) | |||||
if index is None: | if index is None: | ||||
index = len(self) - 1 | |||||
index = length - 1 | |||||
elif index < 0: | |||||
index = length + index | |||||
if index < 0 or index >= length: | |||||
raise IndexError("pop index out of range") | |||||
return self._parent.pop(self._start + index) | return self._parent.pop(self._start + index) | ||||
@inheritdoc | @inheritdoc | ||||
def remove(self, item): | def remove(self, item): | ||||
index = self.index(item) | index = self.index(item) | ||||
del self._parent[index] | |||||
del self._parent[self._start + index] | |||||
@inheritdoc | @inheritdoc | ||||
def reverse(self): | def reverse(self): | ||||
@@ -347,17 +423,30 @@ class _ListProxy(list): | |||||
item.reverse() | item.reverse() | ||||
self._parent[self._start:self._stop:self._step] = item | self._parent[self._start:self._stop:self._step] = item | ||||
@inheritdoc | |||||
def sort(self, cmp=None, key=None, reverse=None): | |||||
item = self._render() | |||||
if cmp is not None: | |||||
if py3k: | |||||
@inheritdoc | |||||
def sort(self, key=None, reverse=None): | |||||
item = self._render() | |||||
kwargs = {} | |||||
if key is not None: | if key is not None: | ||||
if reverse is not None: | |||||
item.sort(cmp, key, reverse) | |||||
else: | |||||
item.sort(cmp, key) | |||||
else: | |||||
item.sort(cmp) | |||||
else: | |||||
item.sort() | |||||
self._parent[self._start:self._stop:self._step] = item | |||||
kwargs["key"] = key | |||||
if reverse is not None: | |||||
kwargs["reverse"] = reverse | |||||
item.sort(**kwargs) | |||||
self._parent[self._start:self._stop:self._step] = item | |||||
else: | |||||
@inheritdoc | |||||
def sort(self, cmp=None, key=None, reverse=None): | |||||
item = self._render() | |||||
kwargs = {} | |||||
if cmp is not None: | |||||
kwargs["cmp"] = cmp | |||||
if key is not None: | |||||
kwargs["key"] = key | |||||
if reverse is not None: | |||||
kwargs["reverse"] = reverse | |||||
item.sort(**kwargs) | |||||
self._parent[self._start:self._stop:self._step] = item | |||||
del inheritdoc |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -40,7 +40,6 @@ def inheritdoc(method): | |||||
method.__doc__ = getattr(str, method.__name__).__doc__ | method.__doc__ = getattr(str, method.__name__).__doc__ | ||||
return method | return method | ||||
class StringMixIn(object): | class StringMixIn(object): | ||||
"""Implement the interface for ``unicode``/``str`` in a dynamic manner. | """Implement the interface for ``unicode``/``str`` in a dynamic manner. | ||||
@@ -114,6 +113,9 @@ class StringMixIn(object): | |||||
def __getitem__(self, key): | def __getitem__(self, key): | ||||
return self.__unicode__()[key] | return self.__unicode__()[key] | ||||
def __reversed__(self): | |||||
return reversed(self.__unicode__()) | |||||
def __contains__(self, item): | def __contains__(self, item): | ||||
if isinstance(item, StringMixIn): | if isinstance(item, StringMixIn): | ||||
return str(item) in self.__unicode__() | return str(item) in self.__unicode__() | ||||
@@ -123,22 +125,39 @@ class StringMixIn(object): | |||||
def capitalize(self): | def capitalize(self): | ||||
return self.__unicode__().capitalize() | return self.__unicode__().capitalize() | ||||
if py3k: | |||||
@inheritdoc | |||||
def casefold(self): | |||||
return self.__unicode__().casefold() | |||||
@inheritdoc | @inheritdoc | ||||
def center(self, width, fillchar=None): | def center(self, width, fillchar=None): | ||||
if fillchar is None: | |||||
return self.__unicode__().center(width) | |||||
return self.__unicode__().center(width, fillchar) | return self.__unicode__().center(width, fillchar) | ||||
@inheritdoc | @inheritdoc | ||||
def count(self, sub=None, start=None, end=None): | |||||
def count(self, sub, start=None, end=None): | |||||
return self.__unicode__().count(sub, start, end) | return self.__unicode__().count(sub, start, end) | ||||
if not py3k: | if not py3k: | ||||
@inheritdoc | @inheritdoc | ||||
def decode(self, encoding=None, errors=None): | def decode(self, encoding=None, errors=None): | ||||
return self.__unicode__().decode(encoding, errors) | |||||
kwargs = {} | |||||
if encoding is not None: | |||||
kwargs["encoding"] = encoding | |||||
if errors is not None: | |||||
kwargs["errors"] = errors | |||||
return self.__unicode__().decode(**kwargs) | |||||
@inheritdoc | @inheritdoc | ||||
def encode(self, encoding=None, errors=None): | def encode(self, encoding=None, errors=None): | ||||
return self.__unicode__().encode(encoding, errors) | |||||
kwargs = {} | |||||
if encoding is not None: | |||||
kwargs["encoding"] = encoding | |||||
if errors is not None: | |||||
kwargs["errors"] = errors | |||||
return self.__unicode__().encode(**kwargs) | |||||
@inheritdoc | @inheritdoc | ||||
def endswith(self, prefix, start=None, end=None): | def endswith(self, prefix, start=None, end=None): | ||||
@@ -146,18 +165,25 @@ class StringMixIn(object): | |||||
@inheritdoc | @inheritdoc | ||||
def expandtabs(self, tabsize=None): | def expandtabs(self, tabsize=None): | ||||
if tabsize is None: | |||||
return self.__unicode__().expandtabs() | |||||
return self.__unicode__().expandtabs(tabsize) | return self.__unicode__().expandtabs(tabsize) | ||||
@inheritdoc | @inheritdoc | ||||
def find(self, sub=None, start=None, end=None): | |||||
def find(self, sub, start=None, end=None): | |||||
return self.__unicode__().find(sub, start, end) | return self.__unicode__().find(sub, start, end) | ||||
@inheritdoc | @inheritdoc | ||||
def format(self, *args, **kwargs): | def format(self, *args, **kwargs): | ||||
return self.__unicode__().format(*args, **kwargs) | return self.__unicode__().format(*args, **kwargs) | ||||
if py3k: | |||||
@inheritdoc | |||||
def format_map(self, mapping): | |||||
return self.__unicode__().format_map(mapping) | |||||
@inheritdoc | @inheritdoc | ||||
def index(self, sub=None, start=None, end=None): | |||||
def index(self, sub, start=None, end=None): | |||||
return self.__unicode__().index(sub, start, end) | return self.__unicode__().index(sub, start, end) | ||||
@inheritdoc | @inheritdoc | ||||
@@ -176,6 +202,11 @@ class StringMixIn(object): | |||||
def isdigit(self): | def isdigit(self): | ||||
return self.__unicode__().isdigit() | return self.__unicode__().isdigit() | ||||
if py3k: | |||||
@inheritdoc | |||||
def isidentifier(self): | |||||
return self.__unicode__().isidentifier() | |||||
@inheritdoc | @inheritdoc | ||||
def islower(self): | def islower(self): | ||||
return self.__unicode__().islower() | return self.__unicode__().islower() | ||||
@@ -184,6 +215,11 @@ class StringMixIn(object): | |||||
def isnumeric(self): | def isnumeric(self): | ||||
return self.__unicode__().isnumeric() | return self.__unicode__().isnumeric() | ||||
if py3k: | |||||
@inheritdoc | |||||
def isprintable(self): | |||||
return self.__unicode__().isprintable() | |||||
@inheritdoc | @inheritdoc | ||||
def isspace(self): | def isspace(self): | ||||
return self.__unicode__().isspace() | return self.__unicode__().isspace() | ||||
@@ -202,6 +238,8 @@ class StringMixIn(object): | |||||
@inheritdoc | @inheritdoc | ||||
def ljust(self, width, fillchar=None): | def ljust(self, width, fillchar=None): | ||||
if fillchar is None: | |||||
return self.__unicode__().ljust(width) | |||||
return self.__unicode__().ljust(width, fillchar) | return self.__unicode__().ljust(width, fillchar) | ||||
@inheritdoc | @inheritdoc | ||||
@@ -212,44 +250,88 @@ class StringMixIn(object): | |||||
def lstrip(self, chars=None): | def lstrip(self, chars=None): | ||||
return self.__unicode__().lstrip(chars) | return self.__unicode__().lstrip(chars) | ||||
if py3k: | |||||
@staticmethod | |||||
@inheritdoc | |||||
def maketrans(self, x, y=None, z=None): | |||||
if z is None: | |||||
if y is None: | |||||
return self.__unicode__.maketrans(x) | |||||
return self.__unicode__.maketrans(x, y) | |||||
return self.__unicode__.maketrans(x, y, z) | |||||
@inheritdoc | @inheritdoc | ||||
def partition(self, sep): | def partition(self, sep): | ||||
return self.__unicode__().partition(sep) | return self.__unicode__().partition(sep) | ||||
@inheritdoc | @inheritdoc | ||||
def replace(self, old, new, count): | |||||
def replace(self, old, new, count=None): | |||||
if count is None: | |||||
return self.__unicode__().replace(old, new) | |||||
return self.__unicode__().replace(old, new, count) | return self.__unicode__().replace(old, new, count) | ||||
@inheritdoc | @inheritdoc | ||||
def rfind(self, sub=None, start=None, end=None): | |||||
def rfind(self, sub, start=None, end=None): | |||||
return self.__unicode__().rfind(sub, start, end) | return self.__unicode__().rfind(sub, start, end) | ||||
@inheritdoc | @inheritdoc | ||||
def rindex(self, sub=None, start=None, end=None): | |||||
def rindex(self, sub, start=None, end=None): | |||||
return self.__unicode__().rindex(sub, start, end) | return self.__unicode__().rindex(sub, start, end) | ||||
@inheritdoc | @inheritdoc | ||||
def rjust(self, width, fillchar=None): | def rjust(self, width, fillchar=None): | ||||
if fillchar is None: | |||||
return self.__unicode__().rjust(width) | |||||
return self.__unicode__().rjust(width, fillchar) | return self.__unicode__().rjust(width, fillchar) | ||||
@inheritdoc | @inheritdoc | ||||
def rpartition(self, sep): | def rpartition(self, sep): | ||||
return self.__unicode__().rpartition(sep) | return self.__unicode__().rpartition(sep) | ||||
@inheritdoc | |||||
def rsplit(self, sep=None, maxsplit=None): | |||||
return self.__unicode__().rsplit(sep, maxsplit) | |||||
if py3k: | |||||
@inheritdoc | |||||
def rsplit(self, sep=None, maxsplit=None): | |||||
kwargs = {} | |||||
if sep is not None: | |||||
kwargs["sep"] = sep | |||||
if maxsplit is not None: | |||||
kwargs["maxsplit"] = maxsplit | |||||
return self.__unicode__().rsplit(**kwargs) | |||||
else: | |||||
@inheritdoc | |||||
def rsplit(self, sep=None, maxsplit=None): | |||||
if maxsplit is None: | |||||
if sep is None: | |||||
return self.__unicode__().rsplit() | |||||
return self.__unicode__().rsplit(sep) | |||||
return self.__unicode__().rsplit(sep, maxsplit) | |||||
@inheritdoc | @inheritdoc | ||||
def rstrip(self, chars=None): | def rstrip(self, chars=None): | ||||
return self.__unicode__().rstrip(chars) | return self.__unicode__().rstrip(chars) | ||||
@inheritdoc | |||||
def split(self, sep=None, maxsplit=None): | |||||
return self.__unicode__().split(sep, maxsplit) | |||||
if py3k: | |||||
@inheritdoc | |||||
def split(self, sep=None, maxsplit=None): | |||||
kwargs = {} | |||||
if sep is not None: | |||||
kwargs["sep"] = sep | |||||
if maxsplit is not None: | |||||
kwargs["maxsplit"] = maxsplit | |||||
return self.__unicode__().split(**kwargs) | |||||
else: | |||||
@inheritdoc | |||||
def split(self, sep=None, maxsplit=None): | |||||
if maxsplit is None: | |||||
if sep is None: | |||||
return self.__unicode__().split() | |||||
return self.__unicode__().split(sep) | |||||
return self.__unicode__().split(sep, maxsplit) | |||||
@inheritdoc | @inheritdoc | ||||
def splitlines(self, keepends=None): | def splitlines(self, keepends=None): | ||||
if keepends is None: | |||||
return self.__unicode__().splitlines() | |||||
return self.__unicode__().splitlines(keepends) | return self.__unicode__().splitlines(keepends) | ||||
@inheritdoc | @inheritdoc | ||||
@@ -269,8 +351,8 @@ class StringMixIn(object): | |||||
return self.__unicode__().title() | return self.__unicode__().title() | ||||
@inheritdoc | @inheritdoc | ||||
def translate(self, table, deletechars=None): | |||||
return self.__unicode__().translate(table, deletechars) | |||||
def translate(self, table): | |||||
return self.__unicode__().translate(table) | |||||
@inheritdoc | @inheritdoc | ||||
def upper(self): | def upper(self): | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -34,16 +34,16 @@ from .smart_list import SmartList | |||||
def parse_anything(value): | def parse_anything(value): | ||||
"""Return a :py:class:`~.Wikicode` for *value*, allowing multiple types. | """Return a :py:class:`~.Wikicode` for *value*, allowing multiple types. | ||||
This differs from :py:func:`mwparserfromhell.parse` in that we accept more | |||||
than just a string to be parsed. Unicode objects (strings in py3k), strings | |||||
(bytes in py3k), integers (converted to strings), ``None``, existing | |||||
This differs from :py:meth:`.Parser.parse` in that we accept more than just | |||||
a string to be parsed. Unicode objects (strings in py3k), strings (bytes in | |||||
py3k), integers (converted to strings), ``None``, existing | |||||
:py:class:`~.Node` or :py:class:`~.Wikicode` objects, as well as an | :py:class:`~.Node` or :py:class:`~.Wikicode` objects, as well as an | ||||
iterable of these types, are supported. This is used to parse input | iterable of these types, are supported. This is used to parse input | ||||
on-the-fly by various methods of :py:class:`~.Wikicode` and others like | on-the-fly by various methods of :py:class:`~.Wikicode` and others like | ||||
:py:class:`~.Template`, such as :py:meth:`wikicode.insert() | :py:class:`~.Template`, such as :py:meth:`wikicode.insert() | ||||
<.Wikicode.insert>` or setting :py:meth:`template.name <.Template.name>`. | <.Wikicode.insert>` or setting :py:meth:`template.name <.Template.name>`. | ||||
""" | """ | ||||
from . import parse | |||||
from .parser import Parser | |||||
from .wikicode import Wikicode | from .wikicode import Wikicode | ||||
if isinstance(value, Wikicode): | if isinstance(value, Wikicode): | ||||
@@ -51,11 +51,11 @@ def parse_anything(value): | |||||
elif isinstance(value, Node): | elif isinstance(value, Node): | ||||
return Wikicode(SmartList([value])) | return Wikicode(SmartList([value])) | ||||
elif isinstance(value, str): | elif isinstance(value, str): | ||||
return parse(value) | |||||
return Parser(value).parse() | |||||
elif isinstance(value, bytes): | elif isinstance(value, bytes): | ||||
return parse(value.decode("utf8")) | |||||
return Parser(value.decode("utf8")).parse() | |||||
elif isinstance(value, int): | elif isinstance(value, int): | ||||
return parse(str(value)) | |||||
return Parser(str(value)).parse() | |||||
elif value is None: | elif value is None: | ||||
return Wikicode(SmartList()) | return Wikicode(SmartList()) | ||||
try: | try: | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -23,8 +23,9 @@ | |||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
import re | import re | ||||
from .compat import maxsize, str | |||||
from .nodes import Heading, Node, Tag, Template, Text, Wikilink | |||||
from .compat import maxsize, py3k, str | |||||
from .nodes import (Argument, Comment, Heading, HTMLEntity, Node, Tag, | |||||
Template, Text, Wikilink) | |||||
from .string_mixin import StringMixIn | from .string_mixin import StringMixIn | ||||
from .utils import parse_anything | from .utils import parse_anything | ||||
@@ -68,7 +69,7 @@ class Wikicode(StringMixIn): | |||||
Raises ``ValueError`` if *obj* is not within *node*. | Raises ``ValueError`` if *obj* is not within *node*. | ||||
""" | """ | ||||
for context, child in node.__iternodes__(self._get_all_nodes): | for context, child in node.__iternodes__(self._get_all_nodes): | ||||
if child is obj: | |||||
if self._is_equivalent(obj, child): | |||||
return context | return context | ||||
raise ValueError(obj) | raise ValueError(obj) | ||||
@@ -88,13 +89,7 @@ class Wikicode(StringMixIn): | |||||
If *obj* is a ``Node``, the function will test whether they are the | If *obj* is a ``Node``, the function will test whether they are the | ||||
same object, otherwise it will compare them with ``==``. | same object, otherwise it will compare them with ``==``. | ||||
""" | """ | ||||
if isinstance(obj, Node): | |||||
if node is obj: | |||||
return True | |||||
else: | |||||
if node == obj: | |||||
return True | |||||
return False | |||||
return (node is obj) if isinstance(obj, Node) else (node == obj) | |||||
def _contains(self, nodes, obj): | def _contains(self, nodes, obj): | ||||
"""Return ``True`` if *obj* is inside of *nodes*, else ``False``. | """Return ``True`` if *obj* is inside of *nodes*, else ``False``. | ||||
@@ -157,6 +152,36 @@ class Wikicode(StringMixIn): | |||||
node.__showtree__(write, get, mark) | node.__showtree__(write, get, mark) | ||||
return lines | return lines | ||||
@classmethod | |||||
def _build_filter_methods(cls, **meths): | |||||
"""Given Node types, build the corresponding i?filter shortcuts. | |||||
The should be given as keys storing the method's base name paired | |||||
with values storing the corresponding :py:class:`~.Node` type. For | |||||
example, the dict may contain the pair ``("templates", Template)``, | |||||
which will produce the methods :py:meth:`ifilter_templates` and | |||||
:py:meth:`filter_templates`, which are shortcuts for | |||||
:py:meth:`ifilter(forcetype=Template) <ifilter>` and | |||||
:py:meth:`filter(forcetype=Template) <filter>`, respectively. These | |||||
shortcuts are added to the class itself, with an appropriate docstring. | |||||
""" | |||||
doc = """Iterate over {0}. | |||||
This is equivalent to :py:meth:`{1}` with *forcetype* set to | |||||
:py:class:`~.{2}`. | |||||
""" | |||||
make_ifilter = lambda ftype: (lambda self, **kw: | |||||
self.ifilter(forcetype=ftype, **kw)) | |||||
make_filter = lambda ftype: (lambda self, **kw: | |||||
self.filter(forcetype=ftype, **kw)) | |||||
for name, ftype in (meths.items() if py3k else meths.iteritems()): | |||||
ifilter = make_ifilter(ftype) | |||||
filter = make_filter(ftype) | |||||
ifilter.__doc__ = doc.format(name, "ifilter", ftype.__name__) | |||||
filter.__doc__ = doc.format(name, "filter", ftype.__name__) | |||||
setattr(cls, "ifilter_" + name, ifilter) | |||||
setattr(cls, "filter_" + name, filter) | |||||
@property | @property | ||||
def nodes(self): | def nodes(self): | ||||
"""A list of :py:class:`~.Node` objects. | """A list of :py:class:`~.Node` objects. | ||||
@@ -168,6 +193,8 @@ class Wikicode(StringMixIn): | |||||
@nodes.setter | @nodes.setter | ||||
def nodes(self, value): | def nodes(self, value): | ||||
if not isinstance(value, list): | |||||
value = parse_anything(value).nodes | |||||
self._nodes = value | self._nodes = value | ||||
def get(self, index): | def get(self, index): | ||||
@@ -188,9 +215,10 @@ class Wikicode(StringMixIn): | |||||
raise ValueError("Cannot coerce multiple nodes into one index") | raise ValueError("Cannot coerce multiple nodes into one index") | ||||
if index >= len(self.nodes) or -1 * index > len(self.nodes): | if index >= len(self.nodes) or -1 * index > len(self.nodes): | ||||
raise IndexError("List assignment index out of range") | raise IndexError("List assignment index out of range") | ||||
self.nodes.pop(index) | |||||
if nodes: | if nodes: | ||||
self.nodes[index] = nodes[0] | self.nodes[index] = nodes[0] | ||||
else: | |||||
self.nodes.pop(index) | |||||
def index(self, obj, recursive=False): | def index(self, obj, recursive=False): | ||||
"""Return the index of *obj* in the list of nodes. | """Return the index of *obj* in the list of nodes. | ||||
@@ -294,47 +322,11 @@ class Wikicode(StringMixIn): | |||||
*flags*. If *forcetype* is given, only nodes that are instances of this | *flags*. If *forcetype* is given, only nodes that are instances of this | ||||
type are yielded. | type are yielded. | ||||
""" | """ | ||||
if recursive: | |||||
nodes = self._get_all_nodes(self) | |||||
else: | |||||
nodes = self.nodes | |||||
for node in nodes: | |||||
for node in (self._get_all_nodes(self) if recursive else self.nodes): | |||||
if not forcetype or isinstance(node, forcetype): | if not forcetype or isinstance(node, forcetype): | ||||
if not matches or re.search(matches, str(node), flags): | if not matches or re.search(matches, str(node), flags): | ||||
yield node | yield node | ||||
def ifilter_links(self, recursive=False, matches=None, flags=FLAGS): | |||||
"""Iterate over wikilink nodes. | |||||
This is equivalent to :py:meth:`ifilter` with *forcetype* set to | |||||
:py:class:`~.Wikilink`. | |||||
""" | |||||
return self.ifilter(recursive, matches, flags, forcetype=Wikilink) | |||||
def ifilter_templates(self, recursive=False, matches=None, flags=FLAGS): | |||||
"""Iterate over template nodes. | |||||
This is equivalent to :py:meth:`ifilter` with *forcetype* set to | |||||
:py:class:`~.Template`. | |||||
""" | |||||
return self.filter(recursive, matches, flags, forcetype=Template) | |||||
def ifilter_text(self, recursive=False, matches=None, flags=FLAGS): | |||||
"""Iterate over text nodes. | |||||
This is equivalent to :py:meth:`ifilter` with *forcetype* set to | |||||
:py:class:`~.nodes.Text`. | |||||
""" | |||||
return self.filter(recursive, matches, flags, forcetype=Text) | |||||
def ifilter_tags(self, recursive=False, matches=None, flags=FLAGS): | |||||
"""Iterate over tag nodes. | |||||
This is equivalent to :py:meth:`ifilter` with *forcetype* set to | |||||
:py:class:`~.Tag`. | |||||
""" | |||||
return self.ifilter(recursive, matches, flags, forcetype=Tag) | |||||
def filter(self, recursive=False, matches=None, flags=FLAGS, | def filter(self, recursive=False, matches=None, flags=FLAGS, | ||||
forcetype=None): | forcetype=None): | ||||
"""Return a list of nodes within our list matching certain conditions. | """Return a list of nodes within our list matching certain conditions. | ||||
@@ -343,77 +335,56 @@ class Wikicode(StringMixIn): | |||||
""" | """ | ||||
return list(self.ifilter(recursive, matches, flags, forcetype)) | return list(self.ifilter(recursive, matches, flags, forcetype)) | ||||
def filter_links(self, recursive=False, matches=None, flags=FLAGS): | |||||
"""Return a list of wikilink nodes. | |||||
This is equivalent to calling :py:func:`list` on | |||||
:py:meth:`ifilter_links`. | |||||
""" | |||||
return list(self.ifilter_links(recursive, matches, flags)) | |||||
def filter_templates(self, recursive=False, matches=None, flags=FLAGS): | |||||
"""Return a list of template nodes. | |||||
This is equivalent to calling :py:func:`list` on | |||||
:py:meth:`ifilter_templates`. | |||||
""" | |||||
return list(self.ifilter_templates(recursive, matches, flags)) | |||||
def filter_text(self, recursive=False, matches=None, flags=FLAGS): | |||||
"""Return a list of text nodes. | |||||
This is equivalent to calling :py:func:`list` on | |||||
:py:meth:`ifilter_text`. | |||||
""" | |||||
return list(self.ifilter_text(recursive, matches, flags)) | |||||
def filter_tags(self, recursive=False, matches=None, flags=FLAGS): | |||||
"""Return a list of tag nodes. | |||||
This is equivalent to calling :py:func:`list` on | |||||
:py:meth:`ifilter_tags`. | |||||
""" | |||||
return list(self.ifilter_tags(recursive, matches, flags)) | |||||
def get_sections(self, flat=True, matches=None, levels=None, flags=FLAGS, | |||||
include_headings=True): | |||||
def get_sections(self, levels=None, matches=None, flags=FLAGS, | |||||
include_lead=None, include_headings=True): | |||||
"""Return a list of sections within the page. | """Return a list of sections within the page. | ||||
Sections are returned as :py:class:`~.Wikicode` objects with a shared | Sections are returned as :py:class:`~.Wikicode` objects with a shared | ||||
node list (implemented using :py:class:`~.SmartList`) so that changes | node list (implemented using :py:class:`~.SmartList`) so that changes | ||||
to sections are reflected in the parent Wikicode object. | to sections are reflected in the parent Wikicode object. | ||||
With *flat* as ``True``, each returned section contains all of its | |||||
subsections within the :py:class:`~.Wikicode`; otherwise, the returned | |||||
sections contain only the section up to the next heading, regardless of | |||||
its size. If *matches* is given, it should be a regex to be matched | |||||
against the titles of section headings; only sections whose headings | |||||
match the regex will be included. If *levels* is given, it should be a | |||||
iterable of integers; only sections whose heading levels are within it | |||||
will be returned. If *include_headings* is ``True``, the section's | |||||
beginning :py:class:`~.Heading` object will be included in returned | |||||
:py:class:`~.Wikicode` objects; otherwise, this is skipped. | |||||
Each section contains all of its subsections. If *levels* is given, it | |||||
should be a iterable of integers; only sections whose heading levels | |||||
are within it will be returned.If *matches* is given, it should be a | |||||
regex to be matched against the titles of section headings; only | |||||
sections whose headings match the regex will be included. *flags* can | |||||
be used to override the default regex flags (see :py:meth:`ifilter`) if | |||||
*matches* is used. | |||||
If *include_lead* is ``True``, the first, lead section (without a | |||||
heading) will be included in the list; ``False`` will not include it; | |||||
the default will include it only if no specific *levels* were given. If | |||||
*include_headings* is ``True``, the section's beginning | |||||
:py:class:`~.Heading` object will be included; otherwise, this is | |||||
skipped. | |||||
""" | """ | ||||
if matches: | if matches: | ||||
matches = r"^(=+?)\s*" + matches + r"\s*\1$" | matches = r"^(=+?)\s*" + matches + r"\s*\1$" | ||||
headings = self.filter(recursive=True, matches=matches, flags=flags, | |||||
forcetype=Heading) | |||||
headings = self.filter_headings(recursive=True) | |||||
filtered = self.filter_headings(recursive=True, matches=matches, | |||||
flags=flags) | |||||
if levels: | if levels: | ||||
headings = [head for head in headings if head.level in levels] | |||||
filtered = [head for head in filtered if head.level in levels] | |||||
if matches or include_lead is False or (not include_lead and levels): | |||||
buffers = [] | |||||
else: | |||||
buffers = [(maxsize, 0)] | |||||
sections = [] | sections = [] | ||||
buffers = [(maxsize, 0)] | |||||
i = 0 | i = 0 | ||||
while i < len(self.nodes): | while i < len(self.nodes): | ||||
if self.nodes[i] in headings: | if self.nodes[i] in headings: | ||||
this = self.nodes[i].level | this = self.nodes[i].level | ||||
for (level, start) in buffers: | for (level, start) in buffers: | ||||
if not flat or this <= level: | |||||
buffers.remove((level, start)) | |||||
if this <= level: | |||||
sections.append(Wikicode(self.nodes[start:i])) | sections.append(Wikicode(self.nodes[start:i])) | ||||
buffers.append((this, i)) | |||||
if not include_headings: | |||||
i += 1 | |||||
buffers = [buf for buf in buffers if buf[0] < this] | |||||
if self.nodes[i] in filtered: | |||||
if not include_headings: | |||||
i += 1 | |||||
if i >= len(self.nodes): | |||||
break | |||||
buffers.append((this, i)) | |||||
i += 1 | i += 1 | ||||
for (level, start) in buffers: | for (level, start) in buffers: | ||||
if start != i: | if start != i: | ||||
@@ -473,3 +444,8 @@ class Wikicode(StringMixIn): | |||||
""" | """ | ||||
marker = object() # Random object we can find with certainty in a list | marker = object() # Random object we can find with certainty in a list | ||||
return "\n".join(self._get_tree(self, [], marker, 0)) | return "\n".join(self._get_tree(self, [], marker, 0)) | ||||
Wikicode._build_filter_methods( | |||||
arguments=Argument, comments=Comment, headings=Heading, | |||||
html_entities=HTMLEntity, tags=Tag, templates=Template, text=Text, | |||||
wikilinks=Wikilink) |
@@ -1,7 +1,7 @@ | |||||
#! /usr/bin/env python | #! /usr/bin/env python | ||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -24,6 +24,7 @@ | |||||
from setuptools import setup, find_packages, Extension | from setuptools import setup, find_packages, Extension | ||||
from mwparserfromhell import __version__ | from mwparserfromhell import __version__ | ||||
from mwparserfromhell.compat import py3k | |||||
with open("README.rst") as fp: | with open("README.rst") as fp: | ||||
long_docs = fp.read() | long_docs = fp.read() | ||||
@@ -37,7 +38,7 @@ tokenizer = Extension("mwparserfromhell.parser._tokenizer", | |||||
setup( | setup( | ||||
name = "mwparserfromhell", | name = "mwparserfromhell", | ||||
packages = find_packages(exclude=("tests",)), | packages = find_packages(exclude=("tests",)), | ||||
ext_modules = [tokenizer], | |||||
ext_modules = [] if py3k else [tokenizer], | |||||
test_suite = "tests", | test_suite = "tests", | ||||
version = __version__, | version = __version__, | ||||
author = "Ben Kurtovic", | author = "Ben Kurtovic", | ||||
@@ -0,0 +1,130 @@ | |||||
<?xml version="1.0" encoding="UTF-8"?> | |||||
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> | |||||
<plist version="1.0"> | |||||
<dict> | |||||
<key>fileTypes</key> | |||||
<array> | |||||
<string>mwtest</string> | |||||
</array> | |||||
<key>name</key> | |||||
<string>MWParserFromHell Test Case</string> | |||||
<key>patterns</key> | |||||
<array> | |||||
<dict> | |||||
<key>match</key> | |||||
<string>---</string> | |||||
<key>name</key> | |||||
<string>markup.heading.divider.mwpfh</string> | |||||
</dict> | |||||
<dict> | |||||
<key>captures</key> | |||||
<dict> | |||||
<key>1</key> | |||||
<dict> | |||||
<key>name</key> | |||||
<string>keyword.other.name.mwpfh</string> | |||||
</dict> | |||||
<key>2</key> | |||||
<dict> | |||||
<key>name</key> | |||||
<string>variable.other.name.mwpfh</string> | |||||
</dict> | |||||
</dict> | |||||
<key>match</key> | |||||
<string>(name:)\s*(\w*)</string> | |||||
<key>name</key> | |||||
<string>meta.name.mwpfh</string> | |||||
</dict> | |||||
<dict> | |||||
<key>captures</key> | |||||
<dict> | |||||
<key>1</key> | |||||
<dict> | |||||
<key>name</key> | |||||
<string>keyword.other.label.mwpfh</string> | |||||
</dict> | |||||
<key>2</key> | |||||
<dict> | |||||
<key>name</key> | |||||
<string>comment.line.other.label.mwpfh</string> | |||||
</dict> | |||||
</dict> | |||||
<key>match</key> | |||||
<string>(label:)\s*(.*)</string> | |||||
<key>name</key> | |||||
<string>meta.label.mwpfh</string> | |||||
</dict> | |||||
<dict> | |||||
<key>captures</key> | |||||
<dict> | |||||
<key>1</key> | |||||
<dict> | |||||
<key>name</key> | |||||
<string>keyword.other.input.mwpfh</string> | |||||
</dict> | |||||
<key>2</key> | |||||
<dict> | |||||
<key>name</key> | |||||
<string>string.quoted.double.input.mwpfh</string> | |||||
</dict> | |||||
</dict> | |||||
<key>match</key> | |||||
<string>(input:)\s*(.*)</string> | |||||
<key>name</key> | |||||
<string>meta.input.mwpfh</string> | |||||
</dict> | |||||
<dict> | |||||
<key>captures</key> | |||||
<dict> | |||||
<key>1</key> | |||||
<dict> | |||||
<key>name</key> | |||||
<string>keyword.other.output.mwpfh</string> | |||||
</dict> | |||||
</dict> | |||||
<key>match</key> | |||||
<string>(output:)</string> | |||||
<key>name</key> | |||||
<string>meta.output.mwpfh</string> | |||||
</dict> | |||||
<dict> | |||||
<key>captures</key> | |||||
<dict> | |||||
<key>1</key> | |||||
<dict> | |||||
<key>name</key> | |||||
<string>support.language.token.mwpfh</string> | |||||
</dict> | |||||
</dict> | |||||
<key>match</key> | |||||
<string>(\w+)\s*\(</string> | |||||
<key>name</key> | |||||
<string>meta.name.token.mwpfh</string> | |||||
</dict> | |||||
<dict> | |||||
<key>captures</key> | |||||
<dict> | |||||
<key>1</key> | |||||
<dict> | |||||
<key>name</key> | |||||
<string>variable.parameter.token.mwpfh</string> | |||||
</dict> | |||||
</dict> | |||||
<key>match</key> | |||||
<string>(\w+)\s*(=)</string> | |||||
<key>name</key> | |||||
<string>meta.name.parameter.token.mwpfh</string> | |||||
</dict> | |||||
<dict> | |||||
<key>match</key> | |||||
<string>".*?"</string> | |||||
<key>name</key> | |||||
<string>string.quoted.double.mwpfh</string> | |||||
</dict> | |||||
</array> | |||||
<key>scopeName</key> | |||||
<string>text.mwpfh</string> | |||||
<key>uuid</key> | |||||
<string>cd3e2ffa-a57d-4c40-954f-1a2e87ffd638</string> | |||||
</dict> | |||||
</plist> |
@@ -0,0 +1,133 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from __future__ import print_function, unicode_literals | |||||
from os import listdir, path | |||||
import sys | |||||
from mwparserfromhell.compat import py3k | |||||
from mwparserfromhell.parser import tokens | |||||
class _TestParseError(Exception): | |||||
"""Raised internally when a test could not be parsed.""" | |||||
pass | |||||
class TokenizerTestCase(object):
    """A base test case for tokenizers, whose tests are loaded dynamically.

    Subclassed along with unittest.TestCase to form TestPyTokenizer and
    TestCTokenizer. Tests are loaded dynamically from files in the 'tokenizer'
    directory.
    """

    @classmethod
    def _build_test_method(cls, funcname, data):
        """Create and return a method to be treated as a test case method.

        *data* is a dict containing multiple keys: the *input* text to be
        tokenized, the expected list of tokens as *output*, and an optional
        *label* for the method's docstring.
        """
        def inner(self):
            expected = data["output"]
            actual = self.tokenizer().tokenize(data["input"])
            self.assertEqual(expected, actual)
        if not py3k:
            # Method names must be byte strings under Python 2.
            inner.__name__ = funcname.encode("utf8")
        inner.__doc__ = data["label"]
        return inner

    @classmethod
    def _load_tests(cls, filename, name, text):
        """Load all tests in *text* from the file *filename*.

        Tests are separated by lines containing only "---". Within a test,
        "name:", "label:", "input:", and "output:" lines define its data.
        Malformed or incomplete tests are reported and skipped rather than
        aborting the whole load.
        """
        tests = text.split("\n---\n")
        counter = 1
        digits = len(str(len(tests)))
        for test in tests:
            data = {"name": None, "label": None, "input": None, "output": None}
            try:
                for line in test.strip().splitlines():
                    if line.startswith("name:"):
                        data["name"] = line[len("name:"):].strip()
                    elif line.startswith("label:"):
                        data["label"] = line[len("label:"):].strip()
                    elif line.startswith("input:"):
                        raw = line[len("input:"):].strip()
                        # Guard on length so an empty or single-character
                        # value cannot raise IndexError (which the except
                        # below would not catch) or strip a lone quote.
                        if len(raw) >= 2 and raw[0] == '"' and raw[-1] == '"':
                            raw = raw[1:-1]
                        raw = raw.encode("raw_unicode_escape")
                        data["input"] = raw.decode("unicode_escape")
                    elif line.startswith("output:"):
                        raw = line[len("output:"):].strip()
                        try:
                            # Test files are trusted input; eval() builds the
                            # expected token list in the tokens namespace.
                            data["output"] = eval(raw, vars(tokens))
                        except Exception as err:
                            raise _TestParseError(err)
            except _TestParseError as err:
                if data["name"]:
                    error = "Could not parse test '{0}' in '{1}':\n\t{2}"
                    print(error.format(data["name"], filename, err))
                else:
                    error = "Could not parse a test in '{0}':\n\t{1}"
                    print(error.format(filename, err))
                continue
            if not data["name"]:
                error = "A test in '{0}' was ignored because it lacked a name"
                print(error.format(filename))
                continue
            if data["input"] is None or data["output"] is None:
                error = "Test '{0}' in '{1}' was ignored because it lacked an input or an output"
                print(error.format(data["name"], filename))
                continue
            # Zero-pad the counter so generated method names sort naturally.
            number = str(counter).zfill(digits)
            fname = "test_{0}{1}_{2}".format(name, number, data["name"])
            meth = cls._build_test_method(fname, data)
            setattr(cls, fname, meth)
            counter += 1

    @classmethod
    def build(cls):
        """Load and install all tests from the 'tokenizer' directory.

        If the command line starts with "--use file1 file2 ...", only those
        files are loaded and cls.skip_others is set to True so other test
        cases can honor the restriction.
        """
        def load_file(filename):
            with open(filename, "r") as fp:
                text = fp.read()
                if not py3k:
                    text = text.decode("utf8")
            name = path.split(filename)[1][:-len(extension)]
            cls._load_tests(filename, name, text)

        directory = path.join(path.dirname(__file__), "tokenizer")
        extension = ".mwtest"
        if len(sys.argv) > 2 and sys.argv[1] == "--use":
            for name in sys.argv[2:]:
                load_file(path.join(directory, name + extension))
            sys.argv = [sys.argv[0]]  # So unittest doesn't try to load these
            cls.skip_others = True
        else:
            for filename in listdir(directory):
                if not filename.endswith(extension):
                    continue
                load_file(path.join(directory, filename))
            cls.skip_others = False
TokenizerTestCase.build() |
@@ -0,0 +1,126 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from __future__ import unicode_literals | |||||
from unittest import TestCase | |||||
from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, | |||||
Tag, Template, Text, Wikilink) | |||||
from mwparserfromhell.nodes.extras import Attribute, Parameter | |||||
from mwparserfromhell.smart_list import SmartList | |||||
from mwparserfromhell.wikicode import Wikicode | |||||
def wrap(L):
    """Return a Wikicode object wrapping the given list of nodes *L*."""
    return Wikicode(SmartList(L))


def wraptext(*args):
    """Return a Wikicode whose nodes are Text nodes built from *args*."""
    return wrap([Text(t) for t in args])
def getnodes(code):
    """Iterate over all child nodes of a given parent node.

    Imitates Wikicode._get_all_nodes().
    """
    for parent in code.nodes:
        for pair in parent.__iternodes__(getnodes):
            # Each pair is (context, child); only the child is yielded.
            yield pair[1]
class TreeEqualityTestCase(TestCase):
    """A base test case with support for comparing the equality of node trees.

    This adds a number of type equality functions, for Wikicode, Text,
    Templates, and Wikilinks.
    """

    def assertNodeEqual(self, expected, actual):
        """Assert that two Nodes have the same type and have the same data."""
        checkers = (
            (Argument, self.assertArgumentNodeEqual),
            (Comment, self.assertCommentNodeEqual),
            (Heading, self.assertHeadingNodeEqual),
            (HTMLEntity, self.assertHTMLEntityNodeEqual),
            (Tag, self.assertTagNodeEqual),
            (Template, self.assertTemplateNodeEqual),
            (Text, self.assertTextNodeEqual),
            (Wikilink, self.assertWikilinkNodeEqual),
        )
        for nodetype, check in checkers:
            if isinstance(expected, nodetype):
                self.assertIsInstance(actual, nodetype)
                check(expected, actual)

    def assertArgumentNodeEqual(self, expected, actual):
        """Assert that two Argument nodes have the same data."""
        self.assertWikicodeEqual(expected.name, actual.name)
        if expected.default is None:
            self.assertIs(None, actual.default)
        else:
            self.assertWikicodeEqual(expected.default, actual.default)

    def assertCommentNodeEqual(self, expected, actual):
        """Assert that two Comment nodes have the same data."""
        self.assertWikicodeEqual(expected.contents, actual.contents)

    def assertHeadingNodeEqual(self, expected, actual):
        """Assert that two Heading nodes have the same data."""
        self.assertEqual(expected.level, actual.level)
        self.assertWikicodeEqual(expected.title, actual.title)

    def assertHTMLEntityNodeEqual(self, expected, actual):
        """Assert that two HTMLEntity nodes have the same data."""
        self.assertEqual(expected.value, actual.value)
        self.assertEqual(expected.hex_char, actual.hex_char)
        # named/hexadecimal are flags, so identity comparison is intended.
        self.assertIs(expected.named, actual.named)
        self.assertIs(expected.hexadecimal, actual.hexadecimal)

    def assertTagNodeEqual(self, expected, actual):
        """Assert that two Tag nodes have the same data."""
        self.fail("Holding this until feature/html_tags is ready.")

    def assertTemplateNodeEqual(self, expected, actual):
        """Assert that two Template nodes have the same data."""
        self.assertWikicodeEqual(expected.name, actual.name)
        self.assertEqual(len(expected.params), len(actual.params))
        for exp_param, act_param in zip(expected.params, actual.params):
            self.assertWikicodeEqual(exp_param.name, act_param.name)
            self.assertWikicodeEqual(exp_param.value, act_param.value)
            self.assertIs(exp_param.showkey, act_param.showkey)

    def assertTextNodeEqual(self, expected, actual):
        """Assert that two Text nodes have the same data."""
        self.assertEqual(expected.value, actual.value)

    def assertWikilinkNodeEqual(self, expected, actual):
        """Assert that two Wikilink nodes have the same data."""
        self.assertWikicodeEqual(expected.title, actual.title)
        if expected.text is None:
            self.assertIs(None, actual.text)
        else:
            self.assertWikicodeEqual(expected.text, actual.text)

    def assertWikicodeEqual(self, expected, actual):
        """Assert that two Wikicode objects have the same data."""
        self.assertIsInstance(actual, Wikicode)
        self.assertEqual(len(expected.nodes), len(actual.nodes))
        for i in range(len(expected.nodes)):
            self.assertNodeEqual(expected.get(i), actual.get(i))
@@ -0,0 +1,20 @@ | |||||
# -*- coding: utf-8 -*-

"""
Serves the same purpose as mwparserfromhell.compat, but only for objects
required by unit tests. This avoids unnecessary imports (like urllib) within
the main library.
"""

from mwparserfromhell.compat import py3k

if py3k:
    # Python 3: built-in range and the py3 module locations already match.
    range = range
    from io import StringIO
    from urllib.parse import urlencode
    from urllib.request import urlopen

else:
    # Python 2: alias the py3-style names onto their py2 equivalents.
    # xrange only exists on Python 2, so this branch is never reached on py3.
    range = xrange
    from StringIO import StringIO
    from urllib import urlencode, urlopen
@@ -0,0 +1,107 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from __future__ import unicode_literals | |||||
import unittest | |||||
from mwparserfromhell.compat import str | |||||
from mwparserfromhell.nodes import Argument, Text | |||||
from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext | |||||
class TestArgument(TreeEqualityTestCase):
    """Test cases for the Argument node."""

    def test_unicode(self):
        """test Argument.__unicode__()"""
        # An argument with no default renders as {{{name}}} ...
        node = Argument(wraptext("foobar"))
        self.assertEqual("{{{foobar}}}", str(node))
        # ... and one with a default renders as {{{name|default}}}.
        node2 = Argument(wraptext("foo"), wraptext("bar"))
        self.assertEqual("{{{foo|bar}}}", str(node2))

    def test_iternodes(self):
        """test Argument.__iternodes__()"""
        node1n1 = Text("foobar")
        node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("baz")
        node1 = Argument(wrap([node1n1]))
        node2 = Argument(wrap([node2n1]), wrap([node2n2, node2n3]))
        gen1 = node1.__iternodes__(getnodes)
        gen2 = node2.__iternodes__(getnodes)
        # The first yielded pair is the Argument itself with no context ...
        self.assertEqual((None, node1), next(gen1))
        self.assertEqual((None, node2), next(gen2))
        # ... followed by each child paired with the Wikicode containing it.
        self.assertEqual((node1.name, node1n1), next(gen1))
        self.assertEqual((node2.name, node2n1), next(gen2))
        self.assertEqual((node2.default, node2n2), next(gen2))
        self.assertEqual((node2.default, node2n3), next(gen2))
        self.assertRaises(StopIteration, next, gen1)
        self.assertRaises(StopIteration, next, gen2)

    def test_strip(self):
        """test Argument.__strip__()"""
        node = Argument(wraptext("foobar"))
        node2 = Argument(wraptext("foo"), wraptext("bar"))
        # The strip flags must not affect the result: no default -> None,
        # with a default -> the default's text.
        for a in (True, False):
            for b in (True, False):
                self.assertIs(None, node.__strip__(a, b))
                self.assertEqual("bar", node2.__strip__(a, b))

    def test_showtree(self):
        """test Argument.__showtree__()"""
        output = []
        # Sentinel objects let us verify exactly which callback produced
        # each entry in the output list.
        getter, marker = object(), object()
        get = lambda code: output.append((getter, code))
        mark = lambda: output.append(marker)
        node1 = Argument(wraptext("foobar"))
        node2 = Argument(wraptext("foo"), wraptext("bar"))
        node1.__showtree__(output.append, get, mark)
        node2.__showtree__(output.append, get, mark)
        valid = [
            "{{{", (getter, node1.name), "}}}", "{{{", (getter, node2.name),
            "    | ", marker, (getter, node2.default), "}}}"]
        self.assertEqual(valid, output)

    def test_name(self):
        """test getter/setter for the name attribute"""
        name = wraptext("foobar")
        node1 = Argument(name)
        node2 = Argument(name, wraptext("baz"))
        self.assertIs(name, node1.name)
        self.assertIs(name, node2.name)
        # Assigning a plain string should be parsed into a Wikicode tree.
        node1.name = "héhehé"
        node2.name = "héhehé"
        self.assertWikicodeEqual(wraptext("héhehé"), node1.name)
        self.assertWikicodeEqual(wraptext("héhehé"), node2.name)

    def test_default(self):
        """test getter/setter for the default attribute"""
        default = wraptext("baz")
        node1 = Argument(wraptext("foobar"))
        node2 = Argument(wraptext("foobar"), default)
        self.assertIs(None, node1.default)
        self.assertIs(default, node2.default)
        # Strings are converted to Wikicode; None clears the default.
        node1.default = "buzz"
        node2.default = None
        self.assertWikicodeEqual(wraptext("buzz"), node1.default)
        self.assertIs(None, node2.default)
# Allow this test module to be run directly from the command line.
if __name__ == "__main__":
    unittest.main(verbosity=2)
@@ -0,0 +1,252 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from __future__ import unicode_literals | |||||
import unittest | |||||
from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, | |||||
Tag, Template, Text, Wikilink) | |||||
from mwparserfromhell.nodes.extras import Attribute, Parameter | |||||
from mwparserfromhell.parser import tokens | |||||
from mwparserfromhell.parser.builder import Builder | |||||
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | |||||
class TestBuilder(TreeEqualityTestCase):
    """Tests for the builder, which turns tokens into Wikicode objects."""

    def setUp(self):
        # A fresh Builder per test so no state leaks between test methods.
        self.builder = Builder()

    def test_text(self):
        """tests for building Text nodes"""
        # Each entry pairs an input token stream with its expected tree.
        tests = [
            ([tokens.Text(text="foobar")], wraptext("foobar")),
            ([tokens.Text(text="fóóbar")], wraptext("fóóbar")),
            ([tokens.Text(text="spam"), tokens.Text(text="eggs")],
             wraptext("spam", "eggs")),
        ]
        for test, valid in tests:
            self.assertWikicodeEqual(valid, self.builder.build(test))

    def test_template(self):
        """tests for building Template nodes"""
        # Each entry pairs an input token stream with its expected tree.
        tests = [
            ([tokens.TemplateOpen(), tokens.Text(text="foobar"),
              tokens.TemplateClose()],
             wrap([Template(wraptext("foobar"))])),

            ([tokens.TemplateOpen(), tokens.Text(text="spam"),
              tokens.Text(text="eggs"), tokens.TemplateClose()],
             wrap([Template(wraptext("spam", "eggs"))])),

            # Unnamed parameters get auto-incrementing numeric keys with
            # showkey=False; explicitly-keyed parameters keep showkey=True.
            ([tokens.TemplateOpen(), tokens.Text(text="foo"),
              tokens.TemplateParamSeparator(), tokens.Text(text="bar"),
              tokens.TemplateClose()],
             wrap([Template(wraptext("foo"), params=[
                 Parameter(wraptext("1"), wraptext("bar"), showkey=False)])])),

            ([tokens.TemplateOpen(), tokens.Text(text="foo"),
              tokens.TemplateParamSeparator(), tokens.Text(text="bar"),
              tokens.TemplateParamEquals(), tokens.Text(text="baz"),
              tokens.TemplateClose()],
             wrap([Template(wraptext("foo"), params=[
                 Parameter(wraptext("bar"), wraptext("baz"))])])),

            ([tokens.TemplateOpen(), tokens.Text(text="foo"),
              tokens.TemplateParamSeparator(), tokens.Text(text="bar"),
              tokens.TemplateParamEquals(), tokens.Text(text="baz"),
              tokens.TemplateParamSeparator(), tokens.Text(text="biz"),
              tokens.TemplateParamSeparator(), tokens.Text(text="buzz"),
              tokens.TemplateParamSeparator(), tokens.Text(text="3"),
              tokens.TemplateParamEquals(), tokens.Text(text="buff"),
              tokens.TemplateParamSeparator(), tokens.Text(text="baff"),
              tokens.TemplateClose()],
             wrap([Template(wraptext("foo"), params=[
                 Parameter(wraptext("bar"), wraptext("baz")),
                 Parameter(wraptext("1"), wraptext("biz"), showkey=False),
                 Parameter(wraptext("2"), wraptext("buzz"), showkey=False),
                 Parameter(wraptext("3"), wraptext("buff")),
                 Parameter(wraptext("3"), wraptext("baff"),
                           showkey=False)])])),
        ]
        for test, valid in tests:
            self.assertWikicodeEqual(valid, self.builder.build(test))

    def test_argument(self):
        """tests for building Argument nodes"""
        # Each entry pairs an input token stream with its expected tree.
        tests = [
            ([tokens.ArgumentOpen(), tokens.Text(text="foobar"),
              tokens.ArgumentClose()],
             wrap([Argument(wraptext("foobar"))])),

            ([tokens.ArgumentOpen(), tokens.Text(text="spam"),
              tokens.Text(text="eggs"), tokens.ArgumentClose()],
             wrap([Argument(wraptext("spam", "eggs"))])),

            ([tokens.ArgumentOpen(), tokens.Text(text="foo"),
              tokens.ArgumentSeparator(), tokens.Text(text="bar"),
              tokens.ArgumentClose()],
             wrap([Argument(wraptext("foo"), wraptext("bar"))])),

            ([tokens.ArgumentOpen(), tokens.Text(text="foo"),
              tokens.Text(text="bar"), tokens.ArgumentSeparator(),
              tokens.Text(text="baz"), tokens.Text(text="biz"),
              tokens.ArgumentClose()],
             wrap([Argument(wraptext("foo", "bar"), wraptext("baz", "biz"))])),
        ]
        for test, valid in tests:
            self.assertWikicodeEqual(valid, self.builder.build(test))

    def test_wikilink(self):
        """tests for building Wikilink nodes"""
        # Each entry pairs an input token stream with its expected tree.
        tests = [
            ([tokens.WikilinkOpen(), tokens.Text(text="foobar"),
              tokens.WikilinkClose()],
             wrap([Wikilink(wraptext("foobar"))])),

            ([tokens.WikilinkOpen(), tokens.Text(text="spam"),
              tokens.Text(text="eggs"), tokens.WikilinkClose()],
             wrap([Wikilink(wraptext("spam", "eggs"))])),

            ([tokens.WikilinkOpen(), tokens.Text(text="foo"),
              tokens.WikilinkSeparator(), tokens.Text(text="bar"),
              tokens.WikilinkClose()],
             wrap([Wikilink(wraptext("foo"), wraptext("bar"))])),

            ([tokens.WikilinkOpen(), tokens.Text(text="foo"),
              tokens.Text(text="bar"), tokens.WikilinkSeparator(),
              tokens.Text(text="baz"), tokens.Text(text="biz"),
              tokens.WikilinkClose()],
             wrap([Wikilink(wraptext("foo", "bar"), wraptext("baz", "biz"))])),
        ]
        for test, valid in tests:
            self.assertWikicodeEqual(valid, self.builder.build(test))

    def test_html_entity(self):
        """tests for building HTMLEntity nodes"""
        # Covers named (&nbsp;), decimal (&#107;), and hex (&#X6B;) forms.
        tests = [
            ([tokens.HTMLEntityStart(), tokens.Text(text="nbsp"),
              tokens.HTMLEntityEnd()],
             wrap([HTMLEntity("nbsp", named=True, hexadecimal=False)])),

            ([tokens.HTMLEntityStart(), tokens.HTMLEntityNumeric(),
              tokens.Text(text="107"), tokens.HTMLEntityEnd()],
             wrap([HTMLEntity("107", named=False, hexadecimal=False)])),

            ([tokens.HTMLEntityStart(), tokens.HTMLEntityNumeric(),
              tokens.HTMLEntityHex(char="X"), tokens.Text(text="6B"),
              tokens.HTMLEntityEnd()],
             wrap([HTMLEntity("6B", named=False, hexadecimal=True,
                              hex_char="X")])),
        ]
        for test, valid in tests:
            self.assertWikicodeEqual(valid, self.builder.build(test))

    def test_heading(self):
        """tests for building Heading nodes"""
        # Each entry pairs an input token stream with its expected tree.
        tests = [
            ([tokens.HeadingStart(level=2), tokens.Text(text="foobar"),
              tokens.HeadingEnd()],
             wrap([Heading(wraptext("foobar"), 2)])),

            ([tokens.HeadingStart(level=4), tokens.Text(text="spam"),
              tokens.Text(text="eggs"), tokens.HeadingEnd()],
             wrap([Heading(wraptext("spam", "eggs"), 4)])),
        ]
        for test, valid in tests:
            self.assertWikicodeEqual(valid, self.builder.build(test))

    def test_comment(self):
        """tests for building Comment nodes"""
        # Each entry pairs an input token stream with its expected tree.
        tests = [
            ([tokens.CommentStart(), tokens.Text(text="foobar"),
              tokens.CommentEnd()],
             wrap([Comment(wraptext("foobar"))])),

            ([tokens.CommentStart(), tokens.Text(text="spam"),
              tokens.Text(text="eggs"), tokens.CommentEnd()],
             wrap([Comment(wraptext("spam", "eggs"))])),
        ]
        for test, valid in tests:
            self.assertWikicodeEqual(valid, self.builder.build(test))

    @unittest.skip("holding this until feature/html_tags is ready")
    def test_tag(self):
        """tests for building Tag nodes"""
        pass

    def test_integration(self):
        """a test for building a combination of templates together"""
        # The token stream below corresponds to this wikitext:
        # {{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}
        test = [tokens.TemplateOpen(), tokens.TemplateOpen(),
                tokens.TemplateOpen(), tokens.TemplateOpen(),
                tokens.Text(text="foo"), tokens.TemplateClose(),
                tokens.Text(text="bar"), tokens.TemplateParamSeparator(),
                tokens.Text(text="baz"), tokens.TemplateParamEquals(),
                tokens.Text(text="biz"), tokens.TemplateClose(),
                tokens.Text(text="buzz"), tokens.TemplateClose(),
                tokens.Text(text="usr"), tokens.TemplateParamSeparator(),
                tokens.TemplateOpen(), tokens.Text(text="bin"),
                tokens.TemplateClose(), tokens.TemplateClose()]
        valid = wrap(
            [Template(wrap([Template(wrap([Template(wrap([Template(wraptext(
            "foo")), Text("bar")]), params=[Parameter(wraptext("baz"),
            wraptext("biz"))]), Text("buzz")])), Text("usr")]), params=[
            Parameter(wraptext("1"), wrap([Template(wraptext("bin"))]),
            showkey=False)])])
        self.assertWikicodeEqual(valid, self.builder.build(test))

    def test_integration2(self):
        """an even more audacious test for building a horrible wikicode mess"""
        # The token stream below corresponds to this wikitext:
        # {{a|b|{{c|[[d]]{{{e}}}}}}}[[f|{{{g}}}<!--h-->]]{{i|j= }}
        test = [tokens.TemplateOpen(), tokens.Text(text="a"),
                tokens.TemplateParamSeparator(), tokens.Text(text="b"),
                tokens.TemplateParamSeparator(), tokens.TemplateOpen(),
                tokens.Text(text="c"), tokens.TemplateParamSeparator(),
                tokens.WikilinkOpen(), tokens.Text(text="d"),
                tokens.WikilinkClose(), tokens.ArgumentOpen(),
                tokens.Text(text="e"), tokens.ArgumentClose(),
                tokens.TemplateClose(), tokens.TemplateClose(),
                tokens.WikilinkOpen(), tokens.Text(text="f"),
                tokens.WikilinkSeparator(), tokens.ArgumentOpen(),
                tokens.Text(text="g"), tokens.ArgumentClose(),
                tokens.CommentStart(), tokens.Text(text="h"),
                tokens.CommentEnd(), tokens.WikilinkClose(),
                tokens.TemplateOpen(), tokens.Text(text="i"),
                tokens.TemplateParamSeparator(), tokens.Text(text="j"),
                tokens.TemplateParamEquals(), tokens.HTMLEntityStart(),
                tokens.Text(text="nbsp"), tokens.HTMLEntityEnd(),
                tokens.TemplateClose()]
        valid = wrap(
            [Template(wraptext("a"), params=[Parameter(wraptext("1"), wraptext(
            "b"), showkey=False), Parameter(wraptext("2"), wrap([Template(
            wraptext("c"), params=[Parameter(wraptext("1"), wrap([Wikilink(
            wraptext("d")), Argument(wraptext("e"))]), showkey=False)])]),
            showkey=False)]), Wikilink(wraptext("f"), wrap([Argument(wraptext(
            "g")), Comment(wraptext("h"))])), Template(wraptext("i"), params=[
            Parameter(wraptext("j"), wrap([HTMLEntity("nbsp",
            named=True)]))])])
        self.assertWikicodeEqual(valid, self.builder.build(test))
# Allow this test module to be run directly from the command line.
if __name__ == "__main__":
    unittest.main(verbosity=2)
@@ -0,0 +1,68 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from __future__ import unicode_literals | |||||
import unittest | |||||
from mwparserfromhell.compat import str | |||||
from mwparserfromhell.nodes import Comment | |||||
from ._test_tree_equality import TreeEqualityTestCase | |||||
class TestComment(TreeEqualityTestCase): | |||||
"""Test cases for the Comment node.""" | |||||
def test_unicode(self): | |||||
"""test Comment.__unicode__()""" | |||||
node = Comment("foobar") | |||||
self.assertEqual("<!--foobar-->", str(node)) | |||||
def test_iternodes(self): | |||||
"""test Comment.__iternodes__()""" | |||||
node = Comment("foobar") | |||||
gen = node.__iternodes__(None) | |||||
self.assertEqual((None, node), next(gen)) | |||||
self.assertRaises(StopIteration, next, gen) | |||||
def test_strip(self): | |||||
"""test Comment.__strip__()""" | |||||
node = Comment("foobar") | |||||
for a in (True, False): | |||||
for b in (True, False): | |||||
self.assertIs(None, node.__strip__(a, b)) | |||||
def test_showtree(self): | |||||
"""test Comment.__showtree__()""" | |||||
output = [] | |||||
node = Comment("foobar") | |||||
node.__showtree__(output.append, None, None) | |||||
self.assertEqual(["<!--foobar-->"], output) | |||||
def test_contents(self): | |||||
"""test getter/setter for the contents attribute""" | |||||
node = Comment("foobar") | |||||
self.assertEqual("foobar", node.contents) | |||||
node.contents = "barfoo" | |||||
self.assertEqual("barfoo", node.contents) | |||||
if __name__ == "__main__": | |||||
unittest.main(verbosity=2) |
@@ -0,0 +1,48 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from __future__ import unicode_literals | |||||
import unittest | |||||
try: | |||||
from mwparserfromhell.parser._tokenizer import CTokenizer | |||||
except ImportError: | |||||
CTokenizer = None | |||||
from ._test_tokenizer import TokenizerTestCase | |||||
@unittest.skipUnless(CTokenizer, "C tokenizer not available") | |||||
class TestCTokenizer(TokenizerTestCase, unittest.TestCase): | |||||
"""Test cases for the C tokenizer.""" | |||||
@classmethod | |||||
def setUpClass(cls): | |||||
cls.tokenizer = CTokenizer | |||||
if not TokenizerTestCase.skip_others: | |||||
def test_uses_c(self): | |||||
"""make sure the C tokenizer identifies as using a C extension""" | |||||
self.assertTrue(CTokenizer.USES_C) | |||||
self.assertTrue(CTokenizer().USES_C) | |||||
if __name__ == "__main__": | |||||
unittest.main(verbosity=2) |
@@ -0,0 +1,131 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from __future__ import print_function, unicode_literals | |||||
import json | |||||
import unittest | |||||
import mwparserfromhell | |||||
from mwparserfromhell.compat import py3k, str | |||||
from .compat import StringIO, urlencode, urlopen | |||||
class TestDocs(unittest.TestCase): | |||||
"""Integration test cases for mwparserfromhell's documentation.""" | |||||
def assertPrint(self, input, output): | |||||
"""Assertion check that *input*, when printed, produces *output*.""" | |||||
buff = StringIO() | |||||
print(input, end="", file=buff) | |||||
buff.seek(0) | |||||
self.assertEqual(output, buff.read()) | |||||
def test_readme_1(self): | |||||
"""test a block of example code in the README""" | |||||
text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" | |||||
wikicode = mwparserfromhell.parse(text) | |||||
self.assertPrint(wikicode, | |||||
"I has a template! {{foo|bar|baz|eggs=spam}} See it?") | |||||
templates = wikicode.filter_templates() | |||||
if py3k: | |||||
self.assertPrint(templates, "['{{foo|bar|baz|eggs=spam}}']") | |||||
else: | |||||
self.assertPrint(templates, "[u'{{foo|bar|baz|eggs=spam}}']") | |||||
template = templates[0] | |||||
self.assertPrint(template.name, "foo") | |||||
if py3k: | |||||
self.assertPrint(template.params, "['bar', 'baz', 'eggs=spam']") | |||||
else: | |||||
self.assertPrint(template.params, "[u'bar', u'baz', u'eggs=spam']") | |||||
self.assertPrint(template.get(1).value, "bar") | |||||
self.assertPrint(template.get("eggs").value, "spam") | |||||
def test_readme_2(self): | |||||
"""test a block of example code in the README""" | |||||
code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") | |||||
if py3k: | |||||
self.assertPrint(code.filter_templates(), | |||||
"['{{foo|this {{includes a|template}}}}']") | |||||
else: | |||||
self.assertPrint(code.filter_templates(), | |||||
"[u'{{foo|this {{includes a|template}}}}']") | |||||
foo = code.filter_templates()[0] | |||||
self.assertPrint(foo.get(1).value, "this {{includes a|template}}") | |||||
self.assertPrint(foo.get(1).value.filter_templates()[0], | |||||
"{{includes a|template}}") | |||||
self.assertPrint(foo.get(1).value.filter_templates()[0].get(1).value, | |||||
"template") | |||||
def test_readme_3(self): | |||||
"""test a block of example code in the README""" | |||||
text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" | |||||
temps = mwparserfromhell.parse(text).filter_templates(recursive=True) | |||||
if py3k: | |||||
res = "['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']" | |||||
else: | |||||
res = "[u'{{foo|{{bar}}={{baz|{{spam}}}}}}', u'{{bar}}', u'{{baz|{{spam}}}}', u'{{spam}}']" | |||||
self.assertPrint(temps, res) | |||||
def test_readme_4(self): | |||||
"""test a block of example code in the README""" | |||||
text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}" | |||||
code = mwparserfromhell.parse(text) | |||||
for template in code.filter_templates(): | |||||
if template.name == "cleanup" and not template.has_param("date"): | |||||
template.add("date", "July 2012") | |||||
res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}}" | |||||
self.assertPrint(code, res) | |||||
code.replace("{{uncategorized}}", "{{bar-stub}}") | |||||
res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}" | |||||
self.assertPrint(code, res) | |||||
if py3k: | |||||
res = "['{{cleanup|date=July 2012}}', '{{bar-stub}}']" | |||||
else: | |||||
res = "[u'{{cleanup|date=July 2012}}', u'{{bar-stub}}']" | |||||
self.assertPrint(code.filter_templates(), res) | |||||
text = str(code) | |||||
res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}" | |||||
self.assertPrint(text, res) | |||||
self.assertEqual(text, code) | |||||
def test_readme_5(self): | |||||
"""test a block of example code in the README; includes a web call""" | |||||
url1 = "http://en.wikipedia.org/w/api.php" | |||||
url2 = "http://en.wikipedia.org/w/index.php?title={0}&action=raw" | |||||
title = "Test" | |||||
data = {"action": "query", "prop": "revisions", "rvlimit": 1, | |||||
"rvprop": "content", "format": "json", "titles": title} | |||||
try: | |||||
raw = urlopen(url1, urlencode(data).encode("utf8")).read() | |||||
except IOError: | |||||
self.skipTest("cannot continue because of unsuccessful web call") | |||||
res = json.loads(raw.decode("utf8")) | |||||
text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"] | |||||
try: | |||||
expected = urlopen(url2.format(title)).read().decode("utf8") | |||||
except IOError: | |||||
self.skipTest("cannot continue because of unsuccessful web call") | |||||
actual = mwparserfromhell.parse(text) | |||||
self.assertEqual(expected, actual) | |||||
if __name__ == "__main__": | |||||
unittest.main(verbosity=2) |
@@ -0,0 +1,91 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from __future__ import unicode_literals | |||||
import unittest | |||||
from mwparserfromhell.compat import str | |||||
from mwparserfromhell.nodes import Heading, Text | |||||
from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext | |||||
class TestHeading(TreeEqualityTestCase): | |||||
"""Test cases for the Heading node.""" | |||||
def test_unicode(self): | |||||
"""test Heading.__unicode__()""" | |||||
node = Heading(wraptext("foobar"), 2) | |||||
self.assertEqual("==foobar==", str(node)) | |||||
node2 = Heading(wraptext(" zzz "), 5) | |||||
self.assertEqual("===== zzz =====", str(node2)) | |||||
def test_iternodes(self): | |||||
"""test Heading.__iternodes__()""" | |||||
text1, text2 = Text("foo"), Text("bar") | |||||
node = Heading(wrap([text1, text2]), 3) | |||||
gen = node.__iternodes__(getnodes) | |||||
self.assertEqual((None, node), next(gen)) | |||||
self.assertEqual((node.title, text1), next(gen)) | |||||
self.assertEqual((node.title, text2), next(gen)) | |||||
self.assertRaises(StopIteration, next, gen) | |||||
def test_strip(self): | |||||
"""test Heading.__strip__()""" | |||||
node = Heading(wraptext("foobar"), 3) | |||||
for a in (True, False): | |||||
for b in (True, False): | |||||
self.assertEqual("foobar", node.__strip__(a, b)) | |||||
def test_showtree(self): | |||||
"""test Heading.__showtree__()""" | |||||
output = [] | |||||
getter = object() | |||||
get = lambda code: output.append((getter, code)) | |||||
node1 = Heading(wraptext("foobar"), 3) | |||||
node2 = Heading(wraptext(" baz "), 4) | |||||
node1.__showtree__(output.append, get, None) | |||||
node2.__showtree__(output.append, get, None) | |||||
valid = ["===", (getter, node1.title), "===", | |||||
"====", (getter, node2.title), "===="] | |||||
self.assertEqual(valid, output) | |||||
def test_title(self): | |||||
"""test getter/setter for the title attribute""" | |||||
title = wraptext("foobar") | |||||
node = Heading(title, 3) | |||||
self.assertIs(title, node.title) | |||||
node.title = "héhehé" | |||||
self.assertWikicodeEqual(wraptext("héhehé"), node.title) | |||||
def test_level(self): | |||||
"""test getter/setter for the level attribute""" | |||||
node = Heading(wraptext("foobar"), 3) | |||||
self.assertEqual(3, node.level) | |||||
node.level = 5 | |||||
self.assertEqual(5, node.level) | |||||
self.assertRaises(ValueError, setattr, node, "level", 0) | |||||
self.assertRaises(ValueError, setattr, node, "level", 7) | |||||
self.assertRaises(ValueError, setattr, node, "level", "abc") | |||||
self.assertRaises(ValueError, setattr, node, "level", False) | |||||
if __name__ == "__main__": | |||||
unittest.main(verbosity=2) |
@@ -0,0 +1,169 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from __future__ import unicode_literals | |||||
import unittest | |||||
from mwparserfromhell.compat import str | |||||
from mwparserfromhell.nodes import HTMLEntity | |||||
from ._test_tree_equality import TreeEqualityTestCase, wrap | |||||
class TestHTMLEntity(TreeEqualityTestCase): | |||||
"""Test cases for the HTMLEntity node.""" | |||||
def test_unicode(self): | |||||
"""test HTMLEntity.__unicode__()""" | |||||
node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) | |||||
node2 = HTMLEntity("107", named=False, hexadecimal=False) | |||||
node3 = HTMLEntity("6b", named=False, hexadecimal=True) | |||||
node4 = HTMLEntity("6C", named=False, hexadecimal=True, hex_char="X") | |||||
self.assertEqual(" ", str(node1)) | |||||
self.assertEqual("k", str(node2)) | |||||
self.assertEqual("k", str(node3)) | |||||
self.assertEqual("l", str(node4)) | |||||
def test_iternodes(self): | |||||
"""test HTMLEntity.__iternodes__()""" | |||||
node = HTMLEntity("nbsp", named=True, hexadecimal=False) | |||||
gen = node.__iternodes__(None) | |||||
self.assertEqual((None, node), next(gen)) | |||||
self.assertRaises(StopIteration, next, gen) | |||||
def test_strip(self): | |||||
"""test HTMLEntity.__strip__()""" | |||||
node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) | |||||
node2 = HTMLEntity("107", named=False, hexadecimal=False) | |||||
node3 = HTMLEntity("e9", named=False, hexadecimal=True) | |||||
for a in (True, False): | |||||
self.assertEqual("\xa0", node1.__strip__(True, a)) | |||||
self.assertEqual(" ", node1.__strip__(False, a)) | |||||
self.assertEqual("k", node2.__strip__(True, a)) | |||||
self.assertEqual("k", node2.__strip__(False, a)) | |||||
self.assertEqual("é", node3.__strip__(True, a)) | |||||
self.assertEqual("é", node3.__strip__(False, a)) | |||||
def test_showtree(self): | |||||
"""test HTMLEntity.__showtree__()""" | |||||
output = [] | |||||
node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) | |||||
node2 = HTMLEntity("107", named=False, hexadecimal=False) | |||||
node3 = HTMLEntity("e9", named=False, hexadecimal=True) | |||||
node1.__showtree__(output.append, None, None) | |||||
node2.__showtree__(output.append, None, None) | |||||
node3.__showtree__(output.append, None, None) | |||||
res = [" ", "k", "é"] | |||||
self.assertEqual(res, output) | |||||
def test_value(self): | |||||
"""test getter/setter for the value attribute""" | |||||
node1 = HTMLEntity("nbsp") | |||||
node2 = HTMLEntity("107") | |||||
node3 = HTMLEntity("e9") | |||||
self.assertEqual("nbsp", node1.value) | |||||
self.assertEqual("107", node2.value) | |||||
self.assertEqual("e9", node3.value) | |||||
node1.value = "ffa4" | |||||
node2.value = 72 | |||||
node3.value = "Sigma" | |||||
self.assertEqual("ffa4", node1.value) | |||||
self.assertFalse(node1.named) | |||||
self.assertTrue(node1.hexadecimal) | |||||
self.assertEqual("72", node2.value) | |||||
self.assertFalse(node2.named) | |||||
self.assertFalse(node2.hexadecimal) | |||||
self.assertEqual("Sigma", node3.value) | |||||
self.assertTrue(node3.named) | |||||
self.assertFalse(node3.hexadecimal) | |||||
node1.value = "10FFFF" | |||||
node2.value = 110000 | |||||
node2.value = 1114111 | |||||
self.assertRaises(ValueError, setattr, node3, "value", "") | |||||
self.assertRaises(ValueError, setattr, node3, "value", "foobar") | |||||
self.assertRaises(ValueError, setattr, node3, "value", True) | |||||
self.assertRaises(ValueError, setattr, node3, "value", -1) | |||||
self.assertRaises(ValueError, setattr, node1, "value", 110000) | |||||
self.assertRaises(ValueError, setattr, node1, "value", "1114112") | |||||
def test_named(self): | |||||
"""test getter/setter for the named attribute""" | |||||
node1 = HTMLEntity("nbsp") | |||||
node2 = HTMLEntity("107") | |||||
node3 = HTMLEntity("e9") | |||||
self.assertTrue(node1.named) | |||||
self.assertFalse(node2.named) | |||||
self.assertFalse(node3.named) | |||||
node1.named = 1 | |||||
node2.named = 0 | |||||
node3.named = 0 | |||||
self.assertTrue(node1.named) | |||||
self.assertFalse(node2.named) | |||||
self.assertFalse(node3.named) | |||||
self.assertRaises(ValueError, setattr, node1, "named", False) | |||||
self.assertRaises(ValueError, setattr, node2, "named", True) | |||||
self.assertRaises(ValueError, setattr, node3, "named", True) | |||||
def test_hexadecimal(self): | |||||
"""test getter/setter for the hexadecimal attribute""" | |||||
node1 = HTMLEntity("nbsp") | |||||
node2 = HTMLEntity("107") | |||||
node3 = HTMLEntity("e9") | |||||
self.assertFalse(node1.hexadecimal) | |||||
self.assertFalse(node2.hexadecimal) | |||||
self.assertTrue(node3.hexadecimal) | |||||
node1.hexadecimal = False | |||||
node2.hexadecimal = True | |||||
node3.hexadecimal = False | |||||
self.assertFalse(node1.hexadecimal) | |||||
self.assertTrue(node2.hexadecimal) | |||||
self.assertFalse(node3.hexadecimal) | |||||
self.assertRaises(ValueError, setattr, node1, "hexadecimal", True) | |||||
def test_hex_char(self): | |||||
"""test getter/setter for the hex_char attribute""" | |||||
node1 = HTMLEntity("e9") | |||||
node2 = HTMLEntity("e9", hex_char="X") | |||||
self.assertEqual("x", node1.hex_char) | |||||
self.assertEqual("X", node2.hex_char) | |||||
node1.hex_char = "X" | |||||
node2.hex_char = "x" | |||||
self.assertEqual("X", node1.hex_char) | |||||
self.assertEqual("x", node2.hex_char) | |||||
self.assertRaises(ValueError, setattr, node1, "hex_char", 123) | |||||
self.assertRaises(ValueError, setattr, node1, "hex_char", "foobar") | |||||
self.assertRaises(ValueError, setattr, node1, "hex_char", True) | |||||
def test_normalize(self): | |||||
"""test getter/setter for the normalize attribute""" | |||||
node1 = HTMLEntity("nbsp") | |||||
node2 = HTMLEntity("107") | |||||
node3 = HTMLEntity("e9") | |||||
node4 = HTMLEntity("1f648") | |||||
self.assertEqual("\xa0", node1.normalize()) | |||||
self.assertEqual("k", node2.normalize()) | |||||
self.assertEqual("é", node3.normalize()) | |||||
self.assertEqual("\U0001F648", node4.normalize()) | |||||
if __name__ == "__main__": | |||||
unittest.main(verbosity=2) |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -20,100 +20,56 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
from __future__ import unicode_literals | |||||
import unittest | import unittest | ||||
from mwparserfromhell.parameter import Parameter | |||||
from mwparserfromhell.template import Template | |||||
from mwparserfromhell.compat import str | |||||
from mwparserfromhell.nodes import Text | |||||
from mwparserfromhell.nodes.extras import Parameter | |||||
class TestParameter(unittest.TestCase): | |||||
def setUp(self): | |||||
self.name = "foo" | |||||
self.value1 = "bar" | |||||
self.value2 = "{{spam}}" | |||||
self.value3 = "bar{{spam}}" | |||||
self.value4 = "embedded {{eggs|spam|baz=buz}} {{goes}} here" | |||||
self.templates2 = [Template("spam")] | |||||
self.templates3 = [Template("spam")] | |||||
self.templates4 = [Template("eggs", [Parameter("1", "spam"), | |||||
Parameter("baz", "buz")]), | |||||
Template("goes")] | |||||
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | |||||
def test_construct(self): | |||||
Parameter(self.name, self.value1) | |||||
Parameter(self.name, self.value2, self.templates2) | |||||
Parameter(name=self.name, value=self.value3) | |||||
Parameter(name=self.name, value=self.value4, templates=self.templates4) | |||||
class TestParameter(TreeEqualityTestCase): | |||||
"""Test cases for the Parameter node extra.""" | |||||
def test_unicode(self): | |||||
"""test Parameter.__unicode__()""" | |||||
node = Parameter(wraptext("1"), wraptext("foo"), showkey=False) | |||||
self.assertEqual("foo", str(node)) | |||||
node2 = Parameter(wraptext("foo"), wraptext("bar")) | |||||
self.assertEqual("foo=bar", str(node2)) | |||||
def test_name(self): | def test_name(self): | ||||
params = [ | |||||
Parameter(self.name, self.value1), | |||||
Parameter(self.name, self.value2, self.templates2), | |||||
Parameter(name=self.name, value=self.value3), | |||||
Parameter(name=self.name, value=self.value4, | |||||
templates=self.templates4) | |||||
] | |||||
for param in params: | |||||
self.assertEqual(param.name, self.name) | |||||
"""test getter/setter for the name attribute""" | |||||
name1 = wraptext("1") | |||||
name2 = wraptext("foobar") | |||||
node1 = Parameter(name1, wraptext("foobar"), showkey=False) | |||||
node2 = Parameter(name2, wraptext("baz")) | |||||
self.assertIs(name1, node1.name) | |||||
self.assertIs(name2, node2.name) | |||||
node1.name = "héhehé" | |||||
node2.name = "héhehé" | |||||
self.assertWikicodeEqual(wraptext("héhehé"), node1.name) | |||||
self.assertWikicodeEqual(wraptext("héhehé"), node2.name) | |||||
def test_value(self): | def test_value(self): | ||||
tests = [ | |||||
(Parameter(self.name, self.value1), self.value1), | |||||
(Parameter(self.name, self.value2, self.templates2), self.value2), | |||||
(Parameter(name=self.name, value=self.value3), self.value3), | |||||
(Parameter(name=self.name, value=self.value4, | |||||
templates=self.templates4), self.value4) | |||||
] | |||||
for param, correct in tests: | |||||
self.assertEqual(param.value, correct) | |||||
def test_templates(self): | |||||
tests = [ | |||||
(Parameter(self.name, self.value3, self.templates3), | |||||
self.templates3), | |||||
(Parameter(name=self.name, value=self.value4, | |||||
templates=self.templates4), self.templates4) | |||||
] | |||||
for param, correct in tests: | |||||
self.assertEqual(param.templates, correct) | |||||
def test_magic(self): | |||||
params = [Parameter(self.name, self.value1), | |||||
Parameter(self.name, self.value2, self.templates2), | |||||
Parameter(self.name, self.value3, self.templates3), | |||||
Parameter(self.name, self.value4, self.templates4)] | |||||
for param in params: | |||||
self.assertEqual(repr(param), repr(param.value)) | |||||
self.assertEqual(str(param), str(param.value)) | |||||
self.assertIs(param < "eggs", param.value < "eggs") | |||||
self.assertIs(param <= "bar{{spam}}", param.value <= "bar{{spam}}") | |||||
self.assertIs(param == "bar", param.value == "bar") | |||||
self.assertIs(param != "bar", param.value != "bar") | |||||
self.assertIs(param > "eggs", param.value > "eggs") | |||||
self.assertIs(param >= "bar{{spam}}", param.value >= "bar{{spam}}") | |||||
self.assertEquals(bool(param), bool(param.value)) | |||||
self.assertEquals(len(param), len(param.value)) | |||||
self.assertEquals(list(param), list(param.value)) | |||||
self.assertEquals(param[2], param.value[2]) | |||||
self.assertEquals(list(reversed(param)), | |||||
list(reversed(param.value))) | |||||
self.assertIs("bar" in param, "bar" in param.value) | |||||
self.assertEquals(param + "test", param.value + "test") | |||||
self.assertEquals("test" + param, "test" + param.value) | |||||
# add param | |||||
# add template left | |||||
# add template right | |||||
self.assertEquals(param * 3, Parameter(param.name, param.value * 3, | |||||
param.templates * 3)) | |||||
self.assertEquals(3 * param, Parameter(param.name, 3 * param.value, | |||||
3 * param.templates)) | |||||
"""test getter/setter for the value attribute""" | |||||
value = wraptext("bar") | |||||
node = Parameter(wraptext("foo"), value) | |||||
self.assertIs(value, node.value) | |||||
node.value = "héhehé" | |||||
self.assertWikicodeEqual(wraptext("héhehé"), node.value) | |||||
# add param inplace | |||||
# add template implace | |||||
# add str inplace | |||||
# multiply int inplace | |||||
self.assertIsInstance(param, Parameter) | |||||
self.assertIsInstance(param.value, str) | |||||
def test_showkey(self): | |||||
"""test getter/setter for the showkey attribute""" | |||||
node1 = Parameter(wraptext("1"), wraptext("foo"), showkey=False) | |||||
node2 = Parameter(wraptext("foo"), wraptext("bar")) | |||||
self.assertFalse(node1.showkey) | |||||
self.assertTrue(node2.showkey) | |||||
node1.showkey = True | |||||
node2.showkey = "" | |||||
self.assertTrue(node1.showkey) | |||||
self.assertFalse(node2.showkey) | |||||
if __name__ == "__main__": | if __name__ == "__main__": | ||||
unittest.main(verbosity=2) | unittest.main(verbosity=2) |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -20,44 +20,47 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
from __future__ import unicode_literals | |||||
import unittest | import unittest | ||||
from mwparserfromhell.parameter import Parameter | |||||
from mwparserfromhell.parser import Parser | |||||
from mwparserfromhell.template import Template | |||||
from mwparserfromhell import parser | |||||
from mwparserfromhell.nodes import Template, Text, Wikilink | |||||
from mwparserfromhell.nodes.extras import Parameter | |||||
TESTS = [ | |||||
("", []), | |||||
("abcdef ghijhk", []), | |||||
("abc{this is not a template}def", []), | |||||
("neither is {{this one}nor} {this one {despite}} containing braces", []), | |||||
("this is an acceptable {{template}}", [Template("template")]), | |||||
("{{multiple}}{{templates}}", [Template("multiple"), | |||||
Template("templates")]), | |||||
("multiple {{-}} templates {{+}}!", [Template("-"), Template("+")]), | |||||
("{{{no templates here}}}", []), | |||||
("{ {{templates here}}}", [Template("templates here")]), | |||||
("{{{{I do not exist}}}}", []), | |||||
("{{foo|bar|baz|eggs=spam}}", | |||||
[Template("foo", [Parameter("1", "bar"), Parameter("2", "baz"), | |||||
Parameter("eggs", "spam")])]), | |||||
("{{abc def|ghi|jk=lmno|pqr|st=uv|wx|yz}}", | |||||
[Template("abc def", [Parameter("1", "ghi"), Parameter("jk", "lmno"), | |||||
Parameter("2", "pqr"), Parameter("st", "uv"), | |||||
Parameter("3", "wx"), Parameter("4", "yz")])]), | |||||
("{{this has a|{{template}}|inside of it}}", | |||||
[Template("this has a", [Parameter("1", "{{template}}", | |||||
[Template("template")]), | |||||
Parameter("2", "inside of it")])]), | |||||
("{{{{I exist}} }}", [Template("I exist", [] )]), | |||||
("{{}}") | |||||
] | |||||
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | |||||
from .compat import range | |||||
class TestParser(unittest.TestCase): | |||||
def test_parse(self): | |||||
parser = Parser() | |||||
for unparsed, parsed in TESTS: | |||||
self.assertEqual(parser.parse(unparsed), parsed) | |||||
class TestParser(TreeEqualityTestCase): | |||||
"""Tests for the Parser class itself, which tokenizes and builds nodes.""" | |||||
def test_use_c(self): | |||||
"""make sure the correct tokenizer is used""" | |||||
if parser.use_c: | |||||
self.assertTrue(parser.Parser(None)._tokenizer.USES_C) | |||||
parser.use_c = False | |||||
self.assertFalse(parser.Parser(None)._tokenizer.USES_C) | |||||
def test_parsing(self): | |||||
"""integration test for parsing overall""" | |||||
text = "this is text; {{this|is=a|template={{with|[[links]]|in}}it}}" | |||||
expected = wrap([ | |||||
Text("this is text; "), | |||||
Template(wraptext("this"), [ | |||||
Parameter(wraptext("is"), wraptext("a")), | |||||
Parameter(wraptext("template"), wrap([ | |||||
Template(wraptext("with"), [ | |||||
Parameter(wraptext("1"), | |||||
wrap([Wikilink(wraptext("links"))]), | |||||
showkey=False), | |||||
Parameter(wraptext("2"), | |||||
wraptext("in"), showkey=False) | |||||
]), | |||||
Text("it") | |||||
])) | |||||
]) | |||||
]) | |||||
actual = parser.Parser(text).parse() | |||||
self.assertWikicodeEqual(expected, actual) | |||||
if __name__ == "__main__": | if __name__ == "__main__": | ||||
unittest.main(verbosity=2) | unittest.main(verbosity=2) |
@@ -0,0 +1,44 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from __future__ import unicode_literals | |||||
import unittest | |||||
from mwparserfromhell.parser.tokenizer import Tokenizer | |||||
from ._test_tokenizer import TokenizerTestCase | |||||
class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): | |||||
"""Test cases for the Python tokenizer.""" | |||||
@classmethod | |||||
def setUpClass(cls): | |||||
cls.tokenizer = Tokenizer | |||||
if not TokenizerTestCase.skip_others: | |||||
def test_uses_c(self): | |||||
"""make sure the Python tokenizer identifies as not using C""" | |||||
self.assertFalse(Tokenizer.USES_C) | |||||
self.assertFalse(Tokenizer().USES_C) | |||||
if __name__ == "__main__": | |||||
unittest.main(verbosity=2) |
@@ -0,0 +1,392 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from __future__ import unicode_literals | |||||
import unittest | |||||
from mwparserfromhell.compat import py3k | |||||
from mwparserfromhell.smart_list import SmartList, _ListProxy | |||||
from .compat import range | |||||
class TestSmartList(unittest.TestCase): | |||||
"""Test cases for the SmartList class and its child, _ListProxy.""" | |||||
def _test_get_set_del_item(self, builder): | |||||
"""Run tests on __get/set/delitem__ of a list built with *builder*.""" | |||||
def assign(L, s1, s2, s3, val): | |||||
L[s1:s2:s3] = val | |||||
def delete(L, s1): | |||||
del L[s1] | |||||
list1 = builder([0, 1, 2, 3, "one", "two"]) | |||||
list2 = builder(list(range(10))) | |||||
self.assertEqual(1, list1[1]) | |||||
self.assertEqual("one", list1[-2]) | |||||
self.assertEqual([2, 3], list1[2:4]) | |||||
self.assertRaises(IndexError, lambda: list1[6]) | |||||
self.assertRaises(IndexError, lambda: list1[-7]) | |||||
self.assertEqual([0, 1, 2], list1[:3]) | |||||
self.assertEqual([0, 1, 2, 3, "one", "two"], list1[:]) | |||||
self.assertEqual([3, "one", "two"], list1[3:]) | |||||
self.assertEqual(["one", "two"], list1[-2:]) | |||||
self.assertEqual([0, 1], list1[:-4]) | |||||
self.assertEqual([], list1[6:]) | |||||
self.assertEqual([], list1[4:2]) | |||||
self.assertEqual([0, 2, "one"], list1[0:5:2]) | |||||
self.assertEqual([0, 2], list1[0:-3:2]) | |||||
self.assertEqual([0, 1, 2, 3, "one", "two"], list1[::]) | |||||
self.assertEqual([2, 3, "one", "two"], list1[2::]) | |||||
self.assertEqual([0, 1, 2, 3], list1[:4:]) | |||||
self.assertEqual([2, 3], list1[2:4:]) | |||||
self.assertEqual([0, 2, 4, 6, 8], list2[::2]) | |||||
self.assertEqual([2, 5, 8], list2[2::3]) | |||||
self.assertEqual([0, 3], list2[:6:3]) | |||||
self.assertEqual([2, 5, 8], list2[-8:9:3]) | |||||
self.assertEqual([], list2[100000:1000:-100]) | |||||
list1[3] = 100 | |||||
self.assertEqual(100, list1[3]) | |||||
list1[-3] = 101 | |||||
self.assertEqual([0, 1, 2, 101, "one", "two"], list1) | |||||
list1[5:] = [6, 7, 8] | |||||
self.assertEqual([6, 7, 8], list1[5:]) | |||||
self.assertEqual([0, 1, 2, 101, "one", 6, 7, 8], list1) | |||||
list1[2:4] = [-1, -2, -3, -4, -5] | |||||
self.assertEqual([0, 1, -1, -2, -3, -4, -5, "one", 6, 7, 8], list1) | |||||
list1[0:-3] = [99] | |||||
self.assertEqual([99, 6, 7, 8], list1) | |||||
list2[0:6:2] = [100, 102, 104] | |||||
self.assertEqual([100, 1, 102, 3, 104, 5, 6, 7, 8, 9], list2) | |||||
list2[::3] = [200, 203, 206, 209] | |||||
self.assertEqual([200, 1, 102, 203, 104, 5, 206, 7, 8, 209], list2) | |||||
list2[::] = range(7) | |||||
self.assertEqual([0, 1, 2, 3, 4, 5, 6], list2) | |||||
self.assertRaises(ValueError, assign, list2, 0, 5, 2, | |||||
[100, 102, 104, 106]) | |||||
del list2[2] | |||||
self.assertEqual([0, 1, 3, 4, 5, 6], list2) | |||||
del list2[-3] | |||||
self.assertEqual([0, 1, 3, 5, 6], list2) | |||||
self.assertRaises(IndexError, delete, list2, 100) | |||||
self.assertRaises(IndexError, delete, list2, -6) | |||||
list2[:] = range(10) | |||||
del list2[3:6] | |||||
self.assertEqual([0, 1, 2, 6, 7, 8, 9], list2) | |||||
del list2[-2:] | |||||
self.assertEqual([0, 1, 2, 6, 7], list2) | |||||
del list2[:2] | |||||
self.assertEqual([2, 6, 7], list2) | |||||
list2[:] = range(10) | |||||
del list2[2:8:2] | |||||
self.assertEqual([0, 1, 3, 5, 7, 8, 9], list2) | |||||
def _test_add_radd_iadd(self, builder): | |||||
"""Run tests on __r/i/add__ of a list built with *builder*.""" | |||||
list1 = builder(range(5)) | |||||
list2 = builder(range(5, 10)) | |||||
self.assertEqual([0, 1, 2, 3, 4, 5, 6], list1 + [5, 6]) | |||||
self.assertEqual([0, 1, 2, 3, 4], list1) | |||||
self.assertEqual(list(range(10)), list1 + list2) | |||||
self.assertEqual([-2, -1, 0, 1, 2, 3, 4], [-2, -1] + list1) | |||||
self.assertEqual([0, 1, 2, 3, 4], list1) | |||||
list1 += ["foo", "bar", "baz"] | |||||
self.assertEqual([0, 1, 2, 3, 4, "foo", "bar", "baz"], list1) | |||||
def _test_other_magic_methods(self, builder): | |||||
"""Run tests on other magic methods of a list built with *builder*.""" | |||||
list1 = builder([0, 1, 2, 3, "one", "two"]) | |||||
list2 = builder([]) | |||||
list3 = builder([0, 2, 3, 4]) | |||||
list4 = builder([0, 1, 2]) | |||||
if py3k: | |||||
self.assertEqual("[0, 1, 2, 3, 'one', 'two']", str(list1)) | |||||
self.assertEqual(b"\x00\x01\x02", bytes(list4)) | |||||
self.assertEqual("[0, 1, 2, 3, 'one', 'two']", repr(list1)) | |||||
else: | |||||
self.assertEqual("[0, 1, 2, 3, u'one', u'two']", unicode(list1)) | |||||
self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", str(list1)) | |||||
self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", repr(list1)) | |||||
self.assertTrue(list1 < list3) | |||||
self.assertTrue(list1 <= list3) | |||||
self.assertFalse(list1 == list3) | |||||
self.assertTrue(list1 != list3) | |||||
self.assertFalse(list1 > list3) | |||||
self.assertFalse(list1 >= list3) | |||||
other1 = [0, 2, 3, 4] | |||||
self.assertTrue(list1 < other1) | |||||
self.assertTrue(list1 <= other1) | |||||
self.assertFalse(list1 == other1) | |||||
self.assertTrue(list1 != other1) | |||||
self.assertFalse(list1 > other1) | |||||
self.assertFalse(list1 >= other1) | |||||
other2 = [0, 0, 1, 2] | |||||
self.assertFalse(list1 < other2) | |||||
self.assertFalse(list1 <= other2) | |||||
self.assertFalse(list1 == other2) | |||||
self.assertTrue(list1 != other2) | |||||
self.assertTrue(list1 > other2) | |||||
self.assertTrue(list1 >= other2) | |||||
other3 = [0, 1, 2, 3, "one", "two"] | |||||
self.assertFalse(list1 < other3) | |||||
self.assertTrue(list1 <= other3) | |||||
self.assertTrue(list1 == other3) | |||||
self.assertFalse(list1 != other3) | |||||
self.assertFalse(list1 > other3) | |||||
self.assertTrue(list1 >= other3) | |||||
self.assertTrue(bool(list1)) | |||||
self.assertFalse(bool(list2)) | |||||
self.assertEqual(6, len(list1)) | |||||
self.assertEqual(0, len(list2)) | |||||
out = [] | |||||
for obj in list1: | |||||
out.append(obj) | |||||
self.assertEqual([0, 1, 2, 3, "one", "two"], out) | |||||
out = [] | |||||
for ch in list2: | |||||
out.append(ch) | |||||
self.assertEqual([], out) | |||||
gen1 = iter(list1) | |||||
out = [] | |||||
for i in range(len(list1)): | |||||
out.append(next(gen1)) | |||||
self.assertRaises(StopIteration, next, gen1) | |||||
self.assertEqual([0, 1, 2, 3, "one", "two"], out) | |||||
gen2 = iter(list2) | |||||
self.assertRaises(StopIteration, next, gen2) | |||||
self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1))) | |||||
self.assertEqual([], list(reversed(list2))) | |||||
self.assertTrue("one" in list1) | |||||
self.assertTrue(3 in list1) | |||||
self.assertFalse(10 in list1) | |||||
self.assertFalse(0 in list2) | |||||
self.assertEqual([], list2 * 5) | |||||
self.assertEqual([], 5 * list2) | |||||
self.assertEqual([0, 1, 2, 0, 1, 2, 0, 1, 2], list4 * 3) | |||||
self.assertEqual([0, 1, 2, 0, 1, 2, 0, 1, 2], 3 * list4) | |||||
list4 *= 2 | |||||
self.assertEqual([0, 1, 2, 0, 1, 2], list4) | |||||
def _test_list_methods(self, builder): | |||||
"""Run tests on the public methods of a list built with *builder*.""" | |||||
list1 = builder(range(5)) | |||||
list2 = builder(["foo"]) | |||||
list3 = builder([("a", 5), ("d", 2), ("b", 8), ("c", 3)]) | |||||
list1.append(5) | |||||
list1.append(1) | |||||
list1.append(2) | |||||
self.assertEqual([0, 1, 2, 3, 4, 5, 1, 2], list1) | |||||
self.assertEqual(0, list1.count(6)) | |||||
self.assertEqual(2, list1.count(1)) | |||||
list1.extend(range(5, 8)) | |||||
self.assertEqual([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) | |||||
self.assertEqual(1, list1.index(1)) | |||||
self.assertEqual(6, list1.index(1, 3)) | |||||
self.assertEqual(6, list1.index(1, 3, 7)) | |||||
self.assertRaises(ValueError, list1.index, 1, 3, 5) | |||||
list1.insert(0, -1) | |||||
self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) | |||||
list1.insert(-1, 6.5) | |||||
self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) | |||||
list1.insert(13, 8) | |||||
self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1) | |||||
self.assertEqual(8, list1.pop()) | |||||
self.assertEqual(7, list1.pop()) | |||||
self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) | |||||
self.assertEqual(-1, list1.pop(0)) | |||||
self.assertEqual(5, list1.pop(5)) | |||||
self.assertEqual(6.5, list1.pop(-1)) | |||||
self.assertEqual([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) | |||||
self.assertEqual("foo", list2.pop()) | |||||
self.assertRaises(IndexError, list2.pop) | |||||
self.assertEqual([], list2) | |||||
list1.remove(6) | |||||
self.assertEqual([0, 1, 2, 3, 4, 1, 2, 5], list1) | |||||
list1.remove(1) | |||||
self.assertEqual([0, 2, 3, 4, 1, 2, 5], list1) | |||||
list1.remove(1) | |||||
self.assertEqual([0, 2, 3, 4, 2, 5], list1) | |||||
self.assertRaises(ValueError, list1.remove, 1) | |||||
list1.reverse() | |||||
self.assertEqual([5, 2, 4, 3, 2, 0], list1) | |||||
list1.sort() | |||||
self.assertEqual([0, 2, 2, 3, 4, 5], list1) | |||||
list1.sort(reverse=True) | |||||
self.assertEqual([5, 4, 3, 2, 2, 0], list1) | |||||
if not py3k: | |||||
func = lambda x, y: abs(3 - x) - abs(3 - y) # Distance from 3 | |||||
list1.sort(cmp=func) | |||||
self.assertEqual([3, 4, 2, 2, 5, 0], list1) | |||||
list1.sort(cmp=func, reverse=True) | |||||
self.assertEqual([0, 5, 4, 2, 2, 3], list1) | |||||
list3.sort(key=lambda i: i[1]) | |||||
self.assertEqual([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) | |||||
list3.sort(key=lambda i: i[1], reverse=True) | |||||
self.assertEqual([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) | |||||
def test_docs(self): | |||||
"""make sure the methods of SmartList/_ListProxy have docstrings""" | |||||
methods = ["append", "count", "extend", "index", "insert", "pop", | |||||
"remove", "reverse", "sort"] | |||||
for meth in methods: | |||||
expected = getattr(list, meth).__doc__ | |||||
smartlist_doc = getattr(SmartList, meth).__doc__ | |||||
listproxy_doc = getattr(_ListProxy, meth).__doc__ | |||||
self.assertEqual(expected, smartlist_doc) | |||||
self.assertEqual(expected, listproxy_doc) | |||||
def test_doctest(self): | |||||
"""make sure the test embedded in SmartList's docstring passes""" | |||||
parent = SmartList([0, 1, 2, 3]) | |||||
self.assertEqual([0, 1, 2, 3], parent) | |||||
child = parent[2:] | |||||
self.assertEqual([2, 3], child) | |||||
child.append(4) | |||||
self.assertEqual([2, 3, 4], child) | |||||
self.assertEqual([0, 1, 2, 3, 4], parent) | |||||
def test_parent_get_set_del(self): | |||||
"""make sure SmartList's getitem/setitem/delitem work""" | |||||
self._test_get_set_del_item(SmartList) | |||||
def test_parent_add(self): | |||||
"""make sure SmartList's add/radd/iadd work""" | |||||
self._test_add_radd_iadd(SmartList) | |||||
def test_parent_unaffected_magics(self): | |||||
"""sanity checks against SmartList features that were not modified""" | |||||
self._test_other_magic_methods(SmartList) | |||||
def test_parent_methods(self): | |||||
"""make sure SmartList's non-magic methods work, like append()""" | |||||
self._test_list_methods(SmartList) | |||||
def test_child_get_set_del(self): | |||||
"""make sure _ListProxy's getitem/setitem/delitem work""" | |||||
self._test_get_set_del_item(lambda L: SmartList(list(L))[:]) | |||||
self._test_get_set_del_item(lambda L: SmartList([999] + list(L))[1:]) | |||||
self._test_get_set_del_item(lambda L: SmartList(list(L) + [999])[:-1]) | |||||
builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] | |||||
self._test_get_set_del_item(builder) | |||||
def test_child_add(self): | |||||
"""make sure _ListProxy's add/radd/iadd work""" | |||||
self._test_add_radd_iadd(lambda L: SmartList(list(L))[:]) | |||||
self._test_add_radd_iadd(lambda L: SmartList([999] + list(L))[1:]) | |||||
self._test_add_radd_iadd(lambda L: SmartList(list(L) + [999])[:-1]) | |||||
builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] | |||||
self._test_add_radd_iadd(builder) | |||||
def test_child_other_magics(self): | |||||
"""make sure _ListProxy's other magically implemented features work""" | |||||
self._test_other_magic_methods(lambda L: SmartList(list(L))[:]) | |||||
self._test_other_magic_methods(lambda L: SmartList([999] + list(L))[1:]) | |||||
self._test_other_magic_methods(lambda L: SmartList(list(L) + [999])[:-1]) | |||||
builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] | |||||
self._test_other_magic_methods(builder) | |||||
def test_child_methods(self): | |||||
"""make sure _ListProxy's non-magic methods work, like append()""" | |||||
self._test_list_methods(lambda L: SmartList(list(L))[:]) | |||||
self._test_list_methods(lambda L: SmartList([999] + list(L))[1:]) | |||||
self._test_list_methods(lambda L: SmartList(list(L) + [999])[:-1]) | |||||
builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] | |||||
self._test_list_methods(builder) | |||||
def test_influence(self): | |||||
"""make sure changes are propagated from parents to children""" | |||||
parent = SmartList([0, 1, 2, 3, 4, 5]) | |||||
child1 = parent[2:] | |||||
child2 = parent[2:5] | |||||
parent.append(6) | |||||
child1.append(7) | |||||
child2.append(4.5) | |||||
self.assertEqual([0, 1, 2, 3, 4, 4.5, 5, 6, 7], parent) | |||||
self.assertEqual([2, 3, 4, 4.5, 5, 6, 7], child1) | |||||
self.assertEqual([2, 3, 4, 4.5], child2) | |||||
parent.insert(0, -1) | |||||
parent.insert(4, 2.5) | |||||
parent.insert(10, 6.5) | |||||
self.assertEqual([-1, 0, 1, 2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], parent) | |||||
self.assertEqual([2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], child1) | |||||
self.assertEqual([2, 2.5, 3, 4, 4.5], child2) | |||||
self.assertEqual(7, parent.pop()) | |||||
self.assertEqual(6.5, child1.pop()) | |||||
self.assertEqual(4.5, child2.pop()) | |||||
self.assertEqual([-1, 0, 1, 2, 2.5, 3, 4, 5, 6], parent) | |||||
self.assertEqual([2, 2.5, 3, 4, 5, 6], child1) | |||||
self.assertEqual([2, 2.5, 3, 4], child2) | |||||
parent.remove(-1) | |||||
child1.remove(2.5) | |||||
self.assertEqual([0, 1, 2, 3, 4, 5, 6], parent) | |||||
self.assertEqual([2, 3, 4, 5, 6], child1) | |||||
self.assertEqual([2, 3, 4], child2) | |||||
self.assertEqual(0, parent.pop(0)) | |||||
self.assertEqual([1, 2, 3, 4, 5, 6], parent) | |||||
self.assertEqual([2, 3, 4, 5, 6], child1) | |||||
self.assertEqual([2, 3, 4], child2) | |||||
child2.reverse() | |||||
self.assertEqual([1, 4, 3, 2, 5, 6], parent) | |||||
self.assertEqual([4, 3, 2, 5, 6], child1) | |||||
self.assertEqual([4, 3, 2], child2) | |||||
parent.extend([7, 8]) | |||||
child1.extend([8.1, 8.2]) | |||||
child2.extend([1.9, 1.8]) | |||||
self.assertEqual([1, 4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], parent) | |||||
self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) | |||||
self.assertEqual([4, 3, 2, 1.9, 1.8], child2) | |||||
if __name__ == "__main__": | |||||
unittest.main(verbosity=2) |
@@ -0,0 +1,435 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from __future__ import unicode_literals | |||||
from sys import getdefaultencoding | |||||
from types import GeneratorType | |||||
import unittest | |||||
from mwparserfromhell.compat import bytes, py3k, str | |||||
from mwparserfromhell.string_mixin import StringMixIn | |||||
from .compat import range | |||||
class _FakeString(StringMixIn): | |||||
def __init__(self, data): | |||||
self._data = data | |||||
def __unicode__(self): | |||||
return self._data | |||||
class TestStringMixIn(unittest.TestCase): | |||||
"""Test cases for the StringMixIn class.""" | |||||
def test_docs(self): | |||||
"""make sure the various methods of StringMixIn have docstrings""" | |||||
methods = [ | |||||
"capitalize", "center", "count", "encode", "endswith", | |||||
"expandtabs", "find", "format", "index", "isalnum", "isalpha", | |||||
"isdecimal", "isdigit", "islower", "isnumeric", "isspace", | |||||
"istitle", "isupper", "join", "ljust", "lower", "lstrip", | |||||
"partition", "replace", "rfind", "rindex", "rjust", "rpartition", | |||||
"rsplit", "rstrip", "split", "splitlines", "startswith", "strip", | |||||
"swapcase", "title", "translate", "upper", "zfill"] | |||||
if py3k: | |||||
methods.extend(["casefold", "format_map", "isidentifier", | |||||
"isprintable", "maketrans"]) | |||||
else: | |||||
methods.append("decode") | |||||
for meth in methods: | |||||
expected = getattr(str, meth).__doc__ | |||||
actual = getattr(StringMixIn, meth).__doc__ | |||||
self.assertEqual(expected, actual) | |||||
def test_types(self): | |||||
"""make sure StringMixIns convert to different types correctly""" | |||||
fstr = _FakeString("fake string") | |||||
self.assertEqual(str(fstr), "fake string") | |||||
self.assertEqual(bytes(fstr), b"fake string") | |||||
if py3k: | |||||
self.assertEqual(repr(fstr), "'fake string'") | |||||
else: | |||||
self.assertEqual(repr(fstr), b"u'fake string'") | |||||
self.assertIsInstance(str(fstr), str) | |||||
self.assertIsInstance(bytes(fstr), bytes) | |||||
if py3k: | |||||
self.assertIsInstance(repr(fstr), str) | |||||
else: | |||||
self.assertIsInstance(repr(fstr), bytes) | |||||
def test_comparisons(self): | |||||
"""make sure comparison operators work""" | |||||
str1 = _FakeString("this is a fake string") | |||||
str2 = _FakeString("this is a fake string") | |||||
str3 = _FakeString("fake string, this is") | |||||
str4 = "this is a fake string" | |||||
str5 = "fake string, this is" | |||||
self.assertFalse(str1 > str2) | |||||
self.assertTrue(str1 >= str2) | |||||
self.assertTrue(str1 == str2) | |||||
self.assertFalse(str1 != str2) | |||||
self.assertFalse(str1 < str2) | |||||
self.assertTrue(str1 <= str2) | |||||
self.assertTrue(str1 > str3) | |||||
self.assertTrue(str1 >= str3) | |||||
self.assertFalse(str1 == str3) | |||||
self.assertTrue(str1 != str3) | |||||
self.assertFalse(str1 < str3) | |||||
self.assertFalse(str1 <= str3) | |||||
self.assertFalse(str1 > str4) | |||||
self.assertTrue(str1 >= str4) | |||||
self.assertTrue(str1 == str4) | |||||
self.assertFalse(str1 != str4) | |||||
self.assertFalse(str1 < str4) | |||||
self.assertTrue(str1 <= str4) | |||||
self.assertTrue(str1 > str5) | |||||
self.assertTrue(str1 >= str5) | |||||
self.assertFalse(str1 == str5) | |||||
self.assertTrue(str1 != str5) | |||||
self.assertFalse(str1 < str5) | |||||
self.assertFalse(str1 <= str5) | |||||
def test_other_magics(self): | |||||
"""test other magically implemented features, like len() and iter()""" | |||||
str1 = _FakeString("fake string") | |||||
str2 = _FakeString("") | |||||
expected = ["f", "a", "k", "e", " ", "s", "t", "r", "i", "n", "g"] | |||||
self.assertTrue(str1) | |||||
self.assertFalse(str2) | |||||
self.assertEqual(11, len(str1)) | |||||
self.assertEqual(0, len(str2)) | |||||
out = [] | |||||
for ch in str1: | |||||
out.append(ch) | |||||
self.assertEqual(expected, out) | |||||
out = [] | |||||
for ch in str2: | |||||
out.append(ch) | |||||
self.assertEqual([], out) | |||||
gen1 = iter(str1) | |||||
gen2 = iter(str2) | |||||
self.assertIsInstance(gen1, GeneratorType) | |||||
self.assertIsInstance(gen2, GeneratorType) | |||||
out = [] | |||||
for i in range(len(str1)): | |||||
out.append(next(gen1)) | |||||
self.assertRaises(StopIteration, next, gen1) | |||||
self.assertEqual(expected, out) | |||||
self.assertRaises(StopIteration, next, gen2) | |||||
self.assertEqual("gnirts ekaf", "".join(list(reversed(str1)))) | |||||
self.assertEqual([], list(reversed(str2))) | |||||
self.assertEqual("f", str1[0]) | |||||
self.assertEqual(" ", str1[4]) | |||||
self.assertEqual("g", str1[10]) | |||||
self.assertEqual("n", str1[-2]) | |||||
self.assertRaises(IndexError, lambda: str1[11]) | |||||
self.assertRaises(IndexError, lambda: str2[0]) | |||||
self.assertTrue("k" in str1) | |||||
self.assertTrue("fake" in str1) | |||||
self.assertTrue("str" in str1) | |||||
self.assertTrue("" in str1) | |||||
self.assertTrue("" in str2) | |||||
self.assertFalse("real" in str1) | |||||
self.assertFalse("s" in str2) | |||||
def test_other_methods(self): | |||||
"""test the remaining non-magic methods of StringMixIn""" | |||||
str1 = _FakeString("fake string") | |||||
self.assertEqual("Fake string", str1.capitalize()) | |||||
self.assertEqual(" fake string ", str1.center(15)) | |||||
self.assertEqual(" fake string ", str1.center(16)) | |||||
self.assertEqual("qqfake stringqq", str1.center(15, "q")) | |||||
self.assertEqual(1, str1.count("e")) | |||||
self.assertEqual(0, str1.count("z")) | |||||
self.assertEqual(1, str1.count("r", 7)) | |||||
self.assertEqual(0, str1.count("r", 8)) | |||||
self.assertEqual(1, str1.count("r", 5, 9)) | |||||
self.assertEqual(0, str1.count("r", 5, 7)) | |||||
if not py3k: | |||||
str2 = _FakeString("fo") | |||||
self.assertEqual(str1, str1.decode()) | |||||
actual = _FakeString("\\U00010332\\U0001033f\\U00010344") | |||||
self.assertEqual("𐌲𐌿𐍄", actual.decode("unicode_escape")) | |||||
self.assertRaises(UnicodeError, str2.decode, "punycode") | |||||
self.assertEqual("", str2.decode("punycode", "ignore")) | |||||
str3 = _FakeString("𐌲𐌿𐍄") | |||||
actual = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84" | |||||
self.assertEqual(b"fake string", str1.encode()) | |||||
self.assertEqual(actual, str3.encode("utf-8")) | |||||
self.assertEqual(actual, str3.encode(encoding="utf-8")) | |||||
if getdefaultencoding() == "ascii": | |||||
self.assertRaises(UnicodeEncodeError, str3.encode) | |||||
elif getdefaultencoding() == "utf-8": | |||||
self.assertEqual(actual, str3.encode()) | |||||
self.assertRaises(UnicodeEncodeError, str3.encode, "ascii") | |||||
self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict") | |||||
if getdefaultencoding() == "ascii": | |||||
self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") | |||||
elif getdefaultencoding() == "utf-8": | |||||
self.assertEqual(actual, str3.encode(errors="strict")) | |||||
self.assertEqual(b"", str3.encode("ascii", "ignore")) | |||||
if getdefaultencoding() == "ascii": | |||||
self.assertEqual(b"", str3.encode(errors="ignore")) | |||||
elif getdefaultencoding() == "utf-8": | |||||
self.assertEqual(actual, str3.encode(errors="ignore")) | |||||
self.assertTrue(str1.endswith("ing")) | |||||
self.assertFalse(str1.endswith("ingh")) | |||||
str4 = _FakeString("\tfoobar") | |||||
self.assertEqual("fake string", str1) | |||||
self.assertEqual(" foobar", str4.expandtabs()) | |||||
self.assertEqual(" foobar", str4.expandtabs(4)) | |||||
self.assertEqual(3, str1.find("e")) | |||||
self.assertEqual(-1, str1.find("z")) | |||||
self.assertEqual(7, str1.find("r", 7)) | |||||
self.assertEqual(-1, str1.find("r", 8)) | |||||
self.assertEqual(7, str1.find("r", 5, 9)) | |||||
self.assertEqual(-1, str1.find("r", 5, 7)) | |||||
str5 = _FakeString("foo{0}baz") | |||||
str6 = _FakeString("foo{abc}baz") | |||||
str7 = _FakeString("foo{0}{abc}buzz") | |||||
str8 = _FakeString("{0}{1}") | |||||
self.assertEqual("fake string", str1.format()) | |||||
self.assertEqual("foobarbaz", str5.format("bar")) | |||||
self.assertEqual("foobarbaz", str6.format(abc="bar")) | |||||
self.assertEqual("foobarbazbuzz", str7.format("bar", abc="baz")) | |||||
self.assertRaises(IndexError, str8.format, "abc") | |||||
if py3k: | |||||
self.assertEqual("fake string", str1.format_map({})) | |||||
self.assertEqual("foobarbaz", str6.format_map({"abc": "bar"})) | |||||
self.assertRaises(ValueError, str5.format_map, {0: "abc"}) | |||||
self.assertEqual(3, str1.index("e")) | |||||
self.assertRaises(ValueError, str1.index, "z") | |||||
self.assertEqual(7, str1.index("r", 7)) | |||||
self.assertRaises(ValueError, str1.index, "r", 8) | |||||
self.assertEqual(7, str1.index("r", 5, 9)) | |||||
self.assertRaises(ValueError, str1.index, "r", 5, 7) | |||||
str9 = _FakeString("foobar") | |||||
str10 = _FakeString("foobar123") | |||||
str11 = _FakeString("foo bar") | |||||
self.assertTrue(str9.isalnum()) | |||||
self.assertTrue(str10.isalnum()) | |||||
self.assertFalse(str11.isalnum()) | |||||
self.assertTrue(str9.isalpha()) | |||||
self.assertFalse(str10.isalpha()) | |||||
self.assertFalse(str11.isalpha()) | |||||
str12 = _FakeString("123") | |||||
str13 = _FakeString("\u2155") | |||||
str14 = _FakeString("\u00B2") | |||||
self.assertFalse(str9.isdecimal()) | |||||
self.assertTrue(str12.isdecimal()) | |||||
self.assertFalse(str13.isdecimal()) | |||||
self.assertFalse(str14.isdecimal()) | |||||
self.assertFalse(str9.isdigit()) | |||||
self.assertTrue(str12.isdigit()) | |||||
self.assertFalse(str13.isdigit()) | |||||
self.assertTrue(str14.isdigit()) | |||||
if py3k: | |||||
self.assertTrue(str9.isidentifier()) | |||||
self.assertTrue(str10.isidentifier()) | |||||
self.assertFalse(str11.isidentifier()) | |||||
self.assertFalse(str12.isidentifier()) | |||||
str15 = _FakeString("") | |||||
str16 = _FakeString("FooBar") | |||||
self.assertTrue(str9.islower()) | |||||
self.assertFalse(str15.islower()) | |||||
self.assertFalse(str16.islower()) | |||||
self.assertFalse(str9.isnumeric()) | |||||
self.assertTrue(str12.isnumeric()) | |||||
self.assertTrue(str13.isnumeric()) | |||||
self.assertTrue(str14.isnumeric()) | |||||
if py3k: | |||||
str16B = _FakeString("\x01\x02") | |||||
self.assertTrue(str9.isprintable()) | |||||
self.assertTrue(str13.isprintable()) | |||||
self.assertTrue(str14.isprintable()) | |||||
self.assertTrue(str15.isprintable()) | |||||
self.assertFalse(str16B.isprintable()) | |||||
str17 = _FakeString(" ") | |||||
str18 = _FakeString("\t \t \r\n") | |||||
self.assertFalse(str1.isspace()) | |||||
self.assertFalse(str9.isspace()) | |||||
self.assertTrue(str17.isspace()) | |||||
self.assertTrue(str18.isspace()) | |||||
str19 = _FakeString("This Sentence Looks Like A Title") | |||||
str20 = _FakeString("This sentence doesn't LookLikeATitle") | |||||
self.assertFalse(str15.istitle()) | |||||
self.assertTrue(str19.istitle()) | |||||
self.assertFalse(str20.istitle()) | |||||
str21 = _FakeString("FOOBAR") | |||||
self.assertFalse(str9.isupper()) | |||||
self.assertFalse(str15.isupper()) | |||||
self.assertTrue(str21.isupper()) | |||||
self.assertEqual("foobar", str15.join(["foo", "bar"])) | |||||
self.assertEqual("foo123bar123baz", str12.join(("foo", "bar", "baz"))) | |||||
self.assertEqual("fake string ", str1.ljust(15)) | |||||
self.assertEqual("fake string ", str1.ljust(16)) | |||||
self.assertEqual("fake stringqqqq", str1.ljust(15, "q")) | |||||
str22 = _FakeString("ß") | |||||
self.assertEqual("", str15.lower()) | |||||
self.assertEqual("foobar", str16.lower()) | |||||
self.assertEqual("ß", str22.lower()) | |||||
if py3k: | |||||
self.assertEqual("", str15.casefold()) | |||||
self.assertEqual("foobar", str16.casefold()) | |||||
self.assertEqual("ss", str22.casefold()) | |||||
str23 = _FakeString(" fake string ") | |||||
self.assertEqual("fake string", str1.lstrip()) | |||||
self.assertEqual("fake string ", str23.lstrip()) | |||||
self.assertEqual("ke string", str1.lstrip("abcdef")) | |||||
self.assertEqual(("fa", "ke", " string"), str1.partition("ke")) | |||||
self.assertEqual(("fake string", "", ""), str1.partition("asdf")) | |||||
str24 = _FakeString("boo foo moo") | |||||
self.assertEqual("real string", str1.replace("fake", "real")) | |||||
self.assertEqual("bu fu moo", str24.replace("oo", "u", 2)) | |||||
self.assertEqual(3, str1.rfind("e")) | |||||
self.assertEqual(-1, str1.rfind("z")) | |||||
self.assertEqual(7, str1.rfind("r", 7)) | |||||
self.assertEqual(-1, str1.rfind("r", 8)) | |||||
self.assertEqual(7, str1.rfind("r", 5, 9)) | |||||
self.assertEqual(-1, str1.rfind("r", 5, 7)) | |||||
self.assertEqual(3, str1.rindex("e")) | |||||
self.assertRaises(ValueError, str1.rindex, "z") | |||||
self.assertEqual(7, str1.rindex("r", 7)) | |||||
self.assertRaises(ValueError, str1.rindex, "r", 8) | |||||
self.assertEqual(7, str1.rindex("r", 5, 9)) | |||||
self.assertRaises(ValueError, str1.rindex, "r", 5, 7) | |||||
self.assertEqual(" fake string", str1.rjust(15)) | |||||
self.assertEqual(" fake string", str1.rjust(16)) | |||||
self.assertEqual("qqqqfake string", str1.rjust(15, "q")) | |||||
self.assertEqual(("fa", "ke", " string"), str1.rpartition("ke")) | |||||
self.assertEqual(("", "", "fake string"), str1.rpartition("asdf")) | |||||
str25 = _FakeString(" this is a sentence with whitespace ") | |||||
actual = ["this", "is", "a", "sentence", "with", "whitespace"] | |||||
self.assertEqual(actual, str25.rsplit()) | |||||
self.assertEqual(actual, str25.rsplit(None)) | |||||
actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", | |||||
"", "whitespace", ""] | |||||
self.assertEqual(actual, str25.rsplit(" ")) | |||||
actual = [" this is a", "sentence", "with", "whitespace"] | |||||
self.assertEqual(actual, str25.rsplit(None, 3)) | |||||
actual = [" this is a sentence with", "", "whitespace", ""] | |||||
self.assertEqual(actual, str25.rsplit(" ", 3)) | |||||
if py3k: | |||||
actual = [" this is a", "sentence", "with", "whitespace"] | |||||
self.assertEqual(actual, str25.rsplit(maxsplit=3)) | |||||
self.assertEqual("fake string", str1.rstrip()) | |||||
self.assertEqual(" fake string", str23.rstrip()) | |||||
self.assertEqual("fake stri", str1.rstrip("ngr")) | |||||
actual = ["this", "is", "a", "sentence", "with", "whitespace"] | |||||
self.assertEqual(actual, str25.split()) | |||||
self.assertEqual(actual, str25.split(None)) | |||||
actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", | |||||
"", "whitespace", ""] | |||||
self.assertEqual(actual, str25.split(" ")) | |||||
actual = ["this", "is", "a", "sentence with whitespace "] | |||||
self.assertEqual(actual, str25.split(None, 3)) | |||||
actual = ["", "", "", "this is a sentence with whitespace "] | |||||
self.assertEqual(actual, str25.split(" ", 3)) | |||||
if py3k: | |||||
actual = ["this", "is", "a", "sentence with whitespace "] | |||||
self.assertEqual(actual, str25.split(maxsplit=3)) | |||||
str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") | |||||
self.assertEqual(["lines", "of", "text", "are", "presented", "here"], | |||||
str26.splitlines()) | |||||
self.assertEqual(["lines\n", "of\n", "text\r\n", "are\r\n", | |||||
"presented\n", "here"], str26.splitlines(True)) | |||||
self.assertTrue(str1.startswith("fake")) | |||||
self.assertFalse(str1.startswith("faker")) | |||||
self.assertEqual("fake string", str1.strip()) | |||||
self.assertEqual("fake string", str23.strip()) | |||||
self.assertEqual("ke stri", str1.strip("abcdefngr")) | |||||
self.assertEqual("fOObAR", str16.swapcase()) | |||||
self.assertEqual("Fake String", str1.title()) | |||||
if py3k: | |||||
table1 = str.maketrans({97: "1", 101: "2", 105: "3", 111: "4", | |||||
117: "5"}) | |||||
table2 = str.maketrans("aeiou", "12345") | |||||
table3 = str.maketrans("aeiou", "12345", "rts") | |||||
self.assertEqual("f1k2 str3ng", str1.translate(table1)) | |||||
self.assertEqual("f1k2 str3ng", str1.translate(table2)) | |||||
self.assertEqual("f1k2 3ng", str1.translate(table3)) | |||||
else: | |||||
table = {97: "1", 101: "2", 105: "3", 111: "4", 117: "5"} | |||||
self.assertEqual("f1k2 str3ng", str1.translate(table)) | |||||
self.assertEqual("", str15.upper()) | |||||
self.assertEqual("FOOBAR", str16.upper()) | |||||
self.assertEqual("123", str12.zfill(3)) | |||||
self.assertEqual("000123", str12.zfill(6)) | |||||
if __name__ == "__main__": | |||||
unittest.main(verbosity=2) |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -20,87 +20,345 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
from itertools import permutations | |||||
from __future__ import unicode_literals | |||||
import unittest | import unittest | ||||
from mwparserfromhell.parameter import Parameter | |||||
from mwparserfromhell.template import Template | |||||
from mwparserfromhell.compat import str | |||||
from mwparserfromhell.nodes import HTMLEntity, Template, Text | |||||
from mwparserfromhell.nodes.extras import Parameter | |||||
from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext | |||||
class TestTemplate(unittest.TestCase): | |||||
def setUp(self): | |||||
self.name = "foo" | |||||
self.bar = Parameter("1", "bar") | |||||
self.baz = Parameter("2", "baz") | |||||
self.eggs = Parameter("eggs", "spam") | |||||
self.params = [self.bar, self.baz, self.eggs] | |||||
pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True) | |||||
pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False) | |||||
def test_construct(self): | |||||
Template(self.name) | |||||
Template(self.name, self.params) | |||||
Template(name=self.name) | |||||
Template(name=self.name, params=self.params) | |||||
class TestTemplate(TreeEqualityTestCase): | |||||
"""Test cases for the Template node.""" | |||||
def test_unicode(self): | |||||
"""test Template.__unicode__()""" | |||||
node = Template(wraptext("foobar")) | |||||
self.assertEqual("{{foobar}}", str(node)) | |||||
node2 = Template(wraptext("foo"), | |||||
[pgenh("1", "bar"), pgens("abc", "def")]) | |||||
self.assertEqual("{{foo|bar|abc=def}}", str(node2)) | |||||
def test_iternodes(self): | |||||
"""test Template.__iternodes__()""" | |||||
node1n1 = Text("foobar") | |||||
node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("abc") | |||||
node2n4, node2n5 = Text("def"), Text("ghi") | |||||
node2p1 = Parameter(wraptext("1"), wrap([node2n2]), showkey=False) | |||||
node2p2 = Parameter(wrap([node2n3]), wrap([node2n4, node2n5]), | |||||
showkey=True) | |||||
node1 = Template(wrap([node1n1])) | |||||
node2 = Template(wrap([node2n1]), [node2p1, node2p2]) | |||||
gen1 = node1.__iternodes__(getnodes) | |||||
gen2 = node2.__iternodes__(getnodes) | |||||
self.assertEqual((None, node1), next(gen1)) | |||||
self.assertEqual((None, node2), next(gen2)) | |||||
self.assertEqual((node1.name, node1n1), next(gen1)) | |||||
self.assertEqual((node2.name, node2n1), next(gen2)) | |||||
self.assertEqual((node2.params[0].value, node2n2), next(gen2)) | |||||
self.assertEqual((node2.params[1].name, node2n3), next(gen2)) | |||||
self.assertEqual((node2.params[1].value, node2n4), next(gen2)) | |||||
self.assertEqual((node2.params[1].value, node2n5), next(gen2)) | |||||
self.assertRaises(StopIteration, next, gen1) | |||||
self.assertRaises(StopIteration, next, gen2) | |||||
def test_strip(self): | |||||
"""test Template.__strip__()""" | |||||
node1 = Template(wraptext("foobar")) | |||||
node2 = Template(wraptext("foo"), | |||||
[pgenh("1", "bar"), pgens("abc", "def")]) | |||||
for a in (True, False): | |||||
for b in (True, False): | |||||
self.assertEqual(None, node1.__strip__(a, b)) | |||||
self.assertEqual(None, node2.__strip__(a, b)) | |||||
def test_showtree(self): | |||||
"""test Template.__showtree__()""" | |||||
output = [] | |||||
getter, marker = object(), object() | |||||
get = lambda code: output.append((getter, code)) | |||||
mark = lambda: output.append(marker) | |||||
node1 = Template(wraptext("foobar")) | |||||
node2 = Template(wraptext("foo"), | |||||
[pgenh("1", "bar"), pgens("abc", "def")]) | |||||
node1.__showtree__(output.append, get, mark) | |||||
node2.__showtree__(output.append, get, mark) | |||||
valid = [ | |||||
"{{", (getter, node1.name), "}}", "{{", (getter, node2.name), | |||||
" | ", marker, (getter, node2.params[0].name), " = ", marker, | |||||
(getter, node2.params[0].value), " | ", marker, | |||||
(getter, node2.params[1].name), " = ", marker, | |||||
(getter, node2.params[1].value), "}}"] | |||||
self.assertEqual(valid, output) | |||||
def test_name(self): | def test_name(self): | ||||
templates = [ | |||||
Template(self.name), | |||||
Template(self.name, self.params), | |||||
Template(name=self.name), | |||||
Template(name=self.name, params=self.params) | |||||
] | |||||
for template in templates: | |||||
self.assertEqual(template.name, self.name) | |||||
"""test getter/setter for the name attribute""" | |||||
name = wraptext("foobar") | |||||
node1 = Template(name) | |||||
node2 = Template(name, [pgenh("1", "bar")]) | |||||
self.assertIs(name, node1.name) | |||||
self.assertIs(name, node2.name) | |||||
node1.name = "asdf" | |||||
node2.name = "téstïng" | |||||
self.assertWikicodeEqual(wraptext("asdf"), node1.name) | |||||
self.assertWikicodeEqual(wraptext("téstïng"), node2.name) | |||||
def test_params(self): | def test_params(self): | ||||
for template in (Template(self.name), Template(name=self.name)): | |||||
self.assertEqual(template.params, []) | |||||
for template in (Template(self.name, self.params), | |||||
Template(name=self.name, params=self.params)): | |||||
self.assertEqual(template.params, self.params) | |||||
def test_getitem(self): | |||||
template = Template(name=self.name, params=self.params) | |||||
self.assertIs(template[0], self.bar) | |||||
self.assertIs(template[1], self.baz) | |||||
self.assertIs(template[2], self.eggs) | |||||
self.assertIs(template["1"], self.bar) | |||||
self.assertIs(template["2"], self.baz) | |||||
self.assertIs(template["eggs"], self.eggs) | |||||
def test_render(self): | |||||
tests = [ | |||||
(Template(self.name), "{{foo}}"), | |||||
(Template(self.name, self.params), "{{foo|bar|baz|eggs=spam}}") | |||||
] | |||||
for template, rendered in tests: | |||||
self.assertEqual(template.render(), rendered) | |||||
def test_repr(self): | |||||
correct1= 'Template(name=foo, params={})' | |||||
correct2 = 'Template(name=foo, params={"1": "bar", "2": "baz", "eggs": "spam"})' | |||||
tests = [(Template(self.name), correct1), | |||||
(Template(self.name, self.params), correct2)] | |||||
for template, correct in tests: | |||||
self.assertEqual(repr(template), correct) | |||||
self.assertEqual(str(template), correct) | |||||
def test_cmp(self): | |||||
tmp1 = Template(self.name) | |||||
tmp2 = Template(name=self.name) | |||||
tmp3 = Template(self.name, []) | |||||
tmp4 = Template(name=self.name, params=[]) | |||||
tmp5 = Template(self.name, self.params) | |||||
tmp6 = Template(name=self.name, params=self.params) | |||||
for tmpA, tmpB in permutations((tmp1, tmp2, tmp3, tmp4), 2): | |||||
self.assertEqual(tmpA, tmpB) | |||||
for tmpA, tmpB in permutations((tmp5, tmp6), 2): | |||||
self.assertEqual(tmpA, tmpB) | |||||
for tmpA in (tmp5, tmp6): | |||||
for tmpB in (tmp1, tmp2, tmp3, tmp4): | |||||
self.assertNotEqual(tmpA, tmpB) | |||||
self.assertNotEqual(tmpB, tmpA) | |||||
"""test getter for the params attribute""" | |||||
node1 = Template(wraptext("foobar")) | |||||
plist = [pgenh("1", "bar"), pgens("abc", "def")] | |||||
node2 = Template(wraptext("foo"), plist) | |||||
self.assertEqual([], node1.params) | |||||
self.assertIs(plist, node2.params) | |||||
def test_has_param(self): | |||||
"""test Template.has_param()""" | |||||
node1 = Template(wraptext("foobar")) | |||||
node2 = Template(wraptext("foo"), | |||||
[pgenh("1", "bar"), pgens("\nabc ", "def")]) | |||||
node3 = Template(wraptext("foo"), | |||||
[pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")]) | |||||
node4 = Template(wraptext("foo"), [pgenh("1", "a"), pgens("b", " ")]) | |||||
self.assertFalse(node1.has_param("foobar")) | |||||
self.assertTrue(node2.has_param(1)) | |||||
self.assertTrue(node2.has_param("abc")) | |||||
self.assertFalse(node2.has_param("def")) | |||||
self.assertTrue(node3.has_param("1")) | |||||
self.assertTrue(node3.has_param(" b ")) | |||||
self.assertFalse(node4.has_param("b")) | |||||
self.assertTrue(node3.has_param("b", False)) | |||||
self.assertTrue(node4.has_param("b", False)) | |||||
def test_get(self): | |||||
"""test Template.get()""" | |||||
node1 = Template(wraptext("foobar")) | |||||
node2p1 = pgenh("1", "bar") | |||||
node2p2 = pgens("abc", "def") | |||||
node2 = Template(wraptext("foo"), [node2p1, node2p2]) | |||||
node3p1 = pgens("b", "c") | |||||
node3p2 = pgens("1", "d") | |||||
node3 = Template(wraptext("foo"), [pgenh("1", "a"), node3p1, node3p2]) | |||||
node4p1 = pgens(" b", " ") | |||||
node4 = Template(wraptext("foo"), [pgenh("1", "a"), node4p1]) | |||||
self.assertRaises(ValueError, node1.get, "foobar") | |||||
self.assertIs(node2p1, node2.get(1)) | |||||
self.assertIs(node2p2, node2.get("abc")) | |||||
self.assertRaises(ValueError, node2.get, "def") | |||||
self.assertIs(node3p1, node3.get("b")) | |||||
self.assertIs(node3p2, node3.get("1")) | |||||
self.assertIs(node4p1, node4.get("b ")) | |||||
def test_add(self): | |||||
"""test Template.add()""" | |||||
node1 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) | |||||
node2 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) | |||||
node3 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) | |||||
node4 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) | |||||
node5 = Template(wraptext("a"), [pgens("b", "c"), | |||||
pgens(" d ", "e")]) | |||||
node6 = Template(wraptext("a"), [pgens("b", "c"), pgens("b", "d"), | |||||
pgens("b", "e")]) | |||||
node7 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) | |||||
node8p = pgenh("1", "d") | |||||
node8 = Template(wraptext("a"), [pgens("b", "c"), node8p]) | |||||
node9 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) | |||||
node10 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "e")]) | |||||
node11 = Template(wraptext("a"), [pgens("b", "c")]) | |||||
node12 = Template(wraptext("a"), [pgens("b", "c")]) | |||||
node13 = Template(wraptext("a"), [ | |||||
pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) | |||||
node14 = Template(wraptext("a\n"), [ | |||||
pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), | |||||
pgens("h ", " i\n")]) | |||||
node15 = Template(wraptext("a"), [ | |||||
pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) | |||||
node16 = Template(wraptext("a"), [ | |||||
pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) | |||||
node17 = Template(wraptext("a"), [ | |||||
pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) | |||||
node18 = Template(wraptext("a\n"), [ | |||||
pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), | |||||
pgens("h ", " i\n")]) | |||||
node19 = Template(wraptext("a"), [ | |||||
pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) | |||||
node20 = Template(wraptext("a"), [ | |||||
pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) | |||||
node21 = Template(wraptext("a"), [pgenh("1", "b")]) | |||||
node22 = Template(wraptext("a"), [pgenh("1", "b")]) | |||||
node23 = Template(wraptext("a"), [pgenh("1", "b")]) | |||||
node24 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), | |||||
pgenh("3", "d"), pgenh("4", "e")]) | |||||
node25 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), | |||||
pgens("4", "d"), pgens("5", "e")]) | |||||
node26 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), | |||||
pgens("4", "d"), pgens("5", "e")]) | |||||
node27 = Template(wraptext("a"), [pgenh("1", "b")]) | |||||
node28 = Template(wraptext("a"), [pgenh("1", "b")]) | |||||
node29 = Template(wraptext("a"), [pgens("b", "c")]) | |||||
node30 = Template(wraptext("a"), [pgenh("1", "b")]) | |||||
node31 = Template(wraptext("a"), [pgenh("1", "b")]) | |||||
node32 = Template(wraptext("a"), [pgens("1", "b")]) | |||||
node33 = Template(wraptext("a"), [ | |||||
pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) | |||||
node34 = Template(wraptext("a\n"), [ | |||||
pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), | |||||
pgens("h ", " i\n")]) | |||||
node35 = Template(wraptext("a"), [ | |||||
pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) | |||||
node36 = Template(wraptext("a"), [ | |||||
pgens("\nb ", " c "), pgens("\nd ", " e "), pgens("\nf ", " g ")]) | |||||
node37 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), | |||||
pgens("b", "f"), pgens("b", "h"), | |||||
pgens("i", "j")]) | |||||
node37 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), | |||||
pgens("b", "f"), pgens("b", "h"), | |||||
pgens("i", "j")]) | |||||
node38 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"), | |||||
pgens("1", "c"), pgens("2", "d")]) | |||||
node39 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"), | |||||
pgenh("1", "c"), pgenh("2", "d")]) | |||||
node40 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), | |||||
pgens("f", "g")]) | |||||
node1.add("e", "f", showkey=True) | |||||
node2.add(2, "g", showkey=False) | |||||
node3.add("e", "foo|bar", showkey=True) | |||||
node4.add("e", "f", showkey=True, before="b") | |||||
node5.add("f", "g", showkey=True, before=" d ") | |||||
node6.add("f", "g", showkey=True, before="b") | |||||
self.assertRaises(ValueError, node7.add, "e", "f", showkey=True, | |||||
before="q") | |||||
node8.add("e", "f", showkey=True, before=node8p) | |||||
node9.add("e", "f", showkey=True, before=pgenh("1", "d")) | |||||
self.assertRaises(ValueError, node10.add, "e", "f", showkey=True, | |||||
before=pgenh("1", "d")) | |||||
node11.add("d", "foo=bar", showkey=True) | |||||
node12.add("1", "foo=bar", showkey=False) | |||||
node13.add("h", "i", showkey=True) | |||||
node14.add("j", "k", showkey=True) | |||||
node15.add("h", "i", showkey=True) | |||||
node16.add("h", "i", showkey=True, preserve_spacing=False) | |||||
node17.add("h", "i", showkey=False) | |||||
node18.add("j", "k", showkey=False) | |||||
node19.add("h", "i", showkey=False) | |||||
node20.add("h", "i", showkey=False, preserve_spacing=False) | |||||
node21.add("2", "c") | |||||
node22.add("3", "c") | |||||
node23.add("c", "d") | |||||
node24.add("5", "f") | |||||
node25.add("3", "f") | |||||
node26.add("6", "f") | |||||
node27.add("c", "foo=bar") | |||||
node28.add("2", "foo=bar") | |||||
node29.add("b", "d") | |||||
node30.add("1", "foo=bar") | |||||
node31.add("1", "foo=bar", showkey=True) | |||||
node32.add("1", "foo=bar", showkey=False) | |||||
node33.add("d", "foo") | |||||
node34.add("f", "foo") | |||||
node35.add("f", "foo") | |||||
node36.add("d", "foo", preserve_spacing=False) | |||||
node37.add("b", "k") | |||||
node38.add("1", "e") | |||||
node39.add("1", "e") | |||||
node40.add("d", "h", before="b") | |||||
self.assertEqual("{{a|b=c|d|e=f}}", node1) | |||||
self.assertEqual("{{a|b=c|d|g}}", node2) | |||||
self.assertEqual("{{a|b=c|d|e=foo|bar}}", node3) | |||||
self.assertIsInstance(node3.params[2].value.get(1), HTMLEntity) | |||||
self.assertEqual("{{a|e=f|b=c|d}}", node4) | |||||
self.assertEqual("{{a|b=c|f=g| d =e}}", node5) | |||||
self.assertEqual("{{a|b=c|b=d|f=g|b=e}}", node6) | |||||
self.assertEqual("{{a|b=c|d}}", node7) | |||||
self.assertEqual("{{a|b=c|e=f|d}}", node8) | |||||
self.assertEqual("{{a|b=c|e=f|d}}", node9) | |||||
self.assertEqual("{{a|b=c|e}}", node10) | |||||
self.assertEqual("{{a|b=c|d=foo=bar}}", node11) | |||||
self.assertEqual("{{a|b=c|foo=bar}}", node12) | |||||
self.assertIsInstance(node12.params[1].value.get(1), HTMLEntity) | |||||
self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|\nh = i}}", node13) | |||||
self.assertEqual("{{a\n|b =c\n|d = e|f =g\n|h = i\n|j =k\n}}", node14) | |||||
self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |h =i}}", node15) | |||||
self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|h=i}}", node16) | |||||
self.assertEqual("{{a|\nb = c|\nd = e|\nf = g| i}}", node17) | |||||
self.assertEqual("{{a\n|b =c\n|d = e|f =g\n|h = i\n|k\n}}", node18) | |||||
self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |i}}", node19) | |||||
self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|i}}", node20) | |||||
self.assertEqual("{{a|b|c}}", node21) | |||||
self.assertEqual("{{a|b|3=c}}", node22) | |||||
self.assertEqual("{{a|b|c=d}}", node23) | |||||
self.assertEqual("{{a|b|c|d|e|f}}", node24) | |||||
self.assertEqual("{{a|b|c|4=d|5=e|f}}", node25) | |||||
self.assertEqual("{{a|b|c|4=d|5=e|6=f}}", node26) | |||||
self.assertEqual("{{a|b|c=foo=bar}}", node27) | |||||
self.assertEqual("{{a|b|foo=bar}}", node28) | |||||
self.assertIsInstance(node28.params[1].value.get(1), HTMLEntity) | |||||
self.assertEqual("{{a|b=d}}", node29) | |||||
self.assertEqual("{{a|foo=bar}}", node30) | |||||
self.assertIsInstance(node30.params[0].value.get(1), HTMLEntity) | |||||
self.assertEqual("{{a|1=foo=bar}}", node31) | |||||
self.assertEqual("{{a|foo=bar}}", node32) | |||||
self.assertIsInstance(node32.params[0].value.get(1), HTMLEntity) | |||||
self.assertEqual("{{a|\nb = c|\nd = foo|\nf = g}}", node33) | |||||
self.assertEqual("{{a\n|b =c\n|d = e|f =foo\n|h = i\n}}", node34) | |||||
self.assertEqual("{{a|b = c\n|\nd = e|\nf =foo }}", node35) | |||||
self.assertEqual("{{a|\nb = c |\nd =foo|\nf = g }}", node36) | |||||
self.assertEqual("{{a|b=k|d=e|i=j}}", node37) | |||||
self.assertEqual("{{a|1=e|x=y|2=d}}", node38) | |||||
self.assertEqual("{{a|x=y|e|d}}", node39) | |||||
self.assertEqual("{{a|b=c|d=h|f=g}}", node40) | |||||
def test_remove(self): | |||||
"""test Template.remove()""" | |||||
node1 = Template(wraptext("foobar")) | |||||
node2 = Template(wraptext("foo"), [pgenh("1", "bar"), | |||||
pgens("abc", "def")]) | |||||
node3 = Template(wraptext("foo"), [pgenh("1", "bar"), | |||||
pgens("abc", "def")]) | |||||
node4 = Template(wraptext("foo"), [pgenh("1", "bar"), | |||||
pgenh("2", "baz")]) | |||||
node5 = Template(wraptext("foo"), [ | |||||
pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) | |||||
node6 = Template(wraptext("foo"), [ | |||||
pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) | |||||
node7 = Template(wraptext("foo"), [ | |||||
pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")]) | |||||
node8 = Template(wraptext("foo"), [ | |||||
pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")]) | |||||
node9 = Template(wraptext("foo"), [ | |||||
pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]) | |||||
node10 = Template(wraptext("foo"), [ | |||||
pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]) | |||||
node2.remove("1") | |||||
node2.remove("abc") | |||||
node3.remove(1, keep_field=True) | |||||
node3.remove("abc", keep_field=True) | |||||
node4.remove("1", keep_field=False) | |||||
node5.remove("a", keep_field=False) | |||||
node6.remove("a", keep_field=True) | |||||
node7.remove(1, keep_field=True) | |||||
node8.remove(1, keep_field=False) | |||||
node9.remove(1, keep_field=True) | |||||
node10.remove(1, keep_field=False) | |||||
self.assertRaises(ValueError, node1.remove, 1) | |||||
self.assertRaises(ValueError, node1.remove, "a") | |||||
self.assertRaises(ValueError, node2.remove, "1") | |||||
self.assertEqual("{{foo}}", node2) | |||||
self.assertEqual("{{foo||abc=}}", node3) | |||||
self.assertEqual("{{foo||baz}}", node4) | |||||
self.assertEqual("{{foo|b=c}}", node5) | |||||
self.assertEqual("{{foo| a=|b=c}}", node6) | |||||
self.assertEqual("{{foo|1 =|2=c}}", node7) | |||||
self.assertEqual("{{foo|2=c}}", node8) | |||||
self.assertEqual("{{foo||c}}", node9) | |||||
self.assertEqual("{{foo||c}}", node10) | |||||
if __name__ == "__main__": | if __name__ == "__main__": | ||||
unittest.main(verbosity=2) | unittest.main(verbosity=2) |
@@ -0,0 +1,75 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from __future__ import unicode_literals | |||||
import unittest | |||||
from mwparserfromhell.compat import str | |||||
from mwparserfromhell.nodes import Text | |||||
class TestText(unittest.TestCase): | |||||
"""Test cases for the Text node.""" | |||||
def test_unicode(self): | |||||
"""test Text.__unicode__()""" | |||||
node = Text("foobar") | |||||
self.assertEqual("foobar", str(node)) | |||||
node2 = Text("fóóbar") | |||||
self.assertEqual("fóóbar", str(node2)) | |||||
def test_iternodes(self): | |||||
"""test Text.__iternodes__()""" | |||||
node = Text("foobar") | |||||
gen = node.__iternodes__(None) | |||||
self.assertEqual((None, node), next(gen)) | |||||
self.assertRaises(StopIteration, next, gen) | |||||
def test_strip(self): | |||||
"""test Text.__strip__()""" | |||||
node = Text("foobar") | |||||
for a in (True, False): | |||||
for b in (True, False): | |||||
self.assertIs(node, node.__strip__(a, b)) | |||||
def test_showtree(self): | |||||
"""test Text.__showtree__()""" | |||||
output = [] | |||||
node1 = Text("foobar") | |||||
node2 = Text("fóóbar") | |||||
node3 = Text("𐌲𐌿𐍄") | |||||
node1.__showtree__(output.append, None, None) | |||||
node2.__showtree__(output.append, None, None) | |||||
node3.__showtree__(output.append, None, None) | |||||
res = ["foobar", r"f\xf3\xf3bar", "\\U00010332\\U0001033f\\U00010344"] | |||||
self.assertEqual(res, output) | |||||
def test_value(self): | |||||
"""test getter/setter for the value attribute""" | |||||
node = Text("foobar") | |||||
self.assertEqual("foobar", node.value) | |||||
self.assertIsInstance(node.value, str) | |||||
node.value = "héhéhé" | |||||
self.assertEqual("héhéhé", node.value) | |||||
self.assertIsInstance(node.value, str) | |||||
if __name__ == "__main__": | |||||
unittest.main(verbosity=2) |
@@ -0,0 +1,108 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from __future__ import unicode_literals | |||||
import unittest | |||||
from mwparserfromhell.compat import py3k | |||||
from mwparserfromhell.parser import tokens | |||||
class TestTokens(unittest.TestCase):
    """Test cases for the Token class and its subclasses."""

    def test_issubclass(self):
        """check that all classes within the tokens module are really Tokens"""
        for name in tokens.__all__:
            cls = getattr(tokens, name)
            self.assertTrue(issubclass(cls, tokens.Token))
            instance = cls()
            self.assertIsInstance(instance, cls)
            self.assertIsInstance(instance, tokens.Token)

    def test_attributes(self):
        """check that Token attributes can be managed properly"""
        empty = tokens.Token()
        filled = tokens.Token(foo="bar", baz=123)
        self.assertEqual("bar", filled.foo)
        self.assertEqual(123, filled.baz)
        # Reading an attribute that was never set raises KeyError:
        self.assertRaises(KeyError, lambda: empty.foo)
        self.assertRaises(KeyError, lambda: filled.bar)
        empty.spam = "eggs"
        filled.foo = "ham"
        del filled.baz
        self.assertEqual("eggs", empty.spam)
        self.assertEqual("ham", filled.foo)
        # Deleted attributes behave as if they were never set:
        self.assertRaises(KeyError, lambda: filled.baz)
        self.assertRaises(KeyError, delattr, filled, "baz")

    def test_repr(self):
        """check that repr() on a Token works as expected"""
        plain = tokens.Token()
        filled = tokens.Token(foo="bar", baz=123)
        long_text = tokens.Text(text="earwig" * 100)
        # Long attribute values are truncated to 100 chars with an ellipsis:
        truncated = ("earwig" * 100)[:97] + "..."
        self.assertEqual("Token()", repr(plain))
        if py3k:
            expected_reprs = ("Token(foo='bar', baz=123)",
                              "Token(baz=123, foo='bar')")
            text_repr = "Text(text='" + truncated + "')"
        else:
            expected_reprs = ("Token(foo=u'bar', baz=123)",
                              "Token(baz=123, foo=u'bar')")
            text_repr = "Text(text=u'" + truncated + "')"
        # Attribute ordering in the repr is not guaranteed, so accept both:
        self.assertIn(repr(filled), expected_reprs)
        self.assertEqual(text_repr, repr(long_text))

    def test_equality(self):
        """check that equivalent tokens are considered equal"""
        empty1 = tokens.Token()
        empty2 = tokens.Token()
        filled = tokens.Token(foo="bar", baz=123)
        text1 = tokens.Text(text="asdf")
        text2 = tokens.Text(text="asdf")
        other = tokens.TemplateOpen(text="asdf")
        # Equality is symmetric and based on type plus attributes:
        self.assertEqual(empty1, empty2)
        self.assertEqual(empty2, empty1)
        self.assertEqual(text1, text2)
        self.assertEqual(text2, text1)
        self.assertNotEqual(empty1, filled)
        self.assertNotEqual(empty2, filled)
        # Same attributes but different token types are not equal:
        self.assertNotEqual(text1, other)
        self.assertNotEqual(text2, other)

    def test_repr_equality(self):
        """check that eval(repr(token)) == token"""
        samples = (
            tokens.Token(),
            tokens.Token(foo="bar", baz=123),
            tokens.Text(text="earwig"),
        )
        for token in samples:
            # Evaluate the repr inside the tokens module's namespace:
            self.assertEqual(token, eval(repr(token), vars(tokens)))
# Allow running this test module directly with verbose per-test output.
if __name__ == "__main__":
    unittest.main(verbosity=2)
@@ -0,0 +1,62 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from __future__ import unicode_literals | |||||
import unittest | |||||
from mwparserfromhell.nodes import Template, Text | |||||
from mwparserfromhell.utils import parse_anything | |||||
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | |||||
class TestUtils(TreeEqualityTestCase):
    """Tests for the utils module, which provides parse_anything()."""

    def test_parse_anything_valid(self):
        """tests for valid input to utils.parse_anything()"""
        # Each pair is (input value, expected parsed Wikicode).
        cases = (
            (wraptext("foobar"), wraptext("foobar")),
            (Template(wraptext("spam")), wrap([Template(wraptext("spam"))])),
            ("fóóbar", wraptext("fóóbar")),
            (b"foob\xc3\xa1r", wraptext("foobár")),
            (123, wraptext("123")),
            (True, wraptext("True")),
            (None, wrap([])),
            ([Text("foo"), Text("bar"), Text("baz")],
             wraptext("foo", "bar", "baz")),
            ([wraptext("foo"), Text("bar"), "baz", 123, 456],
             wraptext("foo", "bar", "baz", "123", "456")),
            # Arbitrarily nested iterables are flattened:
            ([[[([[((("foo",),),)], "bar"],)]]], wraptext("foo", "bar")),
        )
        for value, expected in cases:
            self.assertWikicodeEqual(expected, parse_anything(value))

    def test_parse_anything_invalid(self):
        """tests for invalid input to utils.parse_anything()"""
        # Unparsable objects raise ValueError, even inside nested lists:
        for bad in (Ellipsis, object, object(), type, ["foo", [object]]):
            self.assertRaises(ValueError, parse_anything, bad)
# Allow running this test module directly with verbose per-test output.
if __name__ == "__main__":
    unittest.main(verbosity=2)
@@ -0,0 +1,364 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from __future__ import unicode_literals | |||||
import re | |||||
from types import GeneratorType | |||||
import unittest | |||||
from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, | |||||
Node, Tag, Template, Text, Wikilink) | |||||
from mwparserfromhell.smart_list import SmartList | |||||
from mwparserfromhell.wikicode import Wikicode | |||||
from mwparserfromhell import parse | |||||
from mwparserfromhell.compat import py3k, str | |||||
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | |||||
class TestWikicode(TreeEqualityTestCase):
    """Tests for the Wikicode class, which manages a list of nodes."""

    def test_unicode(self):
        """test Wikicode.__unicode__()"""
        code1 = parse("foobar")
        code2 = parse("Have a {{template}} and a [[page|link]]")
        # str() of a Wikicode round-trips the original markup exactly.
        self.assertEqual("foobar", str(code1))
        self.assertEqual("Have a {{template}} and a [[page|link]]", str(code2))

    def test_nodes(self):
        """test getter/setter for the nodes attribute"""
        code = parse("Have a {{template}}")
        self.assertEqual(["Have a ", "{{template}}"], code.nodes)
        L1 = SmartList([Text("foobar"), Template(wraptext("abc"))])
        L2 = [Text("barfoo"), Template(wraptext("cba"))]
        L3 = "abc{{def}}"
        # Assigning a list (smart or plain) stores it by identity:
        code.nodes = L1
        self.assertIs(L1, code.nodes)
        code.nodes = L2
        self.assertIs(L2, code.nodes)
        # Assigning a string parses it into nodes:
        code.nodes = L3
        self.assertEqual(["abc", "{{def}}"], code.nodes)
        self.assertRaises(ValueError, setattr, code, "nodes", object)

    def test_get(self):
        """test Wikicode.get()"""
        code = parse("Have a {{template}} and a [[page|link]]")
        self.assertIs(code.nodes[0], code.get(0))
        self.assertIs(code.nodes[2], code.get(2))
        # Out-of-range indices raise IndexError:
        self.assertRaises(IndexError, code.get, 4)

    def test_set(self):
        """test Wikicode.set()"""
        code = parse("Have a {{template}} and a [[page|link]]")
        code.set(1, "{{{argument}}}")
        self.assertEqual("Have a {{{argument}}} and a [[page|link]]", code)
        self.assertIsInstance(code.get(1), Argument)
        # Setting a slot to None removes the node:
        code.set(2, None)
        self.assertEqual("Have a {{{argument}}}[[page|link]]", code)
        # Negative indices are supported:
        code.set(-3, "This is an ")
        self.assertEqual("This is an {{{argument}}}[[page|link]]", code)
        # A value that parses into more than one node is rejected:
        self.assertRaises(ValueError, code.set, 1, "foo {{bar}}")
        self.assertRaises(IndexError, code.set, 3, "{{baz}}")
        self.assertRaises(IndexError, code.set, -4, "{{baz}}")

    def test_index(self):
        """test Wikicode.index()"""
        code = parse("Have a {{template}} and a [[page|link]]")
        self.assertEqual(0, code.index("Have a "))
        self.assertEqual(3, code.index("[[page|link]]"))
        self.assertEqual(1, code.index(code.get(1)))
        self.assertRaises(ValueError, code.index, "foo")
        code = parse("{{foo}}{{bar|{{baz}}}}")
        self.assertEqual(1, code.index("{{bar|{{baz}}}}"))
        # recursive=True finds nodes nested inside other nodes, returning
        # the index of the top-level node that contains them:
        self.assertEqual(1, code.index("{{baz}}", recursive=True))
        self.assertEqual(1, code.index(code.get(1).get(1).value,
                                       recursive=True))
        self.assertRaises(ValueError, code.index, "{{baz}}", recursive=False)
        self.assertRaises(ValueError, code.index,
                          code.get(1).get(1).value, recursive=False)

    def test_insert(self):
        """test Wikicode.insert()"""
        code = parse("Have a {{template}} and a [[page|link]]")
        code.insert(1, "{{{argument}}}")
        self.assertEqual(
            "Have a {{{argument}}}{{template}} and a [[page|link]]", code)
        self.assertIsInstance(code.get(1), Argument)
        # Inserting None is a no-op:
        code.insert(2, None)
        self.assertEqual(
            "Have a {{{argument}}}{{template}} and a [[page|link]]", code)
        code.insert(-3, Text("foo"))
        self.assertEqual(
            "Have a {{{argument}}}foo{{template}} and a [[page|link]]", code)
        # A multi-node string inserts each parsed node in order:
        code2 = parse("{{foo}}{{bar}}{{baz}}")
        code2.insert(1, "abc{{def}}ghi[[jk]]")
        self.assertEqual("{{foo}}abc{{def}}ghi[[jk]]{{bar}}{{baz}}", code2)
        self.assertEqual(["{{foo}}", "abc", "{{def}}", "ghi", "[[jk]]",
                          "{{bar}}", "{{baz}}"], code2.nodes)
        # Like list.insert(), out-of-range indices clamp to the ends:
        code3 = parse("{{foo}}bar")
        code3.insert(1000, "[[baz]]")
        code3.insert(-1000, "derp")
        self.assertEqual("derp{{foo}}bar[[baz]]", code3)

    def test_insert_before(self):
        """test Wikicode.insert_before()"""
        code = parse("{{a}}{{b}}{{c}}{{d}}")
        code.insert_before("{{b}}", "x", recursive=True)
        code.insert_before("{{d}}", "[[y]]", recursive=False)
        self.assertEqual("{{a}}x{{b}}{{c}}[[y]]{{d}}", code)
        code.insert_before(code.get(2), "z")
        self.assertEqual("{{a}}xz{{b}}{{c}}[[y]]{{d}}", code)
        # Targets not present in the code raise ValueError either way:
        self.assertRaises(ValueError, code.insert_before, "{{r}}", "n",
                          recursive=True)
        self.assertRaises(ValueError, code.insert_before, "{{r}}", "n",
                          recursive=False)
        # recursive=True reaches into nested templates/parameters:
        code2 = parse("{{a|{{b}}|{{c|d={{f}}}}}}")
        code2.insert_before(code2.get(0).params[0].value.get(0), "x",
                            recursive=True)
        code2.insert_before("{{f}}", "y", recursive=True)
        self.assertEqual("{{a|x{{b}}|{{c|d=y{{f}}}}}}", code2)
        self.assertRaises(ValueError, code2.insert_before, "{{f}}", "y",
                          recursive=False)

    def test_insert_after(self):
        """test Wikicode.insert_after()"""
        code = parse("{{a}}{{b}}{{c}}{{d}}")
        code.insert_after("{{b}}", "x", recursive=True)
        code.insert_after("{{d}}", "[[y]]", recursive=False)
        self.assertEqual("{{a}}{{b}}x{{c}}{{d}}[[y]]", code)
        code.insert_after(code.get(2), "z")
        self.assertEqual("{{a}}{{b}}xz{{c}}{{d}}[[y]]", code)
        self.assertRaises(ValueError, code.insert_after, "{{r}}", "n",
                          recursive=True)
        self.assertRaises(ValueError, code.insert_after, "{{r}}", "n",
                          recursive=False)
        # recursive=True reaches into nested templates/parameters:
        code2 = parse("{{a|{{b}}|{{c|d={{f}}}}}}")
        code2.insert_after(code2.get(0).params[0].value.get(0), "x",
                           recursive=True)
        code2.insert_after("{{f}}", "y", recursive=True)
        self.assertEqual("{{a|{{b}}x|{{c|d={{f}}y}}}}", code2)
        self.assertRaises(ValueError, code2.insert_after, "{{f}}", "y",
                          recursive=False)

    def test_replace(self):
        """test Wikicode.replace()"""
        code = parse("{{a}}{{b}}{{c}}{{d}}")
        code.replace("{{b}}", "x", recursive=True)
        code.replace("{{d}}", "[[y]]", recursive=False)
        self.assertEqual("{{a}}x{{c}}[[y]]", code)
        code.replace(code.get(1), "z")
        self.assertEqual("{{a}}z{{c}}[[y]]", code)
        self.assertRaises(ValueError, code.replace, "{{r}}", "n",
                          recursive=True)
        self.assertRaises(ValueError, code.replace, "{{r}}", "n",
                          recursive=False)
        # recursive=True replaces nodes nested inside templates:
        code2 = parse("{{a|{{b}}|{{c|d={{f}}}}}}")
        code2.replace(code2.get(0).params[0].value.get(0), "x", recursive=True)
        code2.replace("{{f}}", "y", recursive=True)
        self.assertEqual("{{a|x|{{c|d=y}}}}", code2)
        self.assertRaises(ValueError, code2.replace, "y", "z", recursive=False)

    def test_append(self):
        """test Wikicode.append()"""
        code = parse("Have a {{template}}")
        code.append("{{{argument}}}")
        self.assertEqual("Have a {{template}}{{{argument}}}", code)
        self.assertIsInstance(code.get(2), Argument)
        # Appending None is a no-op:
        code.append(None)
        self.assertEqual("Have a {{template}}{{{argument}}}", code)
        code.append(Text(" foo"))
        self.assertEqual("Have a {{template}}{{{argument}}} foo", code)
        self.assertRaises(ValueError, code.append, slice(0, 1))

    def test_remove(self):
        """test Wikicode.remove()"""
        code = parse("{{a}}{{b}}{{c}}{{d}}")
        code.remove("{{b}}", recursive=True)
        code.remove(code.get(1), recursive=True)
        self.assertEqual("{{a}}{{d}}", code)
        self.assertRaises(ValueError, code.remove, "{{r}}", recursive=True)
        self.assertRaises(ValueError, code.remove, "{{r}}", recursive=False)
        # recursive=True removes nodes nested inside templates:
        code2 = parse("{{a|{{b}}|{{c|d={{f}}{{h}}}}}}")
        code2.remove(code2.get(0).params[0].value.get(0), recursive=True)
        code2.remove("{{f}}", recursive=True)
        self.assertEqual("{{a||{{c|d={{h}}}}}}", code2)
        self.assertRaises(ValueError, code2.remove, "{{h}}", recursive=False)

    def test_filter_family(self):
        """test the Wikicode.i?filter() family of functions"""
        # genlist() also checks that ifilter() really returns a generator.
        def genlist(gen):
            self.assertIsInstance(gen, GeneratorType)
            return list(gen)
        ifilter = lambda code: (lambda **kw: genlist(code.ifilter(**kw)))

        # filter() and ifilter() must agree, so run each case through both:
        code = parse("a{{b}}c[[d]]{{{e}}}{{f}}[[g]]")
        for func in (code.filter, ifilter(code)):
            self.assertEqual(["a", "{{b}}", "c", "[[d]]", "{{{e}}}", "{{f}}",
                              "[[g]]"], func())
            self.assertEqual(["{{{e}}}"], func(forcetype=Argument))
            self.assertIs(code.get(4), func(forcetype=Argument)[0])
            self.assertEqual(["a", "c"], func(forcetype=Text))
            self.assertEqual([], func(forcetype=Heading))
            self.assertRaises(TypeError, func, forcetype=True)

        # The generated filter_*/ifilter_* shorthands for each node type:
        funcs = [
            lambda name, **kw: getattr(code, "filter_" + name)(**kw),
            lambda name, **kw: genlist(getattr(code, "ifilter_" + name)(**kw))
        ]
        for get_filter in funcs:
            self.assertEqual(["{{{e}}}"], get_filter("arguments"))
            self.assertIs(code.get(4), get_filter("arguments")[0])
            self.assertEqual([], get_filter("comments"))
            self.assertEqual([], get_filter("headings"))
            self.assertEqual([], get_filter("html_entities"))
            self.assertEqual([], get_filter("tags"))
            self.assertEqual(["{{b}}", "{{f}}"], get_filter("templates"))
            self.assertEqual(["a", "c"], get_filter("text"))
            self.assertEqual(["[[d]]", "[[g]]"], get_filter("wikilinks"))

        # recursive=False yields only top-level nodes; True descends:
        code2 = parse("{{a|{{b}}|{{c|d={{f}}{{h}}}}}}")
        for func in (code2.filter, ifilter(code2)):
            self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"],
                             func(recursive=False, forcetype=Template))
            self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}",
                              "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"],
                             func(recursive=True, forcetype=Template))

        # matches= filters by regex; flags= passes re flags through:
        code3 = parse("{{foobar}}{{FOO}}{{baz}}{{bz}}")
        for func in (code3.filter, ifilter(code3)):
            self.assertEqual(["{{foobar}}", "{{FOO}}"], func(matches=r"foo"))
            self.assertEqual(["{{foobar}}", "{{FOO}}"],
                             func(matches=r"^{{foo.*?}}"))
            self.assertEqual(["{{foobar}}"],
                             func(matches=r"^{{foo.*?}}", flags=re.UNICODE))
            self.assertEqual(["{{baz}}", "{{bz}}"], func(matches=r"^{{b.*?z"))
            self.assertEqual(["{{baz}}"], func(matches=r"^{{b.+?z}}"))

        self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"],
                         code2.filter_templates(recursive=False))
        self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}",
                          "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"],
                         code2.filter_templates(recursive=True))
        self.assertEqual(["{{baz}}", "{{bz}}"],
                         code3.filter_templates(matches=r"^{{b.*?z"))
        self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z"))
        self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z", flags=0))
        # Typed shorthands reject positional args, unknown kwargs, and an
        # explicit forcetype (their type is already fixed):
        self.assertRaises(TypeError, code.filter_templates, 100)
        self.assertRaises(TypeError, code.filter_templates, a=42)
        self.assertRaises(TypeError, code.filter_templates, forcetype=Template)

    def test_get_sections(self):
        """test Wikicode.get_sections()"""
        page1 = parse("")
        page2 = parse("==Heading==")
        page3 = parse("===Heading===\nFoo bar baz\n====Gnidaeh====\n")
        # page4 is built from named pieces so each expected section below
        # can be expressed in terms of them:
        p4_lead = "This is a lead.\n"
        p4_IA = "=== Section I.A ===\nSection I.A [[body]].\n"
        p4_IB1 = "==== Section I.B.1 ====\nSection I.B.1 body.\n\n•Some content.\n\n"
        p4_IB = "=== Section I.B ===\n" + p4_IB1
        p4_I = "== Section I ==\nSection I body. {{and a|template}}\n" + p4_IA + p4_IB
        p4_II = "== Section II ==\nSection II body.\n\n"
        p4_IIIA1a = "===== Section III.A.1.a =====\nMore text.\n"
        p4_IIIA2ai1 = "======= Section III.A.2.a.i.1 =======\nAn invalid section!"
        p4_IIIA2 = "==== Section III.A.2 ====\nEven more text.\n" + p4_IIIA2ai1
        p4_IIIA = "=== Section III.A ===\nText.\n" + p4_IIIA1a + p4_IIIA2
        p4_III = "== Section III ==\n" + p4_IIIA
        page4 = parse(p4_lead + p4_I + p4_II + p4_III)

        # Default call: lead plus every (nested) section:
        self.assertEqual([], page1.get_sections())
        self.assertEqual(["", "==Heading=="], page2.get_sections())
        self.assertEqual(["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n",
                          "====Gnidaeh====\n"], page3.get_sections())
        self.assertEqual([p4_lead, p4_IA, p4_I, p4_IB, p4_IB1, p4_II,
                          p4_IIIA1a, p4_III, p4_IIIA, p4_IIIA2, p4_IIIA2ai1],
                         page4.get_sections())
        # levels= restricts to given heading levels:
        self.assertEqual(["====Gnidaeh====\n"], page3.get_sections(levels=[4]))
        self.assertEqual(["===Heading===\nFoo bar baz\n====Gnidaeh====\n"],
                         page3.get_sections(levels=(2, 3)))
        self.assertEqual([], page3.get_sections(levels=[0]))
        # include_lead= controls whether the pre-heading lead is returned:
        self.assertEqual(["", "====Gnidaeh====\n"],
                         page3.get_sections(levels=[4], include_lead=True))
        self.assertEqual(["===Heading===\nFoo bar baz\n====Gnidaeh====\n",
                          "====Gnidaeh====\n"],
                         page3.get_sections(include_lead=False))
        self.assertEqual([p4_IB1, p4_IIIA2], page4.get_sections(levels=[4]))
        # include_headings=False strips the heading line from each section:
        self.assertEqual([""], page2.get_sections(include_headings=False))
        self.assertEqual(["\nSection I.B.1 body.\n\n•Some content.\n\n",
                          "\nEven more text.\n" + p4_IIIA2ai1],
                         page4.get_sections(levels=[4],
                                            include_headings=False))
        # matches= is applied to the heading text (not the body):
        self.assertEqual([], page4.get_sections(matches=r"body"))
        self.assertEqual([p4_IA, p4_I, p4_IB, p4_IB1],
                         page4.get_sections(matches=r"Section\sI[.\s].*?"))
        self.assertEqual([p4_IA, p4_IIIA1a, p4_IIIA, p4_IIIA2, p4_IIIA2ai1],
                         page4.get_sections(matches=r".*?a.*?"))
        self.assertEqual([p4_IIIA1a, p4_IIIA2ai1],
                         page4.get_sections(matches=r".*?a.*?", flags=re.U))
        self.assertEqual(["\nMore text.\n", "\nAn invalid section!"],
                         page4.get_sections(matches=r".*?a.*?", flags=re.U,
                                            include_headings=False))
        # Sections are views: mutating one mutates the parent page too.
        page5 = parse("X\n== Foo ==\nBar\n== Baz ==\nBuzz")
        section = page5.get_sections(matches="Foo")[0]
        section.replace("\nBar\n", "\nBarf ")
        section.append("{{Haha}}\n")
        self.assertEqual("== Foo ==\nBarf {{Haha}}\n", section)
        self.assertEqual("X\n== Foo ==\nBarf {{Haha}}\n== Baz ==\nBuzz", page5)

    def test_strip_code(self):
        """test Wikicode.strip_code()"""
        # Since individual nodes have test cases for their __strip__ methods,
        # we're only going to do an integration test:
        code = parse("Foo [[bar]]\n\n{{baz}}\n\n[[a|b]] Σ")
        self.assertEqual("Foo bar\n\nb Σ",
                         code.strip_code(normalize=True, collapse=True))
        self.assertEqual("Foo bar\n\n\n\nb Σ",
                         code.strip_code(normalize=True, collapse=False))
        self.assertEqual("Foo bar\n\nb Σ",
                         code.strip_code(normalize=False, collapse=True))
        self.assertEqual("Foo bar\n\n\n\nb Σ",
                         code.strip_code(normalize=False, collapse=False))

    def test_get_tree(self):
        """test Wikicode.get_tree()"""
        # Since individual nodes have test cases for their __showtree___
        # methods, and the docstring covers all possibilities for the output of
        # __showtree__, we'll test it only:
        code = parse("Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}")
        expected = "Lorem ipsum \n{{\n\t foo\n\t| 1\n\t= bar\n\t| 2\n\t= " + \
                   "{{\n\t\t\tbaz\n\t }}\n\t| spam\n\t= eggs\n}}"
        # get_tree() indents with 4-space steps, hence expandtabs(4):
        self.assertEqual(expected.expandtabs(4), code.get_tree())
# Allow running this test module directly with verbose per-test output.
if __name__ == "__main__":
    unittest.main(verbosity=2)
@@ -0,0 +1,107 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from __future__ import unicode_literals | |||||
import unittest | |||||
from mwparserfromhell.compat import str | |||||
from mwparserfromhell.nodes import Text, Wikilink | |||||
from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext | |||||
class TestWikilink(TreeEqualityTestCase):
    """Test cases for the Wikilink node."""

    def test_unicode(self):
        """test Wikilink.__unicode__()"""
        plain = Wikilink(wraptext("foobar"))
        self.assertEqual("[[foobar]]", str(plain))
        piped = Wikilink(wraptext("foo"), wraptext("bar"))
        self.assertEqual("[[foo|bar]]", str(piped))

    def test_iternodes(self):
        """test Wikilink.__iternodes__()"""
        title_text = Text("foobar")
        ptitle, ptext1, ptext2 = Text("foo"), Text("bar"), Text("baz")
        plain = Wikilink(wrap([title_text]))
        piped = Wikilink(wrap([ptitle]), wrap([ptext1, ptext2]))
        gen1 = plain.__iternodes__(getnodes)
        gen2 = piped.__iternodes__(getnodes)
        # The node itself comes first, paired with None:
        self.assertEqual((None, plain), next(gen1))
        self.assertEqual((None, piped), next(gen2))
        # Then each child, paired with the Wikicode that owns it:
        self.assertEqual((plain.title, title_text), next(gen1))
        self.assertEqual((piped.title, ptitle), next(gen2))
        self.assertEqual((piped.text, ptext1), next(gen2))
        self.assertEqual((piped.text, ptext2), next(gen2))
        self.assertRaises(StopIteration, next, gen1)
        self.assertRaises(StopIteration, next, gen2)

    def test_strip(self):
        """test Wikilink.__strip__()"""
        plain = Wikilink(wraptext("foobar"))
        piped = Wikilink(wraptext("foo"), wraptext("bar"))
        # The flags must not affect the result — check every combination:
        for normalize in (True, False):
            for collapse in (True, False):
                self.assertEqual("foobar", plain.__strip__(normalize, collapse))
                self.assertEqual("bar", piped.__strip__(normalize, collapse))

    def test_showtree(self):
        """test Wikilink.__showtree__()"""
        output = []
        getter, marker = object(), object()

        def get(code):
            output.append((getter, code))

        def mark():
            output.append(marker)

        plain = Wikilink(wraptext("foobar"))
        piped = Wikilink(wraptext("foo"), wraptext("bar"))
        plain.__showtree__(output.append, get, mark)
        piped.__showtree__(output.append, get, mark)
        expected = [
            "[[", (getter, plain.title), "]]", "[[", (getter, piped.title),
            " | ", marker, (getter, piped.text), "]]"]
        self.assertEqual(expected, output)

    def test_title(self):
        """test getter/setter for the title attribute"""
        title = wraptext("foobar")
        plain = Wikilink(title)
        piped = Wikilink(title, wraptext("baz"))
        self.assertIs(title, plain.title)
        self.assertIs(title, piped.title)
        # Assigning a string parses it into Wikicode:
        plain.title = "héhehé"
        piped.title = "héhehé"
        self.assertWikicodeEqual(wraptext("héhehé"), plain.title)
        self.assertWikicodeEqual(wraptext("héhehé"), piped.title)

    def test_text(self):
        """test getter/setter for the text attribute"""
        text = wraptext("baz")
        plain = Wikilink(wraptext("foobar"))
        piped = Wikilink(wraptext("foobar"), text)
        # text is optional and defaults to None:
        self.assertIs(None, plain.text)
        self.assertIs(text, piped.text)
        plain.text = "buzz"
        piped.text = None
        self.assertWikicodeEqual(wraptext("buzz"), plain.text)
        self.assertIs(None, piped.text)
# Allow running this test module directly with verbose per-test output.
if __name__ == "__main__":
    unittest.main(verbosity=2)
@@ -0,0 +1,130 @@ | |||||
name: blank | |||||
label: argument with no content | |||||
input: "{{{}}}" | |||||
output: [ArgumentOpen(), ArgumentClose()] | |||||
--- | |||||
name: blank_with_default | |||||
label: argument with no content but a pipe | |||||
input: "{{{|}}}" | |||||
output: [ArgumentOpen(), ArgumentSeparator(), ArgumentClose()] | |||||
--- | |||||
name: basic | |||||
label: simplest type of argument | |||||
input: "{{{argument}}}" | |||||
output: [ArgumentOpen(), Text(text="argument"), ArgumentClose()] | |||||
--- | |||||
name: default | |||||
label: argument with a default value | |||||
input: "{{{foo|bar}}}" | |||||
output: [ArgumentOpen(), Text(text="foo"), ArgumentSeparator(), Text(text="bar"), ArgumentClose()] | |||||
--- | |||||
name: blank_with_multiple_defaults | |||||
label: no content, multiple pipes | |||||
input: "{{{|||}}}" | |||||
output: [ArgumentOpen(), ArgumentSeparator(), Text(text="||"), ArgumentClose()] | |||||
--- | |||||
name: multiple_defaults | |||||
label: multiple values separated by pipes | |||||
input: "{{{foo|bar|baz}}}" | |||||
output: [ArgumentOpen(), Text(text="foo"), ArgumentSeparator(), Text(text="bar|baz"), ArgumentClose()] | |||||
--- | |||||
name: newline | |||||
label: newline as only content | |||||
input: "{{{\n}}}" | |||||
output: [ArgumentOpen(), Text(text="\n"), ArgumentClose()] | |||||
--- | |||||
name: right_braces | |||||
label: multiple } scattered throughout text | |||||
input: "{{{foo}b}a}r}}}" | |||||
output: [ArgumentOpen(), Text(text="foo}b}a}r"), ArgumentClose()] | |||||
--- | |||||
name: right_braces_default | |||||
label: multiple } scattered throughout text, with a default value | |||||
input: "{{{foo}b}|}a}r}}}" | |||||
output: [ArgumentOpen(), Text(text="foo}b}"), ArgumentSeparator(), Text(text="}a}r"), ArgumentClose()] | |||||
--- | |||||
name: nested | |||||
label: an argument nested within another argument | |||||
input: "{{{{{{foo}}}|{{{bar}}}}}}" | |||||
output: [ArgumentOpen(), ArgumentOpen(), Text(text="foo"), ArgumentClose(), ArgumentSeparator(), ArgumentOpen(), Text(text="bar"), ArgumentClose(), ArgumentClose()] | |||||
--- | |||||
name: invalid_braces | |||||
label: invalid argument: multiple braces that are not part of a template or argument | |||||
input: "{{{foo{{[a}}}}}" | |||||
output: [Text(text="{{{foo{{[a}}}}}")] | |||||
--- | |||||
name: incomplete_open_only | |||||
label: incomplete arguments: just an open | |||||
input: "{{{" | |||||
output: [Text(text="{{{")] | |||||
--- | |||||
name: incomplete_open_text | |||||
label: incomplete arguments: an open with some text | |||||
input: "{{{foo" | |||||
output: [Text(text="{{{foo")] | |||||
--- | |||||
name: incomplete_open_text_pipe | |||||
label: incomplete arguments: an open, text, then a pipe | |||||
input: "{{{foo|" | |||||
output: [Text(text="{{{foo|")] | |||||
--- | |||||
name: incomplete_open_pipe | |||||
label: incomplete arguments: an open, then a pipe | |||||
input: "{{{|" | |||||
output: [Text(text="{{{|")] | |||||
--- | |||||
name: incomplete_open_pipe_text | |||||
label: incomplete arguments: an open, then a pipe, then text | |||||
input: "{{{|foo" | |||||
output: [Text(text="{{{|foo")] | |||||
--- | |||||
name: incomplete_open_pipes_text | |||||
label: incomplete arguments: an open, then a pipe, then text, then two pipes
input: "{{{|f||" | |||||
output: [Text(text="{{{|f||")] | |||||
--- | |||||
name: incomplete_open_partial_close | |||||
label: incomplete arguments: an open, then one right brace | |||||
input: "{{{{}" | |||||
output: [Text(text="{{{{}")] | |||||
--- | |||||
name: incomplete_preserve_previous | |||||
label: incomplete arguments: a valid argument followed by an invalid one | |||||
input: "{{{foo}}} {{{bar" | |||||
output: [ArgumentOpen(), Text(text="foo"), ArgumentClose(), Text(text=" {{{bar")] |
@@ -0,0 +1,39 @@ | |||||
name: blank | |||||
label: a blank comment | |||||
input: "<!---->" | |||||
output: [CommentStart(), CommentEnd()] | |||||
--- | |||||
name: basic | |||||
label: a basic comment | |||||
input: "<!-- comment -->" | |||||
output: [CommentStart(), Text(text=" comment "), CommentEnd()] | |||||
--- | |||||
name: tons_of_nonsense | |||||
label: a comment with tons of ignorable garbage in it | |||||
input: "<!-- foo{{bar}}[[basé\n\n]{}{}{}{}]{{{{{{haha{{--a>aa<!--aa -->" | |||||
output: [CommentStart(), Text(text=" foo{{bar}}[[basé\n\n]{}{}{}{}]{{{{{{haha{{--a>aa<!--aa "), CommentEnd()] | |||||
--- | |||||
name: incomplete_blank | |||||
label: a comment that doesn't close | |||||
input: "<!--" | |||||
output: [Text(text="<!--")] | |||||
--- | |||||
name: incomplete_text | |||||
label: a comment that doesn't close, with text | |||||
input: "<!-- foo" | |||||
output: [Text(text="<!-- foo")] | |||||
--- | |||||
name: incomplete_partial_close | |||||
label: a comment that doesn't close, with a partial close | |||||
input: "<!-- foo --\x01>" | |||||
output: [Text(text="<!-- foo --\x01>")] |
@@ -0,0 +1,109 @@ | |||||
name: level_1 | |||||
label: a basic level-1 heading | |||||
input: "= Heading =" | |||||
output: [HeadingStart(level=1), Text(text=" Heading "), HeadingEnd()] | |||||
--- | |||||
name: level_2 | |||||
label: a basic level-2 heading | |||||
input: "== Heading ==" | |||||
output: [HeadingStart(level=2), Text(text=" Heading "), HeadingEnd()] | |||||
--- | |||||
name: level_3 | |||||
label: a basic level-3 heading | |||||
input: "=== Heading ===" | |||||
output: [HeadingStart(level=3), Text(text=" Heading "), HeadingEnd()] | |||||
--- | |||||
name: level_4 | |||||
label: a basic level-4 heading | |||||
input: "==== Heading ====" | |||||
output: [HeadingStart(level=4), Text(text=" Heading "), HeadingEnd()] | |||||
--- | |||||
name: level_5 | |||||
label: a basic level-5 heading | |||||
input: "===== Heading =====" | |||||
output: [HeadingStart(level=5), Text(text=" Heading "), HeadingEnd()] | |||||
--- | |||||
name: level_6 | |||||
label: a basic level-6 heading | |||||
input: "====== Heading ======" | |||||
output: [HeadingStart(level=6), Text(text=" Heading "), HeadingEnd()] | |||||
--- | |||||
name: level_7 | |||||
label: a level-6 heading that pretends to be a level-7 heading | |||||
input: "======= Heading =======" | |||||
output: [HeadingStart(level=6), Text(text="= Heading ="), HeadingEnd()] | |||||
--- | |||||
name: level_3_2 | |||||
label: a level-2 heading that pretends to be a level-3 heading | |||||
input: "=== Heading ==" | |||||
output: [HeadingStart(level=2), Text(text="= Heading "), HeadingEnd()] | |||||
--- | |||||
name: level_4_6 | |||||
label: a level-4 heading that pretends to be a level-6 heading | |||||
input: "==== Heading ======" | |||||
output: [HeadingStart(level=4), Text(text=" Heading =="), HeadingEnd()] | |||||
--- | |||||
name: newline_before | |||||
label: a heading that starts after a newline | |||||
input: "This is some text.\n== Foobar ==\nbaz" | |||||
output: [Text(text="This is some text.\n"), HeadingStart(level=2), Text(text=" Foobar "), HeadingEnd(), Text(text="\nbaz")] | |||||
--- | |||||
name: text_after | |||||
label: text on the same line after | |||||
input: "This is some text.\n== Foobar == baz" | |||||
output: [Text(text="This is some text.\n"), HeadingStart(level=2), Text(text=" Foobar "), HeadingEnd(), Text(text=" baz")] | |||||
--- | |||||
name: invalid_text_before | |||||
label: invalid headings: text on the same line before | |||||
input: "This is some text. == Foobar ==\nbaz" | |||||
output: [Text(text="This is some text. == Foobar ==\nbaz")] | |||||
--- | |||||
name: invalid_newline_middle | |||||
label: invalid headings: newline in the middle | |||||
input: "This is some text.\n== Foo\nbar ==" | |||||
output: [Text(text="This is some text.\n== Foo\nbar ==")] | |||||
--- | |||||
name: invalid_newline_end | |||||
label: invalid headings: newline at the end | |||||
input: "This is some text.\n=== Foo\n===" | |||||
output: [Text(text="This is some text.\n=== Foo\n===")] | |||||
--- | |||||
name: invalid_nesting | |||||
label: invalid headings: attempts at nesting | |||||
input: "== Foo === Bar === Baz ==" | |||||
output: [HeadingStart(level=2), Text(text=" Foo === Bar === Baz "), HeadingEnd()] | |||||
--- | |||||
name: incomplete | |||||
label: a heading that starts but doesn't finish | |||||
input: "Foobar. \n== Heading " | |||||
output: [Text(text="Foobar. \n== Heading ")] |
@@ -0,0 +1,144 @@ | |||||
name: named | |||||
label: a basic named HTML entity | |||||
input: " " | |||||
output: [HTMLEntityStart(), Text(text="nbsp"), HTMLEntityEnd()] | |||||
--- | |||||
name: numeric_decimal | |||||
label: a basic decimal HTML entity | |||||
input: "k" | |||||
output: [HTMLEntityStart(), HTMLEntityNumeric(), Text(text="107"), HTMLEntityEnd()] | |||||
--- | |||||
name: numeric_hexadecimal_x | |||||
label: a basic hexadecimal HTML entity, using 'x' as a signal | |||||
input: "k" | |||||
output: [HTMLEntityStart(), HTMLEntityNumeric(), HTMLEntityHex(char="x"), Text(text="6B"), HTMLEntityEnd()] | |||||
--- | |||||
name: numeric_hexadecimal_X | |||||
label: a basic hexadecimal HTML entity, using 'X' as a signal | |||||
input: "k" | |||||
output: [HTMLEntityStart(), HTMLEntityNumeric(), HTMLEntityHex(char="X"), Text(text="6B"), HTMLEntityEnd()] | |||||
--- | |||||
name: numeric_decimal_max | |||||
label: the maximum acceptable decimal numeric entity | |||||
input: "" | |||||
output: [HTMLEntityStart(), HTMLEntityNumeric(), Text(text="1114111"), HTMLEntityEnd()] | |||||
--- | |||||
name: numeric_hex_max | |||||
label: the maximum acceptable hexadecimal numeric entity | |||||
input: "" | |||||
output: [HTMLEntityStart(), HTMLEntityNumeric(), HTMLEntityHex(char="x"), Text(text="10FFFF"), HTMLEntityEnd()] | |||||
--- | |||||
name: numeric_zeros | |||||
label: zeros accepted at the beginning of a numeric entity | |||||
input: "k" | |||||
output: [HTMLEntityStart(), HTMLEntityNumeric(), Text(text="0000000107"), HTMLEntityEnd()] | |||||
--- | |||||
name: numeric_hex_zeros | |||||
label: zeros accepted at the beginning of a hex numeric entity | |||||
input: "ć" | |||||
output: [HTMLEntityStart(), HTMLEntityNumeric(), HTMLEntityHex(char="x"), Text(text="0000000107"), HTMLEntityEnd()] | |||||
--- | |||||
name: invalid_named_too_long | |||||
label: a named entity that is too long | |||||
input: "&sigmaSigma;" | |||||
output: [Text(text="&sigmaSigma;")] | |||||
--- | |||||
name: invalid_named_undefined | |||||
label: a named entity that doesn't exist | |||||
input: "&foobar;" | |||||
output: [Text(text="&foobar;")] | |||||
--- | |||||
name: invalid_named_nonascii | |||||
label: a named entity with non-ASCII characters | |||||
input: "&sígma;" | |||||
output: [Text(text="&sígma;")] | |||||
--- | |||||
name: invalid_numeric_out_of_range_1 | |||||
label: a numeric entity that is out of range: < 1 | |||||
input: "�" | |||||
output: [Text(text="�")] | |||||
--- | |||||
name: invalid_numeric_out_of_range_2 | |||||
label: a hex numeric entity that is out of range: < 1 | |||||
input: "�" | |||||
output: [Text(text="�")] | |||||
--- | |||||
name: invalid_numeric_out_of_range_3 | |||||
label: a numeric entity that is out of range: > 0x10FFFF | |||||
input: "�" | |||||
output: [Text(text="�")] | |||||
--- | |||||
name: invalid_numeric_out_of_range_4 | |||||
label: a hex numeric entity that is out of range: > 0x10FFFF | |||||
input: "�" | |||||
output: [Text(text="�")] | |||||
--- | |||||
name: invalid_partial_amp | |||||
label: invalid entities: just an ampersand | |||||
input: "&" | |||||
output: [Text(text="&")] | |||||
--- | |||||
name: invalid_partial_amp_semicolon | |||||
label: invalid entities: an ampersand and semicolon | |||||
input: "&;" | |||||
output: [Text(text="&;")] | |||||
--- | |||||
name: invalid_partial_amp_pound_semicolon | |||||
label: invalid entities: an ampersand, pound sign, and semicolon | |||||
input: "&#;" | |||||
output: [Text(text="&#;")] | |||||
--- | |||||
name: invalid_partial_amp_pound_x_semicolon | |||||
label: invalid entities: an ampersand, pound sign, x, and semicolon | |||||
input: "&#x;" | |||||
output: [Text(text="&#x;")] | |||||
--- | |||||
name: invalid_partial_amp_pound_numbers | |||||
label: invalid entities: an ampersand, pound sign, and numbers without a semicolon | |||||
input: "{" | |||||
output: [Text(text="{")] | |||||
--- | |||||
name: invalid_partial_amp_pound_x | |||||
label: invalid entities: an ampersand, pound sign, and x | |||||
input: "&#x" | |||||
output: [Text(text="&#x")] |
@@ -0,0 +1,46 @@ | |||||
name: empty | |||||
label: sanity check that parsing an empty string yields nothing | |||||
input: "" | |||||
output: [] | |||||
--- | |||||
name: template_argument_mix | |||||
label: an ambiguous mix of templates and arguments | |||||
input: "{{{{{{{{foo}}}}}}}}{{{{{{{bar}}baz}}}buz}}" | |||||
output: [TemplateOpen(), ArgumentOpen(), ArgumentOpen(), Text(text="foo"), ArgumentClose(), ArgumentClose(), TemplateClose(), TemplateOpen(), ArgumentOpen(), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), ArgumentClose(), Text(text="buz"), TemplateClose()] | |||||
--- | |||||
name: rich_heading | |||||
label: a heading with templates/wikilinks in it | |||||
input: "== Head{{ing}} [[with]] {{{funky|{{stuf}}}}} ==" | |||||
output: [HeadingStart(level=2), Text(text=" Head"), TemplateOpen(), Text(text="ing"), TemplateClose(), Text(text=" "), WikilinkOpen(), Text(text="with"), WikilinkClose(), Text(text=" "), ArgumentOpen(), Text(text="funky"), ArgumentSeparator(), TemplateOpen(), Text(text="stuf"), TemplateClose(), ArgumentClose(), Text(text=" "), HeadingEnd()] | |||||
--- | |||||
name: html_entity_with_template | |||||
label: an HTML entity with a template embedded inside | |||||
input: "&n{{bs}}p;" | |||||
output: [Text(text="&n"), TemplateOpen(), Text(text="bs"), TemplateClose(), Text(text="p;")] | |||||
--- | |||||
name: html_entity_with_comment | |||||
label: an HTML entity with a comment embedded inside | |||||
input: "&n<!--foo-->bsp;" | |||||
output: [Text(text="&n"), CommentStart(), Text(text="foo"), CommentEnd(), Text(text="bsp;")] | |||||
--- | |||||
name: wildcard | |||||
label: a wildcard assortment of various things | |||||
input: "{{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}" | |||||
output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), Text(text="biz"), TemplateClose(), Text(text="buzz"), TemplateClose(), Text(text="usr"), TemplateParamSeparator(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] | |||||
--- | |||||
name: wildcard_redux | |||||
label: an even wilder assortment of various things | |||||
input: "{{a|b|{{c|[[d]]{{{e}}}}}}}[[f|{{{g}}}<!--h-->]]{{i|j= }}" | |||||
output: [TemplateOpen(), Text(text="a"), TemplateParamSeparator(), Text(text="b"), TemplateParamSeparator(), TemplateOpen(), Text(text="c"), TemplateParamSeparator(), WikilinkOpen(), Text(text="d"), WikilinkClose(), ArgumentOpen(), Text(text="e"), ArgumentClose(), TemplateClose(), TemplateClose(), WikilinkOpen(), Text(text="f"), WikilinkSeparator(), ArgumentOpen(), Text(text="g"), ArgumentClose(), CommentStart(), Text(text="h"), CommentEnd(), WikilinkClose(), TemplateOpen(), Text(text="i"), TemplateParamSeparator(), Text(text="j"), TemplateParamEquals(), HTMLEntityStart(), Text(text="nbsp"), HTMLEntityEnd(), TemplateClose()] |
@@ -0,0 +1,641 @@ | |||||
name: blank | |||||
label: template with no content | |||||
input: "{{}}" | |||||
output: [TemplateOpen(), TemplateClose()] | |||||
--- | |||||
name: blank_with_params | |||||
label: template with no content, but pipes and equal signs | |||||
input: "{{||=|}}" | |||||
output: [TemplateOpen(), TemplateParamSeparator(), TemplateParamSeparator(), TemplateParamEquals(), TemplateParamSeparator(), TemplateClose()] | |||||
--- | |||||
name: no_params | |||||
label: simplest type of template | |||||
input: "{{template}}" | |||||
output: [TemplateOpen(), Text(text="template"), TemplateClose()] | |||||
--- | |||||
name: one_param_unnamed | |||||
label: basic template with one unnamed parameter | |||||
input: "{{foo|bar}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateClose()] | |||||
--- | |||||
name: one_param_named | |||||
label: basic template with one named parameter | |||||
input: "{{foo|bar=baz}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||||
--- | |||||
name: multiple_unnamed_params | |||||
label: basic template with multiple unnamed parameters | |||||
input: "{{foo|bar|baz|biz|buzz}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateClose()] | |||||
--- | |||||
name: multiple_named_params | |||||
label: basic template with multiple named parameters | |||||
input: "{{foo|bar=baz|biz=buzz|buff=baff|usr=bin}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), Text(text="buzz"), TemplateParamSeparator(), Text(text="buff"), TemplateParamEquals(), Text(text="baff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamEquals(), Text(text="bin"), TemplateClose()] | |||||
--- | |||||
name: multiple_mixed_params | |||||
label: basic template with multiple unnamed/named parameters | |||||
input: "{{foo|bar=baz|biz|buzz=buff|usr|bin}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateParamEquals(), Text(text="buff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamSeparator(), Text(text="bin"), TemplateClose()] | |||||
--- | |||||
name: multiple_mixed_params2 | |||||
label: basic template with multiple unnamed/named parameters in another order | |||||
input: "{{foo|bar|baz|biz=buzz|buff=baff|usr=bin}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), Text(text="buzz"), TemplateParamSeparator(), Text(text="buff"), TemplateParamEquals(), Text(text="baff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamEquals(), Text(text="bin"), TemplateClose()] | |||||
--- | |||||
name: nested_unnamed_param | |||||
label: nested template as an unnamed parameter | |||||
input: "{{foo|{{bar}}}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()] | |||||
--- | |||||
name: nested_named_param_value | |||||
label: nested template as a parameter value with a named parameter | |||||
input: "{{foo|bar={{baz}}}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] | |||||
--- | |||||
name: nested_named_param_name_and_value | |||||
label: nested templates as a parameter name and value | |||||
input: "{{foo|{{bar}}={{baz}}}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] | |||||
--- | |||||
name: nested_name_start | |||||
label: nested template at the beginning of a template name | |||||
input: "{{{{foo}}bar}}" | |||||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose()] | |||||
--- | |||||
name: nested_name_start_unnamed_param | |||||
label: nested template at the beginning of a template name and as an unnamed parameter | |||||
input: "{{{{foo}}bar|{{baz}}}}" | |||||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] | |||||
--- | |||||
name: nested_name_start_named_param_value | |||||
label: nested template at the beginning of a template name and as a parameter value with a named parameter | |||||
input: "{{{{foo}}bar|baz={{biz}}}}" | |||||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] | |||||
--- | |||||
name: nested_name_start_named_param_name_and_value | |||||
label: nested template at the beginning of a template name and as a parameter name and value | |||||
input: "{{{{foo}}bar|{{baz}}={{biz}}}}" | |||||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] | |||||
--- | |||||
name: nested_name_end | |||||
label: nested template at the end of a template name | |||||
input: "{{foo{{bar}}}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()] | |||||
--- | |||||
name: nested_name_end_unnamed_param | |||||
label: nested template at the end of a template name and as an unnamed parameter | |||||
input: "{{foo{{bar}}|{{baz}}}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] | |||||
--- | |||||
name: nested_name_end_named_param_value | |||||
label: nested template at the end of a template name and as a parameter value with a named parameter | |||||
input: "{{foo{{bar}}|baz={{biz}}}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] | |||||
--- | |||||
name: nested_name_end_named_param_name_and_value | |||||
label: nested template at the end of a template name and as a parameter name and value | |||||
input: "{{foo{{bar}}|{{baz}}={{biz}}}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] | |||||
--- | |||||
name: nested_name_mid | |||||
label: nested template in the middle of a template name | |||||
input: "{{foo{{bar}}baz}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose()] | |||||
--- | |||||
name: nested_name_mid_unnamed_param | |||||
label: nested template in the middle of a template name and as an unnamed parameter | |||||
input: "{{foo{{bar}}baz|{{biz}}}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] | |||||
--- | |||||
name: nested_name_mid_named_param_value | |||||
label: nested template in the middle of a template name and as a parameter value with a named parameter | |||||
input: "{{foo{{bar}}baz|biz={{buzz}}}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()] | |||||
--- | |||||
name: nested_name_mid_named_param_name_and_value | |||||
label: nested template in the middle of a template name and as a parameter name and value | |||||
input: "{{foo{{bar}}baz|{{biz}}={{buzz}}}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()] | |||||
--- | |||||
name: nested_name_start_end | |||||
label: nested template at the beginning and end of a template name | |||||
input: "{{{{foo}}{{bar}}}}" | |||||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()] | |||||
--- | |||||
name: nested_name_start_end_unnamed_param | |||||
label: nested template at the beginning and end of a template name and as an unnamed parameter | |||||
input: "{{{{foo}}{{bar}}|{{baz}}}}" | |||||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] | |||||
--- | |||||
name: nested_name_start_end_named_param_value | |||||
label: nested template at the beginning and end of a template name and as a parameter value with a named parameter | |||||
input: "{{{{foo}}{{bar}}|baz={{biz}}}}" | |||||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] | |||||
--- | |||||
name: nested_name_start_end_named_param_name_and_value | |||||
label: nested template at the beginning and end of a template name and as a parameter name and value | |||||
input: "{{{{foo}}{{bar}}|{{baz}}={{biz}}}}" | |||||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] | |||||
--- | |||||
name: nested_names_multiple | |||||
label: multiple nested templates within nested templates | |||||
input: "{{{{{{{{foo}}bar}}baz}}biz}}" | |||||
output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateClose()] | |||||
--- | |||||
name: nested_names_multiple_unnamed_param | |||||
label: multiple nested templates within nested templates with a nested unnamed parameter | |||||
input: "{{{{{{{{foo}}bar}}baz}}biz|{{buzz}}}}" | |||||
output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()] | |||||
--- | |||||
name: nested_names_multiple_named_param_value | |||||
label: multiple nested templates within nested templates with a nested parameter value in a named parameter | |||||
input: "{{{{{{{{foo}}bar}}baz}}biz|buzz={{bin}}}}" | |||||
output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateParamEquals(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] | |||||
--- | |||||
name: nested_names_multiple_named_param_name_and_value | |||||
label: multiple nested templates within nested templates with a nested parameter name and value | |||||
input: "{{{{{{{{foo}}bar}}baz}}biz|{{buzz}}={{bin}}}}" | |||||
output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] | |||||
--- | |||||
name: mixed_nested_templates | |||||
label: mixed assortment of nested templates within template names, parameter names, and values | |||||
input: "{{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}" | |||||
output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), Text(text="biz"), TemplateClose(), Text(text="buzz"), TemplateClose(), Text(text="usr"), TemplateParamSeparator(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] | |||||
--- | |||||
name: newlines_start | |||||
label: a newline at the start of a template name | |||||
input: "{{\nfoobar}}" | |||||
output: [TemplateOpen(), Text(text="\nfoobar"), TemplateClose()] | |||||
--- | |||||
name: newlines_end | |||||
label: a newline at the end of a template name | |||||
input: "{{foobar\n}}" | |||||
output: [TemplateOpen(), Text(text="foobar\n"), TemplateClose()] | |||||
--- | |||||
name: newlines_start_end | |||||
label: a newline at the start and end of a template name | |||||
input: "{{\nfoobar\n}}" | |||||
output: [TemplateOpen(), Text(text="\nfoobar\n"), TemplateClose()] | |||||
--- | |||||
name: newlines_mid | |||||
label: a newline at the middle of a template name | |||||
input: "{{foo\nbar}}" | |||||
output: [Text(text="{{foo\nbar}}")] | |||||
--- | |||||
name: newlines_start_mid | |||||
label: a newline at the start and middle of a template name | |||||
input: "{{\nfoo\nbar}}" | |||||
output: [Text(text="{{\nfoo\nbar}}")] | |||||
--- | |||||
name: newlines_mid_end | |||||
label: a newline at the middle and end of a template name | |||||
input: "{{foo\nbar\n}}" | |||||
output: [Text(text="{{foo\nbar\n}}")] | |||||
--- | |||||
name: newlines_start_mid_end | |||||
label: a newline at the start, middle, and end of a template name | |||||
input: "{{\nfoo\nbar\n}}" | |||||
output: [Text(text="{{\nfoo\nbar\n}}")] | |||||
--- | |||||
name: newlines_unnamed_param | |||||
label: newlines within an unnamed template parameter | |||||
input: "{{foo|\nb\nar\n}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] | |||||
--- | |||||
name: newlines_enclose_template_name_unnamed_param | |||||
label: newlines enclosing a template name and within an unnamed template parameter | |||||
input: "{{\nfoo\n|\nb\nar\n}}" | |||||
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] | |||||
--- | |||||
name: newlines_within_template_name_unnamed_param | |||||
label: newlines within a template name and within an unnamed template parameter | |||||
input: "{{\nfo\no\n|\nb\nar\n}}" | |||||
output: [Text(text="{{\nfo\no\n|\nb\nar\n}}")] | |||||
--- | |||||
name: newlines_enclose_template_name_named_param_value | |||||
label: newlines enclosing a template name and within a named parameter value | |||||
input: "{{\nfoo\n|1=\nb\nar\n}}" | |||||
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nb\nar\n"), TemplateClose()] | |||||
--- | |||||
name: newlines_within_template_name_named_param_value | |||||
label: newlines within a template name and within a named parameter value | |||||
input: "{{\nf\noo\n|1=\nb\nar\n}}" | |||||
output: [Text(text="{{\nf\noo\n|1=\nb\nar\n}}")] | |||||
--- | |||||
name: newlines_named_param_name | |||||
label: newlines within a parameter name | |||||
input: "{{foo|\nb\nar\n=baz}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||||
--- | |||||
name: newlines_named_param_name_param_value | |||||
label: newlines within a parameter name and within a parameter value | |||||
input: "{{foo|\nb\nar\n=\nba\nz\n}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nba\nz\n"), TemplateClose()] | |||||
--- | |||||
name: newlines_enclose_template_name_named_param_name | |||||
label: newlines enclosing a template name and within a parameter name | |||||
input: "{{\nfoo\n|\nb\nar\n=baz}}" | |||||
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||||
--- | |||||
name: newlines_enclose_template_name_named_param_name_param_value | |||||
label: newlines enclosing a template name and within a parameter name and within a parameter value | |||||
input: "{{\nfoo\n|\nb\nar\n=\nba\nz\n}}" | |||||
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nba\nz\n"), TemplateClose()] | |||||
--- | |||||
name: newlines_within_template_name_named_param_name | |||||
label: newlines within a template name and within a parameter name | |||||
input: "{{\nfo\no\n|\nb\nar\n=baz}}" | |||||
output: [Text(text="{{\nfo\no\n|\nb\nar\n=baz}}")] | |||||
--- | |||||
name: newlines_within_template_name_named_param_name_param_value | |||||
label: newlines within a template name and within a parameter name and within a parameter value | |||||
input: "{{\nf\noo\n|\nb\nar\n=\nba\nz\n}}" | |||||
output: [Text(text="{{\nf\noo\n|\nb\nar\n=\nba\nz\n}}")] | |||||
--- | |||||
name: newlines_wildcard | |||||
label: a random, complex assortment of templates and newlines | |||||
input: "{{\nfoo\n|\nb\nar\n=\nb\naz\n|\nb\nuz\n}}" | |||||
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nb\naz\n"), TemplateParamSeparator(), Text(text="\nb\nuz\n"), TemplateClose()] | |||||
--- | |||||
name: newlines_wildcard_redux | |||||
label: an even more random and complex assortment of templates and newlines | |||||
input: "{{\nfoo\n|\n{{\nbar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}" | |||||
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\n"), TemplateOpen(), Text(text="\nbar\n"), TemplateParamSeparator(), Text(text="\nb\naz\n"), TemplateParamEquals(), Text(text="\nb\niz\n"), TemplateClose(), Text(text="\n"), TemplateParamEquals(), Text(text="\nb\nuzz\n"), TemplateClose()] | |||||
--- | |||||
name: newlines_wildcard_redux_invalid | |||||
label: a variation of the newlines_wildcard_redux test that is invalid | |||||
input: "{{\nfoo\n|\n{{\nb\nar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}" | |||||
output: [Text(text="{{\nfoo\n|\n{{\nb\nar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}")] | |||||
--- | |||||
name: invalid_name_left_brace_middle | |||||
label: invalid characters in template name: left brace in middle | |||||
input: "{{foo{bar}}" | |||||
output: [Text(text="{{foo{bar}}")] | |||||
--- | |||||
name: invalid_name_right_brace_middle | |||||
label: invalid characters in template name: right brace in middle | |||||
input: "{{foo}bar}}" | |||||
output: [Text(text="{{foo}bar}}")] | |||||
--- | |||||
name: invalid_name_left_braces | |||||
label: invalid characters in template name: two left braces in middle | |||||
input: "{{foo{b{ar}}" | |||||
output: [Text(text="{{foo{b{ar}}")] | |||||
--- | |||||
name: invalid_name_left_bracket_middle | |||||
label: invalid characters in template name: left bracket in middle | |||||
input: "{{foo[bar}}" | |||||
output: [Text(text="{{foo[bar}}")] | |||||
--- | |||||
name: invalid_name_right_bracket_middle | |||||
label: invalid characters in template name: right bracket in middle | |||||
input: "{{foo]bar}}" | |||||
output: [Text(text="{{foo]bar}}")] | |||||
--- | |||||
name: invalid_name_left_bracket_start | |||||
label: invalid characters in template name: left bracket at start | |||||
input: "{{[foobar}}" | |||||
output: [Text(text="{{[foobar}}")] | |||||
--- | |||||
name: invalid_name_right_bracket_start | |||||
label: invalid characters in template name: right bracket at end | |||||
input: "{{foobar]}}" | |||||
output: [Text(text="{{foobar]}}")] | |||||
--- | |||||
name: valid_name_left_brace_start | |||||
label: valid characters in template name: left brace at start | |||||
input: "{{{foobar}}" | |||||
output: [Text(text="{"), TemplateOpen(), Text(text="foobar"), TemplateClose()] | |||||
--- | |||||
name: valid_unnamed_param_left_brace | |||||
label: valid characters in unnamed template parameter: left brace | |||||
input: "{{foo|ba{r}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r"), TemplateClose()] | |||||
--- | |||||
name: valid_unnamed_param_braces | |||||
label: valid characters in unnamed template parameter: left and right braces | |||||
input: "{{foo|ba{r}}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r"), TemplateClose(), Text(text="}")] | |||||
--- | |||||
name: valid_param_name_braces | |||||
label: valid characters in template parameter name: left and right braces | |||||
input: "{{foo|ba{r}=baz}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r}"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||||
--- | |||||
name: valid_param_name_brackets | |||||
label: valid characters in template parameter name: left and right brackets | |||||
input: "{{foo|ba[r]=baz}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba[r]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||||
--- | |||||
name: valid_param_name_double_left_brackets | |||||
label: valid characters in template parameter name: double left brackets | |||||
input: "{{foo|bar[[in\nvalid=baz}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar[[in\nvalid"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||||
--- | |||||
name: valid_param_name_double_right_brackets | |||||
label: valid characters in template parameter name: double right brackets | |||||
input: "{{foo|bar]]=baz}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar]]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||||
--- | |||||
name: valid_param_name_double_brackets | |||||
label: valid characters in template parameter name: double left and right brackets | |||||
input: "{{foo|bar[[in\nvalid]]=baz}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar[[in\nvalid]]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||||
--- | |||||
name: invalid_param_name_double_left_braces | |||||
label: invalid characters in template parameter name: double left braces | |||||
input: "{{foo|bar{{in\nvalid=baz}}" | |||||
output: [Text(text="{{foo|bar{{in\nvalid=baz}}")] | |||||
--- | |||||
name: invalid_param_name_double_braces | |||||
label: invalid characters in template parameter name: double left and right braces | |||||
input: "{{foo|bar{{in\nvalid}}=baz}}" | |||||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar{{in\nvalid"), TemplateClose(), Text(text="=baz}}")] | |||||
--- | |||||
name: incomplete_stub | |||||
label: incomplete templates that should fail gracefully: just an opening | |||||
input: "{{" | |||||
output: [Text(text="{{")] | |||||
--- | |||||
name: incomplete_plain | |||||
label: incomplete templates that should fail gracefully: no close whatsoever | |||||
input: "{{stuff}} {{foobar" | |||||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar")] | |||||
--- | |||||
name: incomplete_right_brace | |||||
label: incomplete templates that should fail gracefully: only one right brace | |||||
input: "{{stuff}} {{foobar}" | |||||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar}")] | |||||
--- | |||||
name: incomplete_pipe | |||||
label: incomplete templates that should fail gracefully: a pipe | |||||
input: "{{stuff}} {{foobar|" | |||||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar|")] | |||||
--- | |||||
name: incomplete_unnamed_param | |||||
label: incomplete templates that should fail gracefully: an unnamed parameter | |||||
input: "{{stuff}} {{foo|bar" | |||||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar")] | |||||
--- | |||||
name: incomplete_unnamed_param_pipe | |||||
label: incomplete templates that should fail gracefully: an unnamed parameter, then a pipe | |||||
input: "{{stuff}} {{foo|bar|" | |||||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|")] | |||||
--- | |||||
name: incomplete_valueless_param | |||||
label: incomplete templates that should fail gracefully: a named parameter with no value | |||||
input: "{{stuff}} {{foo|bar=" | |||||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=")] | |||||
--- | |||||
name: incomplete_valueless_param_pipe | |||||
label: incomplete templates that should fail gracefully: a named parameter with no value, then a pipe | |||||
input: "{{stuff}} {{foo|bar=|" | |||||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=|")] | |||||
--- | |||||
name: incomplete_named_param | |||||
label: incomplete templates that should fail gracefully: a named parameter with a value | |||||
input: "{{stuff}} {{foo|bar=baz" | |||||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz")] | |||||
--- | |||||
name: incomplete_named_param_pipe | |||||
label: incomplete templates that should fail gracefully: a named parameter with a value, then a pipe | |||||
input: "{{stuff}} {{foo|bar=baz|" | |||||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|")] | |||||
--- | |||||
name: incomplete_two_unnamed_params | |||||
label: incomplete templates that should fail gracefully: two unnamed parameters | |||||
input: "{{stuff}} {{foo|bar|baz" | |||||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz")] | |||||
--- | |||||
name: incomplete_unnamed_param_valueless_param | |||||
label: incomplete templates that should fail gracefully: an unnamed parameter, then a named parameter with no value | |||||
input: "{{stuff}} {{foo|bar|baz=" | |||||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz=")] | |||||
--- | |||||
name: incomplete_unnamed_param_named_param | |||||
label: incomplete templates that should fail gracefully: an unnamed parameter, then a named parameter with a value | |||||
input: "{{stuff}} {{foo|bar|baz=biz" | |||||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz=biz")] | |||||
--- | |||||
name: incomplete_named_param_unnamed_param | |||||
label: incomplete templates that should fail gracefully: a named parameter with a value, then an unnamed parameter | |||||
input: "{{stuff}} {{foo|bar=baz|biz" | |||||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz")] | |||||
--- | |||||
name: incomplete_named_param_valueless_param | |||||
label: incomplete templates that should fail gracefully: a named parameter with a value, then a named parameter with no value | |||||
input: "{{stuff}} {{foo|bar=baz|biz=" | |||||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz=")] | |||||
--- | |||||
name: incomplete_two_named_params | |||||
label: incomplete templates that should fail gracefully: two named parameters with values | |||||
input: "{{stuff}} {{foo|bar=baz|biz=buzz" | |||||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz=buzz")] | |||||
--- | |||||
name: incomplete_nested_template_as_unnamed_param | |||||
label: incomplete templates that should fail gracefully: a valid nested template as an unnamed parameter | |||||
input: "{{stuff}} {{foo|{{bar}}" | |||||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|"), TemplateOpen(), Text(text="bar"), TemplateClose()] | |||||
--- | |||||
name: incomplete_nested_template_as_param_value | |||||
label: incomplete templates that should fail gracefully: a valid nested template as a parameter value | |||||
input: "{{stuff}} {{foo|bar={{baz}}" | |||||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar="), TemplateOpen(), Text(text="baz"), TemplateClose()] | |||||
--- | |||||
name: recursion_five_hundred_opens | |||||
label: test potentially dangerous recursion: five hundred template openings, without spaces | |||||
input: "{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{" | |||||
output: [Text(text="{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{")] | |||||
--- | |||||
name: recursion_one_hundred_opens | |||||
label: test potentially dangerous recursion: one hundred template openings, with spaces | |||||
input: "{{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{" | |||||
output: [Text(text="{{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{")] | |||||
--- | |||||
name: recursion_opens_and_closes | |||||
label: test potentially dangerous recursion: template openings and closings | |||||
input: "{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}" | |||||
output: [Text(text="{{|"), TemplateOpen(), TemplateClose(), Text(text="{{|"), TemplateOpen(), TemplateClose(), TemplateOpen(), TemplateParamSeparator(), TemplateOpen(), TemplateClose(), Text(text="{{"), TemplateParamSeparator(), Text(text="{{"), TemplateClose(), Text(text="{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}")] |
@@ -0,0 +1,25 @@ | |||||
name: basic | |||||
label: sanity check for basic text parsing, no gimmicks | |||||
input: "foobar" | |||||
output: [Text(text="foobar")] | |||||
--- | |||||
name: newlines | |||||
label: slightly more complex text parsing, with newlines | |||||
input: "This is a line of text.\nThis is another line of text.\nThis is another." | |||||
output: [Text(text="This is a line of text.\nThis is another line of text.\nThis is another.")] | |||||
--- | |||||
name: unicode | |||||
label: ensure unicode data is handled properly | |||||
input: "Thís ís å sëñtënce with diœcritiçs." | |||||
output: [Text(text="Thís ís å sëñtënce with diœcritiçs.")] | |||||
--- | |||||
name: unicode2 | |||||
label: additional unicode check for non-BMP codepoints | |||||
input: "𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰" | |||||
output: [Text(text="𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰")] |
@@ -0,0 +1,158 @@ | |||||
name: blank | |||||
label: wikilink with no content | |||||
input: "[[]]" | |||||
output: [WikilinkOpen(), WikilinkClose()] | |||||
--- | |||||
name: blank_with_text | |||||
label: wikilink with no content but a pipe | |||||
input: "[[|]]" | |||||
output: [WikilinkOpen(), WikilinkSeparator(), WikilinkClose()] | |||||
--- | |||||
name: basic | |||||
label: simplest type of wikilink | |||||
input: "[[wikilink]]" | |||||
output: [WikilinkOpen(), Text(text="wikilink"), WikilinkClose()] | |||||
--- | |||||
name: with_text | |||||
label: wikilink with a text value | |||||
input: "[[foo|bar]]" | |||||
output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="bar"), WikilinkClose()] | |||||
--- | |||||
name: blank_with_multiple_texts | |||||
label: no content, multiple pipes | |||||
input: "[[|||]]" | |||||
output: [WikilinkOpen(), WikilinkSeparator(), Text(text="||"), WikilinkClose()] | |||||
--- | |||||
name: multiple_texts | |||||
label: multiple text values separated by pipes | |||||
input: "[[foo|bar|baz]]" | |||||
output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="bar|baz"), WikilinkClose()] | |||||
--- | |||||
name: nested | |||||
label: a wikilink nested within the value of another | |||||
input: "[[foo|[[bar]]]]" | |||||
output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), WikilinkOpen(), Text(text="bar"), WikilinkClose(), WikilinkClose()] | |||||
--- | |||||
name: nested_with_text | |||||
label: a wikilink nested within the value of another, separated by other data | |||||
input: "[[foo|a[[b]]c]]" | |||||
output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="a"), WikilinkOpen(), Text(text="b"), WikilinkClose(), Text(text="c"), WikilinkClose()] | |||||
--- | |||||
name: invalid_newline | |||||
label: invalid wikilink: newline as only content | |||||
input: "[[\n]]" | |||||
output: [Text(text="[[\n]]")] | |||||
--- | |||||
name: invalid_right_brace | |||||
label: invalid wikilink: right brace | |||||
input: "[[foo}b}a}r]]" | |||||
output: [Text(text="[[foo}b}a}r]]")] | |||||
--- | |||||
name: invalid_left_brace | |||||
label: invalid wikilink: left brace | |||||
input: "[[foo{{[a}}]]" | |||||
output: [Text(text="[[foo{{[a}}]]")] | |||||
--- | |||||
name: invalid_right_bracket | |||||
label: invalid wikilink: right bracket | |||||
input: "[[foo]bar]]" | |||||
output: [Text(text="[[foo]bar]]")] | |||||
--- | |||||
name: invalid_left_bracket | |||||
label: invalid wikilink: left bracket | |||||
input: "[[foo[bar]]" | |||||
output: [Text(text="[[foo[bar]]")] | |||||
--- | |||||
name: invalid_nested | |||||
label: invalid wikilink: trying to nest in the wrong context | |||||
input: "[[foo[[bar]]]]" | |||||
output: [Text(text="[[foo"), WikilinkOpen(), Text(text="bar"), WikilinkClose(), Text(text="]]")] | |||||
--- | |||||
name: invalid_nested_text | |||||
label: invalid wikilink: trying to nest in the wrong context, with a text param | |||||
input: "[[foo[[bar]]|baz]]" | |||||
output: [Text(text="[[foo"), WikilinkOpen(), Text(text="bar"), WikilinkClose(), Text(text="|baz]]")] | |||||
--- | |||||
name: incomplete_open_only | |||||
label: incomplete wikilinks: just an open | |||||
input: "[[" | |||||
output: [Text(text="[[")] | |||||
--- | |||||
name: incomplete_open_text | |||||
label: incomplete wikilinks: an open with some text | |||||
input: "[[foo" | |||||
output: [Text(text="[[foo")] | |||||
--- | |||||
name: incomplete_open_text_pipe | |||||
label: incomplete wikilinks: an open, text, then a pipe | |||||
input: "[[foo|" | |||||
output: [Text(text="[[foo|")] | |||||
--- | |||||
name: incomplete_open_pipe | |||||
label: incomplete wikilinks: an open, then a pipe | |||||
input: "[[|" | |||||
output: [Text(text="[[|")] | |||||
--- | |||||
name: incomplete_open_pipe_text | |||||
label: incomplete wikilinks: an open, then a pipe, then text | |||||
input: "[[|foo" | |||||
output: [Text(text="[[|foo")] | |||||
--- | |||||
name: incomplete_open_pipes_text | |||||
label: incomplete wikilinks: an open, then a pipe, then text, then two pipes | |||||
input: "[[|f||" | |||||
output: [Text(text="[[|f||")] | |||||
--- | |||||
name: incomplete_open_partial_close | |||||
label: incomplete wikilinks: an open, then one right brace | |||||
input: "[[{}" | |||||
output: [Text(text="[[{}")] | |||||
--- | |||||
name: incomplete_preserve_previous | |||||
label: incomplete wikilinks: a valid wikilink followed by an invalid one | |||||
input: "[[foo]] [[bar" | |||||
output: [WikilinkOpen(), Text(text="foo"), WikilinkClose(), Text(text=" [[bar")] |