diff --git a/.travis.yml b/.travis.yml index 347badd..31090f2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,6 @@ language: python python: + - "2.6" - "2.7" - "3.2" - "3.3" diff --git a/CHANGELOG b/CHANGELOG index 99eff38..9faf6b7 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,21 @@ +v0.3.3 (released April 22, 2014): + +- Added support for Python 2.6 and 3.4. +- Template.has() is now passed 'ignore_empty=False' by default instead of True. + This fixes a bug when adding parameters to templates with empty fields, and + is a breaking change if you rely on the default behavior. +- The 'matches' argument of Wikicode's filter methods now accepts a function + (taking one argument, a Node, and returning a bool) in addition to a regex. +- Re-added 'flat' argument to Wikicode.get_sections(), fixed the order in which + it returns sections, and made it faster. +- Wikicode.matches() now accepts a tuple or list of strings/Wikicode objects + instead of just a single string or Wikicode. +- Given the frequency of issues with the (admittedly insufficient) tag parser, + there's a temporary skip_style_tags argument to parse() that ignores '' and + ''' until these issues are corrected. +- Fixed a parser bug involving nested wikilinks and external links. +- C code cleanup and speed improvements. + v0.3.2 (released September 1, 2013): - Added support for Python 3.2 (along with current support for 3.3 and 2.7). diff --git a/LICENSE b/LICENSE index 413f1c4..327905b 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (C) 2012-2013 Ben Kurtovic +Copyright (C) 2012-2014 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/docs/changelog.rst b/docs/changelog.rst index e72baef..9efc022 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,32 @@ Changelog ========= +v0.3.3 +------ + +`Released April 22, 2014 `_ +(`changes `__): + +- Added support for Python 2.6 and 3.4. +- :py:meth:`.Template.has` is now passed *ignore_empty=False* by default + instead of *True*. This fixes a bug when adding parameters to templates with + empty fields, **and is a breaking change if you rely on the default + behavior.** +- The *matches* argument of :py:class:`Wikicode's <.Wikicode>` + :py:meth:`.filter` methods now accepts a function (taking one argument, a + :py:class:`.Node`, and returning a bool) in addition to a regex. +- Re-added *flat* argument to :py:meth:`.Wikicode.get_sections`, fixed the + order in which it returns sections, and made it faster. +- :py:meth:`.Wikicode.matches` now accepts a tuple or list of + strings/:py:class:`.Wikicode` objects instead of just a single string or + :py:class:`.Wikicode`. +- Given the frequency of issues with the (admittedly insufficient) tag parser, + there's a temporary *skip_style_tags* argument to + :py:meth:`~.Parser.parse` that ignores ``''`` and ``'''`` until these issues + are corrected. +- Fixed a parser bug involving nested wikilinks and external links. +- C code cleanup and speed improvements. + v0.3.2 ------ diff --git a/docs/conf.py b/docs/conf.py index 9fa1e02..dd1d6e1 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -42,7 +42,7 @@ master_doc = 'index' # General information about the project. 
project = u'mwparserfromhell' -copyright = u'2012, 2013 Ben Kurtovic' +copyright = u'2012, 2013, 2014 Ben Kurtovic' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index 6569d96..469e9a6 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -29,10 +29,10 @@ outrageously powerful parser for `MediaWiki `_ wikicode. from __future__ import unicode_literals __author__ = "Ben Kurtovic" -__copyright__ = "Copyright (C) 2012, 2013 Ben Kurtovic" +__copyright__ = "Copyright (C) 2012, 2013, 2014 Ben Kurtovic" __license__ = "MIT License" -__version__ = "0.3.2" -__email__ = "ben.kurtovic@verizon.net" +__version__ = "0.3.3" +__email__ = "ben.kurtovic@gmail.com" from . import (compat, definitions, nodes, parser, smart_list, string_mixin, utils, wikicode) diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index a142128..4384ace 100644 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -10,18 +10,21 @@ types are meant to be imported directly from within the parser's modules. import sys -py3k = sys.version_info.major == 3 -py32 = py3k and sys.version_info.minor == 2 +py26 = (sys.version_info[0] == 2) and (sys.version_info[1] == 6) +py3k = (sys.version_info[0] == 3) +py32 = py3k and (sys.version_info[1] == 2) if py3k: bytes = bytes str = str + range = range maxsize = sys.maxsize import html.entities as htmlentities else: bytes = str str = unicode + range = xrange maxsize = sys.maxint import htmlentitydefs as htmlentities diff --git a/mwparserfromhell/definitions.py b/mwparserfromhell/definitions.py index 9449bcb..6020ad1 100644 --- a/mwparserfromhell/definitions.py +++ b/mwparserfromhell/definitions.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/__init__.py b/mwparserfromhell/nodes/__init__.py index ba97b3f..223cc67 100644 --- a/mwparserfromhell/nodes/__init__.py +++ b/mwparserfromhell/nodes/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -42,21 +42,21 @@ class Node(StringMixIn): :py:meth:`__unicode__` must be overridden. It should return a ``unicode`` or (``str`` in py3k) representation of the node. If the node contains - :py:class:`~.Wikicode` objects inside of it, :py:meth:`__iternodes__` - should be overridden to yield tuples of (``wikicode``, - ``node_in_wikicode``) for each node in each wikicode, as well as the node - itself (``None``, ``self``). If the node is printable, :py:meth:`__strip__` - should be overridden to return the printable version of the node - it does - not have to be a string, but something that can be converted to a string - with ``str()``. 
Finally, :py:meth:`__showtree__` can be overridden to build - a nice tree representation of the node, if desired, for + :py:class:`~.Wikicode` objects inside of it, :py:meth:`__children__` + should be a generator that iterates over them. If the node is printable + (shown when the page is rendered), :py:meth:`__strip__` should return its + printable version, stripping out any formatting marks. It does not have to + return a string, but something that can be converted to a string with + ``str()``. Finally, :py:meth:`__showtree__` can be overridden to build a + nice tree representation of the node, if desired, for :py:meth:`~.Wikicode.get_tree`. """ def __unicode__(self): raise NotImplementedError() - def __iternodes__(self, getter): - yield None, self + def __children__(self): + return # Funny generator-that-yields-nothing syntax + yield def __strip__(self, normalize, collapse): return None diff --git a/mwparserfromhell/nodes/argument.py b/mwparserfromhell/nodes/argument.py index d7db92a..a595dfb 100644 --- a/mwparserfromhell/nodes/argument.py +++ b/mwparserfromhell/nodes/argument.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -42,13 +42,10 @@ class Argument(Node): return start + "|" + str(self.default) + "}}}" return start + "}}}" - def __iternodes__(self, getter): - yield None, self - for child in getter(self.name): - yield self.name, child + def __children__(self): + yield self.name if self.default is not None: - for child in getter(self.default): - yield self.default, child + yield self.default def __strip__(self, normalize, collapse): if self.default is not None: diff --git a/mwparserfromhell/nodes/comment.py b/mwparserfromhell/nodes/comment.py index e96ce38..fcfd946 100644 --- a/mwparserfromhell/nodes/comment.py +++ b/mwparserfromhell/nodes/comment.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/external_link.py b/mwparserfromhell/nodes/external_link.py index d74f6b3..d13376e 100644 --- a/mwparserfromhell/nodes/external_link.py +++ b/mwparserfromhell/nodes/external_link.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -44,13 +44,10 @@ class ExternalLink(Node): return "[" + str(self.url) + "]" return str(self.url) - def __iternodes__(self, getter): - yield None, self - for child in getter(self.url): - yield self.url, child + def __children__(self): + yield self.url if self.title is not None: - for child in getter(self.title): - yield self.title, child + yield self.title def __strip__(self, normalize, collapse): if self.brackets: diff --git a/mwparserfromhell/nodes/extras/__init__.py b/mwparserfromhell/nodes/extras/__init__.py index e860f01..a131269 100644 --- a/mwparserfromhell/nodes/extras/__init__.py +++ b/mwparserfromhell/nodes/extras/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright 
(C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/extras/attribute.py b/mwparserfromhell/nodes/extras/attribute.py index 8f7f453..4b7c668 100644 --- a/mwparserfromhell/nodes/extras/attribute.py +++ b/mwparserfromhell/nodes/extras/attribute.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/extras/parameter.py b/mwparserfromhell/nodes/extras/parameter.py index c1c10a0..e273af9 100644 --- a/mwparserfromhell/nodes/extras/parameter.py +++ b/mwparserfromhell/nodes/extras/parameter.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/heading.py b/mwparserfromhell/nodes/heading.py index f001234..47c23a8 100644 --- a/mwparserfromhell/nodes/heading.py +++ b/mwparserfromhell/nodes/heading.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -39,10 +39,8 @@ class Heading(Node): def __unicode__(self): return ("=" * self.level) + str(self.title) + ("=" * self.level) - def __iternodes__(self, getter): - yield None, self - for child in getter(self.title): - yield self.title, child + def __children__(self): + yield self.title def __strip__(self, normalize, collapse): return self.title.strip_code(normalize, collapse) diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index b51bd92..c75cb99 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index 06f43d0..661304e 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -70,23 +70,17 @@ class Tag(Node): result += "" return result - def __iternodes__(self, getter): - yield None, self + def __children__(self): if not self.wiki_markup: - for child in getter(self.tag): - yield self.tag, child + yield self.tag for attr in self.attributes: - for child in getter(attr.name): - yield attr.name, child - if attr.value: - for child in getter(attr.value): - yield attr.value, child + yield attr.name + if attr.value is not None: + yield attr.value if self.contents: - for child in getter(self.contents): - 
yield self.contents, child + yield self.contents if not self.self_closing and not self.wiki_markup and self.closing_tag: - for child in getter(self.closing_tag): - yield self.closing_tag, child + yield self.closing_tag def __strip__(self, normalize, collapse): if self.contents and is_visible(self.tag): diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 1b4e3fa..d1a0b0e 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -26,7 +26,7 @@ import re from . import HTMLEntity, Node, Text from .extras import Parameter -from ..compat import str +from ..compat import range, str from ..utils import parse_anything __all__ = ["Template"] @@ -51,16 +51,12 @@ class Template(Node): else: return "{{" + str(self.name) + "}}" - def __iternodes__(self, getter): - yield None, self - for child in getter(self.name): - yield self.name, child + def __children__(self): + yield self.name for param in self.params: if param.showkey: - for child in getter(param.name): - yield param.name, child - for child in getter(param.value): - yield param.value, child + yield param.name + yield param.value def __showtree__(self, write, get, mark): write("{{") @@ -174,7 +170,7 @@ class Template(Node): def name(self, value): self._name = parse_anything(value) - def has(self, name, ignore_empty=True): + def has(self, name, ignore_empty=False): """Return ``True`` if any parameter in the template is named *name*. With *ignore_empty*, ``False`` will be returned even if the template @@ -190,7 +186,7 @@ class Template(Node): return True return False - has_param = lambda self, name, ignore_empty=True: \ + has_param = lambda self, name, ignore_empty=False: \ self.has(name, ignore_empty) has_param.__doc__ = "Alias for :py:meth:`has`." 
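For reference, a minimal usage sketch of the Template.has() default change above (the breaking switch to 'ignore_empty=False'). This snippet is illustrative and not part of the patch; it uses only API already present in the library and mirrors the cases covered in tests/test_template.py:

    import mwparserfromhell

    # A template with an empty-valued parameter, the kind of field that
    # triggered the original bug this release fixes.
    tpl = mwparserfromhell.parse("{{foo|bar=}}").filter_templates()[0]

    tpl.has("bar")                      # True  -- empty parameters now count by default
    tpl.has("bar", ignore_empty=True)   # False -- the previous default behavior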
diff --git a/mwparserfromhell/nodes/text.py b/mwparserfromhell/nodes/text.py index 6fda3da..55c714e 100644 --- a/mwparserfromhell/nodes/text.py +++ b/mwparserfromhell/nodes/text.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index 527e9bb..4640f34 100644 --- a/mwparserfromhell/nodes/wikilink.py +++ b/mwparserfromhell/nodes/wikilink.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -41,13 +41,10 @@ class Wikilink(Node): return "[[" + str(self.title) + "|" + str(self.text) + "]]" return "[[" + str(self.title) + "]]" - def __iternodes__(self, getter): - yield None, self - for child in getter(self.title): - yield self.title, child + def __children__(self): + yield self.title if self.text is not None: - for child in getter(self.text): - yield self.text, child + yield self.text def __strip__(self, normalize, collapse): if self.text is not None: diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index 22c3dc2..093e501 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -53,8 +53,12 @@ class Parser(object): self._tokenizer = Tokenizer() self._builder = Builder() - def parse(self, text, context=0): - """Parse *text*, returning a :py:class:`~.Wikicode` object tree.""" - tokens = self._tokenizer.tokenize(text, context) + def parse(self, text, context=0, skip_style_tags=False): + """Parse *text*, returning a :py:class:`~.Wikicode` object tree. + + If *skip_style_tags* is ``True``, then ``''`` and ``'''`` will not be + parsed, but instead be treated as plain text. 
+ """ + tokens = self._tokenizer.tokenize(text, context, skip_style_tags) code = self._builder.build(tokens) return code diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index d31f450..5f8ce45 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index 33da8f7..28023b5 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -55,7 +55,6 @@ Local (stack-specific) contexts: * :py:const:`EXT_LINK_URI` * :py:const:`EXT_LINK_TITLE` - * :py:const:`EXT_LINK_BRACKETS` * :py:const:`HEADING` @@ -100,7 +99,8 @@ Aggregate contexts: * :py:const:`FAIL` * :py:const:`UNSAFE` * :py:const:`DOUBLE` -* :py:const:`INVALID_LINK` +* :py:const:`NO_WIKILINKS` +* :py:const:`NO_EXT_LINKS` """ @@ -121,38 +121,37 @@ WIKILINK = WIKILINK_TITLE + WIKILINK_TEXT EXT_LINK_URI = 1 << 7 EXT_LINK_TITLE = 1 << 8 -EXT_LINK_BRACKETS = 1 << 9 -EXT_LINK = EXT_LINK_URI + EXT_LINK_TITLE + EXT_LINK_BRACKETS - -HEADING_LEVEL_1 = 1 << 10 -HEADING_LEVEL_2 = 1 << 11 -HEADING_LEVEL_3 = 1 << 12 -HEADING_LEVEL_4 = 1 << 13 -HEADING_LEVEL_5 = 1 << 14 -HEADING_LEVEL_6 = 1 << 15 +EXT_LINK = EXT_LINK_URI + EXT_LINK_TITLE + +HEADING_LEVEL_1 = 1 << 9 +HEADING_LEVEL_2 = 1 << 10 +HEADING_LEVEL_3 = 1 << 11 +HEADING_LEVEL_4 = 1 << 12 +HEADING_LEVEL_5 = 1 << 13 +HEADING_LEVEL_6 = 1 << 14 HEADING = (HEADING_LEVEL_1 + HEADING_LEVEL_2 + HEADING_LEVEL_3 + HEADING_LEVEL_4 + HEADING_LEVEL_5 + HEADING_LEVEL_6) -TAG_OPEN = 1 << 16 -TAG_ATTR = 1 << 17 -TAG_BODY = 1 << 18 -TAG_CLOSE = 1 << 19 +TAG_OPEN = 1 << 15 +TAG_ATTR = 1 << 16 +TAG_BODY = 1 << 17 +TAG_CLOSE = 1 << 18 TAG = TAG_OPEN + TAG_ATTR + TAG_BODY + TAG_CLOSE -STYLE_ITALICS = 1 << 20 -STYLE_BOLD = 1 << 21 -STYLE_PASS_AGAIN = 1 << 22 -STYLE_SECOND_PASS = 1 << 23 +STYLE_ITALICS = 1 << 19 +STYLE_BOLD = 1 << 20 +STYLE_PASS_AGAIN = 1 << 21 +STYLE_SECOND_PASS = 1 << 22 STYLE = STYLE_ITALICS + STYLE_BOLD + STYLE_PASS_AGAIN + STYLE_SECOND_PASS -DL_TERM = 1 << 24 +DL_TERM = 1 << 23 -HAS_TEXT = 1 << 25 -FAIL_ON_TEXT = 1 << 26 -FAIL_NEXT = 1 << 27 -FAIL_ON_LBRACE = 1 << 28 -FAIL_ON_RBRACE = 1 << 29 -FAIL_ON_EQUALS = 1 << 30 +HAS_TEXT = 1 << 24 +FAIL_ON_TEXT = 1 << 25 +FAIL_NEXT = 1 << 26 +FAIL_ON_LBRACE = 1 << 27 +FAIL_ON_RBRACE = 1 << 28 +FAIL_ON_EQUALS = 1 << 29 SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE + FAIL_ON_RBRACE + FAIL_ON_EQUALS) @@ -163,7 +162,8 @@ GL_HEADING = 1 << 0 # Aggregate contexts: FAIL = TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK_TITLE + HEADING + TAG + STYLE -UNSAFE = (TEMPLATE_NAME + WIKILINK + EXT_LINK_TITLE + TEMPLATE_PARAM_KEY + - ARGUMENT_NAME + TAG_CLOSE) +UNSAFE = (TEMPLATE_NAME + WIKILINK_TITLE + EXT_LINK_TITLE + + TEMPLATE_PARAM_KEY + ARGUMENT_NAME + TAG_CLOSE) DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE -INVALID_LINK = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK + EXT_LINK +NO_WIKILINKS = TEMPLATE_NAME + ARGUMENT_NAME 
+ WIKILINK_TITLE + EXT_LINK_URI +NO_EXT_LINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 609a595..de58e72 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1,6 +1,6 @@ /* Tokenizer for MWParserFromHell -Copyright (C) 2012-2013 Ben Kurtovic +Copyright (C) 2012-2014 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in @@ -31,7 +31,7 @@ static int is_marker(Py_UNICODE this) int i; for (i = 0; i < NUM_MARKERS; i++) { - if (*MARKERS[i] == this) + if (MARKERS[i] == this) return 1; } return 0; @@ -440,7 +440,7 @@ static int Tokenizer_emit_textbuffer(Tokenizer* self, Textbuffer* buffer, int reverse) { Textbuffer *original = buffer; - int i; + long i; if (reverse) { do { @@ -642,7 +642,7 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) PyObject *tokenlist; self->head += 2; - while (Tokenizer_READ(self, 0) == *"{" && braces < MAX_BRACES) { + while (Tokenizer_READ(self, 0) == '{' && braces < MAX_BRACES) { self->head++; braces++; } @@ -674,8 +674,8 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) if (BAD_ROUTE) { char text[MAX_BRACES + 1]; RESET_ROUTE(); - for (i = 0; i < braces; i++) text[i] = *"{"; - text[braces] = *""; + for (i = 0; i < braces; i++) text[i] = '{'; + text[braces] = '\0'; if (Tokenizer_emit_text_then_stack(self, text)) { Py_XDECREF(text); return -1; @@ -872,7 +872,7 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self) if (Tokenizer_push(self, LC_EXT_LINK_URI)) return -1; - if (Tokenizer_READ(self, 0) == *"/" && Tokenizer_READ(self, 1) == *"/") { + if (Tokenizer_READ(self, 0) == '/' && Tokenizer_READ(self, 1) == '/') { if (Tokenizer_emit_text(self, "//")) return -1; self->head += 2; @@ -881,7 +881,7 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self) buffer = Textbuffer_new(); if (!buffer) return -1; - while ((this = Tokenizer_READ(self, 0)) != *"") { + while ((this = Tokenizer_READ(self, 0))) { i = 0; while (1) { if (!valid[i]) @@ -898,18 +898,18 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self) self->head++; } end_of_loop: - if (this != *":") { + if (this != ':') { Textbuffer_dealloc(buffer); Tokenizer_fail_route(self); return 0; } - if (Tokenizer_emit_char(self, *":")) { + if (Tokenizer_emit_char(self, ':')) { Textbuffer_dealloc(buffer); return -1; } self->head++; - slashes = (Tokenizer_READ(self, 0) == *"/" && - Tokenizer_READ(self, 1) == *"/"); + slashes = (Tokenizer_READ(self, 0) == '/' && + Tokenizer_READ(self, 1) == '/'); if (slashes) { if (Tokenizer_emit_text(self, "//")) { Textbuffer_dealloc(buffer); @@ -940,7 +940,8 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) Textbuffer *scheme_buffer = Textbuffer_new(), *temp_buffer; PyObject *scheme; Py_UNICODE chunk; - int slashes, i, j; + long i; + int slashes, j; if (!scheme_buffer) return -1; @@ -973,8 +974,8 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) Textbuffer_dealloc(scheme_buffer); return -1; } - slashes = (Tokenizer_READ(self, 0) == *"/" && - Tokenizer_READ(self, 1) == *"/"); + slashes = (Tokenizer_READ(self, 0) == '/' && + Tokenizer_READ(self, 1) == '/'); if (!IS_SCHEME(scheme, slashes, 1)) { Py_DECREF(scheme); Textbuffer_dealloc(scheme_buffer); @@ -988,7 +989,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) } if 
(Tokenizer_emit_textbuffer(self, scheme_buffer, 1)) return -1; - if (Tokenizer_emit_char(self, *":")) + if (Tokenizer_emit_char(self, ':')) return -1; if (slashes) { if (Tokenizer_emit_text(self, "//")) @@ -1014,13 +1015,13 @@ Tokenizer_handle_free_link_text(Tokenizer* self, int* parens, return error; \ } - if (this == *"(" && !(*parens)) { + if (this == '(' && !(*parens)) { *parens = 1; PUSH_TAIL_BUFFER(*tail, -1) } - else if (this == *"," || this == *";" || this == *"\\" || this == *"." || - this == *":" || this == *"!" || this == *"?" || - (!(*parens) && this == *")")) + else if (this == ',' || this == ';' || this == '\\' || this == '.' || + this == ':' || this == '!' || this == '?' || + (!(*parens) && this == ')')) return Textbuffer_write(tail, this); else PUSH_TAIL_BUFFER(*tail, -1) @@ -1037,12 +1038,12 @@ Tokenizer_is_free_link(Tokenizer* self, Py_UNICODE this, Py_UNICODE next) Py_UNICODE after = Tokenizer_READ(self, 2); int ctx = self->topstack->context; - return (this == *"" || this == *"\n" || this == *"[" || this == *"]" || - this == *"<" || this == *">" || (this == *"'" && next == *"'") || - (this == *"|" && ctx & LC_TEMPLATE) || - (this == *"=" && ctx & (LC_TEMPLATE_PARAM_KEY | LC_HEADING)) || - (this == *"}" && next == *"}" && - (ctx & LC_TEMPLATE || (after == *"}" && ctx & LC_ARGUMENT)))); + return (!this || this == '\n' || this == '[' || this == ']' || + this == '<' || this == '>' || (this == '\'' && next == '\'') || + (this == '|' && ctx & LC_TEMPLATE) || + (this == '=' && ctx & (LC_TEMPLATE_PARAM_KEY | LC_HEADING)) || + (this == '}' && next == '}' && + (ctx & LC_TEMPLATE || (after == '}' && ctx & LC_ARGUMENT)))); } /* @@ -1061,21 +1062,21 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets, if (BAD_ROUTE) return NULL; this = Tokenizer_READ(self, 0); - if (this == *"" || this == *"\n" || this == *" " || this == *"]") + if (!this || this == '\n' || this == ' ' || this == ']') return Tokenizer_fail_route(self); - if (!brackets && this == *"[") + if (!brackets && this == '[') return Tokenizer_fail_route(self); while (1) { this = Tokenizer_READ(self, 0); next = Tokenizer_READ(self, 1); - if (this == *"&") { + if (this == '&') { PUSH_TAIL_BUFFER(*extra, NULL) if (Tokenizer_parse_entity(self)) return NULL; } - else if (this == *"<" && next == *"!" - && Tokenizer_READ(self, 2) == *"-" - && Tokenizer_READ(self, 3) == *"-") { + else if (this == '<' && next == '!' 
+ && Tokenizer_READ(self, 2) == '-' + && Tokenizer_READ(self, 3) == '-') { PUSH_TAIL_BUFFER(*extra, NULL) if (Tokenizer_parse_comment(self)) return NULL; @@ -1084,16 +1085,16 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets, self->head--; return Tokenizer_pop(self); } - else if (this == *"" || this == *"\n") + else if (!this || this == '\n') return Tokenizer_fail_route(self); - else if (this == *"{" && next == *"{" && Tokenizer_CAN_RECURSE(self)) { + else if (this == '{' && next == '{' && Tokenizer_CAN_RECURSE(self)) { PUSH_TAIL_BUFFER(*extra, NULL) if (Tokenizer_parse_template_or_argument(self)) return NULL; } - else if (this == *"]") + else if (this == ']') return Tokenizer_pop(self); - else if (this == *" ") { + else if (this == ' ') { if (brackets) { if (Tokenizer_emit(self, ExternalLinkSeparator)) return NULL; @@ -1102,7 +1103,7 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets, self->head++; return Tokenizer_parse(self, 0, 0); } - if (Textbuffer_write(extra, *" ")) + if (Textbuffer_write(extra, ' ')) return NULL; return Tokenizer_pop(self); } @@ -1157,7 +1158,7 @@ Tokenizer_remove_uri_scheme_from_textbuffer(Tokenizer* self, PyObject* link) */ static int Tokenizer_parse_external_link(Tokenizer* self, int brackets) { - #define INVALID_CONTEXT self->topstack->context & AGG_INVALID_LINK + #define INVALID_CONTEXT self->topstack->context & AGG_NO_EXT_LINKS #define NOT_A_LINK \ if (!brackets && self->topstack->context & LC_DLTERM) \ return Tokenizer_handle_dl_term(self); \ @@ -1232,7 +1233,7 @@ static int Tokenizer_parse_heading(Tokenizer* self) self->global |= GL_HEADING; self->head += 1; - while (Tokenizer_READ(self, 0) == *"=") { + while (Tokenizer_READ(self, 0) == '=') { best++; self->head++; } @@ -1242,7 +1243,7 @@ static int Tokenizer_parse_heading(Tokenizer* self) RESET_ROUTE(); self->head = reset + best - 1; for (i = 0; i < best; i++) { - if (Tokenizer_emit_char(self, *"=")) + if (Tokenizer_emit_char(self, '=')) return -1; } self->global ^= GL_HEADING; @@ -1271,7 +1272,7 @@ static int Tokenizer_parse_heading(Tokenizer* self) if (heading->level < best) { diff = best - heading->level; for (i = 0; i < diff; i++) { - if (Tokenizer_emit_char(self, *"=")) { + if (Tokenizer_emit_char(self, '=')) { Py_DECREF(heading->title); free(heading); return -1; @@ -1296,14 +1297,14 @@ static int Tokenizer_parse_heading(Tokenizer* self) */ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self) { - Py_ssize_t reset = self->head, best; - int i, current, level, diff; + Py_ssize_t reset = self->head; + int best, i, current, level, diff; HeadingData *after, *heading; PyObject *stack; self->head += 1; best = 1; - while (Tokenizer_READ(self, 0) == *"=") { + while (Tokenizer_READ(self, 0) == '=') { best++; self->head++; } @@ -1316,7 +1317,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self) if (level < best) { diff = best - level; for (i = 0; i < diff; i++) { - if (Tokenizer_emit_char(self, *"=")) + if (Tokenizer_emit_char(self, '=')) return NULL; } } @@ -1324,7 +1325,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self) } else { for (i = 0; i < best; i++) { - if (Tokenizer_emit_char(self, *"=")) { + if (Tokenizer_emit_char(self, '=')) { Py_DECREF(after->title); free(after); return NULL; @@ -1372,21 +1373,21 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) return -1; self->head++; this = Tokenizer_READ(self, 0); - if (this == *"") { + if (!this) { Tokenizer_fail_route(self); return 0; } - if (this == *"#") { + if 
(this == '#') { numeric = 1; if (Tokenizer_emit(self, HTMLEntityNumeric)) return -1; self->head++; this = Tokenizer_READ(self, 0); - if (this == *"") { + if (!this) { Tokenizer_fail_route(self); return 0; } - if (this == *"x" || this == *"X") { + if (this == 'x' || this == 'X') { hexadecimal = 1; kwargs = PyDict_New(); if (!kwargs) @@ -1416,22 +1417,20 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) zeroes = 0; while (1) { this = Tokenizer_READ(self, 0); - if (this == *";") { + if (this == ';') { if (i == 0) FAIL_ROUTE_AND_EXIT() break; } - if (i == 0 && this == *"0") { + if (i == 0 && this == '0') { zeroes++; self->head++; continue; } if (i >= MAX_ENTITY_SIZE) FAIL_ROUTE_AND_EXIT() - for (j = 0; j < NUM_MARKERS; j++) { - if (this == *MARKERS[j]) - FAIL_ROUTE_AND_EXIT() - } + if (is_marker(this)) + FAIL_ROUTE_AND_EXIT() j = 0; while (1) { if (!valid[j]) @@ -1508,7 +1507,7 @@ static int Tokenizer_parse_entity(Tokenizer* self) if (BAD_ROUTE) { RESET_ROUTE(); self->head = reset; - if (Tokenizer_emit_char(self, *"&")) + if (Tokenizer_emit_char(self, '&')) return -1; return 0; } @@ -1537,14 +1536,14 @@ static int Tokenizer_parse_comment(Tokenizer* self) return -1; while (1) { this = Tokenizer_READ(self, 0); - if (this == *"") { + if (!this) { comment = Tokenizer_pop(self); Py_XDECREF(comment); self->head = reset; return Tokenizer_emit_text(self, "", str(node)) - def test_iternodes(self): - """test Comment.__iternodes__()""" + def test_children(self): + """test Comment.__children__()""" node = Comment("foobar") - gen = node.__iternodes__(None) - self.assertEqual((None, node), next(gen)) + gen = node.__children__() self.assertRaises(StopIteration, next, gen) def test_strip(self): diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 2374516..52427e3 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -21,7 +21,11 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest + +try: + import unittest2 as unittest +except ImportError: + import unittest try: from mwparserfromhell.parser._tokenizer import CTokenizer diff --git a/tests/test_docs.py b/tests/test_docs.py index 6d066bd..c873f0e 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -22,7 +22,11 @@ from __future__ import print_function, unicode_literals import json -import unittest + +try: + import unittest2 as unittest +except ImportError: + import unittest import mwparserfromhell from mwparserfromhell.compat import py3k, str diff --git a/tests/test_external_link.py b/tests/test_external_link.py index 13a82bf..c81470e 100644 --- a/tests/test_external_link.py +++ b/tests/test_external_link.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -21,12 +21,16 @@ # SOFTWARE. 
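The __iternodes__() to __children__() conversion above (and in the node tests that follow) swaps the old generator of (wikicode, node) tuples for a plain generator over the Wikicode objects a node contains. As a rough, hypothetical sketch of how a caller could walk a whole parse tree with the new protocol -- the walk() helper below is not part of the patch, and the library's own traversal code is not shown in this diff:

    import mwparserfromhell

    def walk(code):
        # Hypothetical depth-first walk over every node, using only
        # Wikicode.nodes and the new Node.__children__() generator.
        for node in code.nodes:                  # top-level nodes of this Wikicode
            yield node
            for child in node.__children__():    # each child is a Wikicode object
                for descendant in walk(child):   # recurse into the nested wikicode
                    yield descendant

    for node in walk(mwparserfromhell.parse("{{foo|[[bar]]}}")):
        print(repr(node))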
from __future__ import unicode_literals -import unittest + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import ExternalLink, Text -from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestExternalLink(TreeEqualityTestCase): """Test cases for the ExternalLink node.""" @@ -43,21 +47,16 @@ class TestExternalLink(TreeEqualityTestCase): wraptext("Example Web Page")) self.assertEqual("[http://example.com/ Example Web Page]", str(node4)) - def test_iternodes(self): - """test ExternalLink.__iternodes__()""" - node1n1 = Text("http://example.com/") - node2n1 = Text("http://example.com/") - node2n2, node2n3 = Text("Example"), Text("Page") - node1 = ExternalLink(wrap([node1n1]), brackets=False) - node2 = ExternalLink(wrap([node2n1]), wrap([node2n2, node2n3])) - gen1 = node1.__iternodes__(getnodes) - gen2 = node2.__iternodes__(getnodes) - self.assertEqual((None, node1), next(gen1)) - self.assertEqual((None, node2), next(gen2)) - self.assertEqual((node1.url, node1n1), next(gen1)) - self.assertEqual((node2.url, node2n1), next(gen2)) - self.assertEqual((node2.title, node2n2), next(gen2)) - self.assertEqual((node2.title, node2n3), next(gen2)) + def test_children(self): + """test ExternalLink.__children__()""" + node1 = ExternalLink(wraptext("http://example.com/"), brackets=False) + node2 = ExternalLink(wraptext("http://example.com/"), + wrap([Text("Example"), Text("Page")])) + gen1 = node1.__children__() + gen2 = node2.__children__() + self.assertEqual(node1.url, next(gen1)) + self.assertEqual(node2.url, next(gen2)) + self.assertEqual(node2.title, next(gen2)) self.assertRaises(StopIteration, next, gen1) self.assertRaises(StopIteration, next, gen2) diff --git a/tests/test_heading.py b/tests/test_heading.py index 7a65872..7c7a7ee 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -21,12 +21,16 @@ # SOFTWARE. 
from __future__ import unicode_literals -import unittest + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Heading, Text -from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestHeading(TreeEqualityTestCase): """Test cases for the Heading node.""" @@ -38,14 +42,11 @@ class TestHeading(TreeEqualityTestCase): node2 = Heading(wraptext(" zzz "), 5) self.assertEqual("===== zzz =====", str(node2)) - def test_iternodes(self): - """test Heading.__iternodes__()""" - text1, text2 = Text("foo"), Text("bar") - node = Heading(wrap([text1, text2]), 3) - gen = node.__iternodes__(getnodes) - self.assertEqual((None, node), next(gen)) - self.assertEqual((node.title, text1), next(gen)) - self.assertEqual((node.title, text2), next(gen)) + def test_children(self): + """test Heading.__children__()""" + node = Heading(wrap([Text("foo"), Text("bar")]), 3) + gen = node.__children__() + self.assertEqual(node.title, next(gen)) self.assertRaises(StopIteration, next, gen) def test_strip(self): diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index d38e5ec..eb6f606 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -21,7 +21,11 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import HTMLEntity @@ -42,11 +46,10 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertEqual("k", str(node3)) self.assertEqual("l", str(node4)) - def test_iternodes(self): - """test HTMLEntity.__iternodes__()""" + def test_children(self): + """test HTMLEntity.__children__()""" node = HTMLEntity("nbsp", named=True, hexadecimal=False) - gen = node.__iternodes__(None) - self.assertEqual((None, node), next(gen)) + gen = node.__children__() self.assertRaises(StopIteration, next, gen) def test_strip(self): diff --git a/tests/test_parameter.py b/tests/test_parameter.py index 4786e12..ee52b59 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -21,7 +21,11 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text diff --git a/tests/test_parser.py b/tests/test_parser.py index 8760c0e..955f455 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -21,24 +21,30 @@ # SOFTWARE. 
from __future__ import unicode_literals -import unittest + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell import parser -from mwparserfromhell.nodes import Template, Text, Wikilink +from mwparserfromhell.compat import range +from mwparserfromhell.nodes import Tag, Template, Text, Wikilink from mwparserfromhell.nodes.extras import Parameter from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext -from .compat import range class TestParser(TreeEqualityTestCase): """Tests for the Parser class itself, which tokenizes and builds nodes.""" def test_use_c(self): """make sure the correct tokenizer is used""" + restore = parser.use_c if parser.use_c: self.assertTrue(parser.Parser()._tokenizer.USES_C) parser.use_c = False self.assertFalse(parser.Parser()._tokenizer.USES_C) + parser.use_c = restore def test_parsing(self): """integration test for parsing overall""" @@ -62,5 +68,26 @@ class TestParser(TreeEqualityTestCase): actual = parser.Parser().parse(text) self.assertWikicodeEqual(expected, actual) + def test_skip_style_tags(self): + """test Parser.parse(skip_style_tags=True)""" + def test(): + with_style = parser.Parser().parse(text, skip_style_tags=False) + without_style = parser.Parser().parse(text, skip_style_tags=True) + self.assertWikicodeEqual(a, with_style) + self.assertWikicodeEqual(b, without_style) + + text = "This is an example with ''italics''!" + a = wrap([Text("This is an example with "), + Tag(wraptext("i"), wraptext("italics"), wiki_markup="''"), + Text("!")]) + b = wraptext("This is an example with ''italics''!") + + restore = parser.use_c + if parser.use_c: + test() + parser.use_c = False + test() + parser.use_c = restore + if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 0211e7f..40e2caf 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -21,7 +21,11 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.parser.tokenizer import Tokenizer diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 25df555..b739d62 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -21,12 +21,14 @@ # SOFTWARE. 
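The test_skip_style_tags test above exercises the new argument through both tokenizers; as a minimal standalone sketch (not part of the patch), using the Parser class the same way the tests do:

    from mwparserfromhell.parser import Parser

    text = "This is an example with ''italics''!"
    Parser().parse(text)                        # '' is parsed into a Tag node ("i", wiki_markup="''")
    Parser().parse(text, skip_style_tags=True)  # '' and ''' are left as plain text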
from __future__ import unicode_literals -import unittest -from mwparserfromhell.compat import py3k -from mwparserfromhell.smart_list import SmartList, _ListProxy +try: + import unittest2 as unittest +except ImportError: + import unittest -from .compat import range +from mwparserfromhell.compat import py3k, range +from mwparserfromhell.smart_list import SmartList, _ListProxy class TestSmartList(unittest.TestCase): """Test cases for the SmartList class and its child, _ListProxy.""" diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 5ee857c..bc44f55 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -23,12 +23,14 @@ from __future__ import unicode_literals from sys import getdefaultencoding from types import GeneratorType -import unittest -from mwparserfromhell.compat import bytes, py3k, py32, str -from mwparserfromhell.string_mixin import StringMixIn +try: + import unittest2 as unittest +except ImportError: + import unittest -from .compat import range +from mwparserfromhell.compat import bytes, py3k, py32, range, str +from mwparserfromhell.string_mixin import StringMixIn class _FakeString(StringMixIn): def __init__(self, data): @@ -59,8 +61,8 @@ class TestStringMixIn(unittest.TestCase): else: methods.append("decode") for meth in methods: - expected = getattr(str, meth).__doc__ - actual = getattr(StringMixIn, meth).__doc__ + expected = getattr("foo", meth).__doc__ + actual = getattr(_FakeString("foo"), meth).__doc__ self.assertEqual(expected, actual) def test_types(self): @@ -109,12 +111,12 @@ class TestStringMixIn(unittest.TestCase): self.assertFalse(str1 < str4) self.assertTrue(str1 <= str4) - self.assertTrue(str1 > str5) - self.assertTrue(str1 >= str5) - self.assertFalse(str1 == str5) - self.assertTrue(str1 != str5) - self.assertFalse(str1 < str5) - self.assertFalse(str1 <= str5) + self.assertFalse(str5 > str1) + self.assertFalse(str5 >= str1) + self.assertFalse(str5 == str1) + self.assertTrue(str5 != str1) + self.assertTrue(str5 < str1) + self.assertTrue(str5 <= str1) def test_other_magics(self): """test other magically implemented features, like len() and iter()""" @@ -376,7 +378,7 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual(actual, str25.rsplit(None, 3)) actual = [" this is a sentence with", "", "whitespace", ""] self.assertEqual(actual, str25.rsplit(" ", 3)) - if py3k: + if py3k and not py32: actual = [" this is a", "sentence", "with", "whitespace"] self.assertEqual(actual, str25.rsplit(maxsplit=3)) @@ -394,7 +396,7 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual(actual, str25.split(None, 3)) actual = ["", "", "", "this is a sentence with whitespace "] self.assertEqual(actual, str25.split(" ", 3)) - if py3k: + if py3k and not py32: actual = ["this", "is", "a", "sentence with whitespace "] self.assertEqual(actual, str25.split(maxsplit=3)) diff --git a/tests/test_tag.py b/tests/test_tag.py index 5ef92a5..111511a 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the 
"Software"), to deal @@ -21,12 +21,16 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Tag, Template, Text from mwparserfromhell.nodes.extras import Attribute -from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext agen = lambda name, value: Attribute(wraptext(name), wraptext(value)) agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), False) @@ -64,37 +68,30 @@ class TestTag(TreeEqualityTestCase): self.assertEqual("----", str(node8)) self.assertEqual("''italics!''", str(node9)) - def test_iternodes(self): - """test Tag.__iternodes__()""" - node1n1, node1n2 = Text("ref"), Text("foobar") - node2n1, node3n1, node3n2 = Text("bold text"), Text("img"), Text("id") - node3n3, node3n4, node3n5 = Text("foo"), Text("class"), Text("bar") - + def test_children(self): + """test Tag.__children__()""" # foobar - node1 = Tag(wrap([node1n1]), wrap([node1n2])) + node1 = Tag(wraptext("ref"), wraptext("foobar")) # '''bold text''' - node2 = Tag(wraptext("b"), wrap([node2n1]), wiki_markup="'''") + node2 = Tag(wraptext("b"), wraptext("bold text"), wiki_markup="'''") # - node3 = Tag(wrap([node3n1]), - attrs=[Attribute(wrap([node3n2]), wrap([node3n3])), - Attribute(wrap([node3n4]), wrap([node3n5]))], + node3 = Tag(wraptext("img"), + attrs=[Attribute(wraptext("id"), wraptext("foo")), + Attribute(wraptext("class"), wraptext("bar"))], self_closing=True, padding=" ") - gen1 = node1.__iternodes__(getnodes) - gen2 = node2.__iternodes__(getnodes) - gen3 = node3.__iternodes__(getnodes) - self.assertEqual((None, node1), next(gen1)) - self.assertEqual((None, node2), next(gen2)) - self.assertEqual((None, node3), next(gen3)) - self.assertEqual((node1.tag, node1n1), next(gen1)) - self.assertEqual((node3.tag, node3n1), next(gen3)) - self.assertEqual((node3.attributes[0].name, node3n2), next(gen3)) - self.assertEqual((node3.attributes[0].value, node3n3), next(gen3)) - self.assertEqual((node3.attributes[1].name, node3n4), next(gen3)) - self.assertEqual((node3.attributes[1].value, node3n5), next(gen3)) - self.assertEqual((node1.contents, node1n2), next(gen1)) - self.assertEqual((node2.contents, node2n1), next(gen2)) - self.assertEqual((node1.closing_tag, node1n1), next(gen1)) + gen1 = node1.__children__() + gen2 = node2.__children__() + gen3 = node3.__children__() + self.assertEqual(node1.tag, next(gen1)) + self.assertEqual(node3.tag, next(gen3)) + self.assertEqual(node3.attributes[0].name, next(gen3)) + self.assertEqual(node3.attributes[0].value, next(gen3)) + self.assertEqual(node3.attributes[1].name, next(gen3)) + self.assertEqual(node3.attributes[1].value, next(gen3)) + self.assertEqual(node1.contents, next(gen1)) + self.assertEqual(node2.contents, next(gen2)) + self.assertEqual(node1.closing_tag, next(gen1)) self.assertRaises(StopIteration, next, gen1) self.assertRaises(StopIteration, next, gen2) self.assertRaises(StopIteration, next, gen3) diff --git a/tests/test_template.py b/tests/test_template.py index 26a2e39..584b02f 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files 
(the "Software"), to deal @@ -21,12 +21,16 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import HTMLEntity, Template, Text from mwparserfromhell.nodes.extras import Parameter -from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True) pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False) @@ -42,27 +46,21 @@ class TestTemplate(TreeEqualityTestCase): [pgenh("1", "bar"), pgens("abc", "def")]) self.assertEqual("{{foo|bar|abc=def}}", str(node2)) - def test_iternodes(self): - """test Template.__iternodes__()""" - node1n1 = Text("foobar") - node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("abc") - node2n4, node2n5 = Text("def"), Text("ghi") - node2p1 = Parameter(wraptext("1"), wrap([node2n2]), showkey=False) - node2p2 = Parameter(wrap([node2n3]), wrap([node2n4, node2n5]), + def test_children(self): + """test Template.__children__()""" + node2p1 = Parameter(wraptext("1"), wraptext("bar"), showkey=False) + node2p2 = Parameter(wraptext("abc"), wrap([Text("def"), Text("ghi")]), showkey=True) - node1 = Template(wrap([node1n1])) - node2 = Template(wrap([node2n1]), [node2p1, node2p2]) + node1 = Template(wraptext("foobar")) + node2 = Template(wraptext("foo"), [node2p1, node2p2]) - gen1 = node1.__iternodes__(getnodes) - gen2 = node2.__iternodes__(getnodes) - self.assertEqual((None, node1), next(gen1)) - self.assertEqual((None, node2), next(gen2)) - self.assertEqual((node1.name, node1n1), next(gen1)) - self.assertEqual((node2.name, node2n1), next(gen2)) - self.assertEqual((node2.params[0].value, node2n2), next(gen2)) - self.assertEqual((node2.params[1].name, node2n3), next(gen2)) - self.assertEqual((node2.params[1].value, node2n4), next(gen2)) - self.assertEqual((node2.params[1].value, node2n5), next(gen2)) + gen1 = node1.__children__() + gen2 = node2.__children__() + self.assertEqual(node1.name, next(gen1)) + self.assertEqual(node2.name, next(gen2)) + self.assertEqual(node2.params[0].value, next(gen2)) + self.assertEqual(node2.params[1].name, next(gen2)) + self.assertEqual(node2.params[1].value, next(gen2)) self.assertRaises(StopIteration, next, gen1) self.assertRaises(StopIteration, next, gen2) @@ -123,15 +121,15 @@ class TestTemplate(TreeEqualityTestCase): node3 = Template(wraptext("foo"), [pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")]) node4 = Template(wraptext("foo"), [pgenh("1", "a"), pgens("b", " ")]) - self.assertFalse(node1.has("foobar")) - self.assertTrue(node2.has(1)) - self.assertTrue(node2.has("abc")) - self.assertFalse(node2.has("def")) - self.assertTrue(node3.has("1")) - self.assertTrue(node3.has(" b ")) - self.assertFalse(node4.has("b")) - self.assertTrue(node3.has("b", False)) + self.assertFalse(node1.has("foobar", False)) + self.assertTrue(node2.has(1, False)) + self.assertTrue(node2.has("abc", False)) + self.assertFalse(node2.has("def", False)) + self.assertTrue(node3.has("1", False)) + self.assertTrue(node3.has(" b ", False)) self.assertTrue(node4.has("b", False)) + self.assertTrue(node3.has("b", True)) + self.assertFalse(node4.has("b", True)) def test_get(self): """test Template.get()""" @@ -223,6 +221,7 @@ class TestTemplate(TreeEqualityTestCase): pgenh("1", "c"), pgenh("2", "d")]) node40 = Template(wraptext("a"), 
[pgens("b", "c"), pgens("d", "e"), pgens("f", "g")]) + node41 = Template(wraptext("a"), [pgenh("1", "")]) node1.add("e", "f", showkey=True) node2.add(2, "g", showkey=False) @@ -266,6 +265,7 @@ class TestTemplate(TreeEqualityTestCase): node38.add("1", "e") node39.add("1", "e") node40.add("d", "h", before="b") + node41.add(1, "b") self.assertEqual("{{a|b=c|d|e=f}}", node1) self.assertEqual("{{a|b=c|d|g}}", node2) @@ -312,6 +312,7 @@ class TestTemplate(TreeEqualityTestCase): self.assertEqual("{{a|1=e|x=y|2=d}}", node38) self.assertEqual("{{a|x=y|e|d}}", node39) self.assertEqual("{{a|b=c|d=h|f=g}}", node40) + self.assertEqual("{{a|b}}", node41) def test_remove(self): """test Template.remove()""" diff --git a/tests/test_text.py b/tests/test_text.py index 35ac340..ee2e5c7 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -21,7 +21,11 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text @@ -36,11 +40,10 @@ class TestText(unittest.TestCase): node2 = Text("fóóbar") self.assertEqual("fóóbar", str(node2)) - def test_iternodes(self): - """test Text.__iternodes__()""" + def test_children(self): + """test Text.__children__()""" node = Text("foobar") - gen = node.__iternodes__(None) - self.assertEqual((None, node), next(gen)) + gen = node.__children__() self.assertRaises(StopIteration, next, gen) def test_strip(self): diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 2048bb9..3efce86 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -21,7 +21,11 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import py3k from mwparserfromhell.parser import tokens diff --git a/tests/test_utils.py b/tests/test_utils.py index 80a0e5e..ddcc078 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -21,7 +21,11 @@ # SOFTWARE. 
 from __future__ import unicode_literals
-import unittest
+
+try:
+    import unittest2 as unittest
+except ImportError:
+    import unittest
 
 from mwparserfromhell.nodes import Template, Text
 from mwparserfromhell.utils import parse_anything
diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py
index 14d801c..9ff5949 100644
--- a/tests/test_wikicode.py
+++ b/tests/test_wikicode.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic
+# Copyright (C) 2012-2014 Ben Kurtovic
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -24,14 +24,18 @@ from __future__ import unicode_literals
 from functools import partial
 import re
 from types import GeneratorType
-import unittest
+try:
+    import unittest2 as unittest
+except ImportError:
+    import unittest
+
+from mwparserfromhell.compat import py3k, str
 from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity,
                                     Node, Tag, Template, Text, Wikilink)
 from mwparserfromhell.smart_list import SmartList
 from mwparserfromhell.wikicode import Wikicode
 from mwparserfromhell import parse
-from mwparserfromhell.compat import py3k, str
 
 from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext
@@ -242,6 +246,7 @@ class TestWikicode(TreeEqualityTestCase):
         """test Wikicode.matches()"""
         code1 = parse("Cleanup")
         code2 = parse("\nstub")
+        code3 = parse("")
         self.assertTrue(code1.matches("Cleanup"))
         self.assertTrue(code1.matches("cleanup"))
         self.assertTrue(code1.matches(" cleanup\n"))
@@ -250,13 +255,22 @@
         self.assertTrue(code2.matches("stub"))
         self.assertTrue(code2.matches("Stub"))
         self.assertFalse(code2.matches("StuB"))
+        self.assertTrue(code1.matches(("cleanup", "stub")))
+        self.assertTrue(code2.matches(("cleanup", "stub")))
+        self.assertFalse(code2.matches(("StuB", "sTUb", "foobar")))
+        self.assertFalse(code2.matches(["StuB", "sTUb", "foobar"]))
+        self.assertTrue(code2.matches(("StuB", "sTUb", "foo", "bar", "Stub")))
+        self.assertTrue(code2.matches(["StuB", "sTUb", "foo", "bar", "Stub"]))
+        self.assertTrue(code3.matches(""))
+        self.assertTrue(code3.matches(""))
+        self.assertTrue(code3.matches(("a", "b", "")))
 
     def test_filter_family(self):
         """test the Wikicode.i?filter() family of functions"""
         def genlist(gen):
             self.assertIsInstance(gen, GeneratorType)
             return list(gen)
-        ifilter = lambda code: (lambda **kw: genlist(code.ifilter(**kw)))
+        ifilter = lambda code: (lambda *a, **k: genlist(code.ifilter(*a, **k)))
 
         code = parse("a{{b}}c[[d]]{{{e}}}{{f}}[[g]]")
         for func in (code.filter, ifilter(code)):
@@ -292,21 +306,27 @@
                           "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"],
                          func(recursive=True, forcetype=Template))
 
-        code3 = parse("{{foobar}}{{FOO}}{{baz}}{{bz}}")
+        code3 = parse("{{foobar}}{{FOO}}{{baz}}{{bz}}{{barfoo}}")
         for func in (code3.filter, ifilter(code3)):
-            self.assertEqual(["{{foobar}}", "{{FOO}}"], func(recursive=False, matches=r"foo"))
+            self.assertEqual(["{{foobar}}", "{{barfoo}}"],
+                             func(False, matches=lambda node: "foo" in node))
+            self.assertEqual(["{{foobar}}", "{{FOO}}", "{{barfoo}}"],
+                             func(False, matches=r"foo"))
             self.assertEqual(["{{foobar}}", "{{FOO}}"],
-                             func(recursive=False, matches=r"^{{foo.*?}}"))
+                             func(matches=r"^{{foo.*?}}"))
             self.assertEqual(["{{foobar}}"],
-                             func(recursive=False, matches=r"^{{foo.*?}}", flags=re.UNICODE))
-            self.assertEqual(["{{baz}}", "{{bz}}"], func(recursive=False, matches=r"^{{b.*?z"))
matches=r"^{{b.*?z")) - self.assertEqual(["{{baz}}"], func(recursive=False, matches=r"^{{b.+?z}}")) + func(matches=r"^{{foo.*?}}", flags=re.UNICODE)) + self.assertEqual(["{{baz}}", "{{bz}}"], func(matches=r"^{{b.*?z")) + self.assertEqual(["{{baz}}"], func(matches=r"^{{b.+?z}}")) self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"], code2.filter_templates(recursive=False)) self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}", "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"], code2.filter_templates(recursive=True)) + + self.assertEqual(["{{foobar}}"], code3.filter_templates( + matches=lambda node: node.name.matches("Foobar"))) self.assertEqual(["{{baz}}", "{{bz}}"], code3.filter_templates(matches=r"^{{b.*?z")) self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z")) @@ -335,35 +355,43 @@ class TestWikicode(TreeEqualityTestCase): p4_III = "== Section III ==\n" + p4_IIIA page4 = parse(p4_lead + p4_I + p4_II + p4_III) - self.assertEqual([], page1.get_sections()) + self.assertEqual([""], page1.get_sections()) self.assertEqual(["", "==Heading=="], page2.get_sections()) self.assertEqual(["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n", "====Gnidaeh====\n"], page3.get_sections()) - self.assertEqual([p4_lead, p4_IA, p4_I, p4_IB, p4_IB1, p4_II, - p4_IIIA1a, p4_III, p4_IIIA, p4_IIIA2, p4_IIIA2ai1], + self.assertEqual([p4_lead, p4_I, p4_IA, p4_IB, p4_IB1, p4_II, + p4_III, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1], page4.get_sections()) self.assertEqual(["====Gnidaeh====\n"], page3.get_sections(levels=[4])) self.assertEqual(["===Heading===\nFoo bar baz\n====Gnidaeh====\n"], page3.get_sections(levels=(2, 3))) + self.assertEqual(["===Heading===\nFoo bar baz\n"], + page3.get_sections(levels=(2, 3), flat=True)) self.assertEqual([], page3.get_sections(levels=[0])) self.assertEqual(["", "====Gnidaeh====\n"], page3.get_sections(levels=[4], include_lead=True)) self.assertEqual(["===Heading===\nFoo bar baz\n====Gnidaeh====\n", "====Gnidaeh====\n"], page3.get_sections(include_lead=False)) + self.assertEqual(["===Heading===\nFoo bar baz\n", "====Gnidaeh====\n"], + page3.get_sections(flat=True, include_lead=False)) self.assertEqual([p4_IB1, p4_IIIA2], page4.get_sections(levels=[4])) - self.assertEqual([""], page2.get_sections(include_headings=False)) + self.assertEqual([p4_IA, p4_IB, p4_IIIA], page4.get_sections(levels=[3])) + self.assertEqual([p4_IA, "=== Section I.B ===\n", + "=== Section III.A ===\nText.\n"], + page4.get_sections(levels=[3], flat=True)) + self.assertEqual(["", ""], page2.get_sections(include_headings=False)) self.assertEqual(["\nSection I.B.1 body.\n\n•Some content.\n\n", "\nEven more text.\n" + p4_IIIA2ai1], page4.get_sections(levels=[4], include_headings=False)) self.assertEqual([], page4.get_sections(matches=r"body")) - self.assertEqual([p4_IA, p4_I, p4_IB, p4_IB1], + self.assertEqual([p4_I, p4_IA, p4_IB, p4_IB1], page4.get_sections(matches=r"Section\sI[.\s].*?")) - self.assertEqual([p4_IA, p4_IIIA1a, p4_IIIA, p4_IIIA2, p4_IIIA2ai1], + self.assertEqual([p4_IA, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1], page4.get_sections(matches=r".*?a.*?")) self.assertEqual([p4_IIIA1a, p4_IIIA2ai1], page4.get_sections(matches=r".*?a.*?", flags=re.U)) @@ -371,6 +399,11 @@ class TestWikicode(TreeEqualityTestCase): page4.get_sections(matches=r".*?a.*?", flags=re.U, include_headings=False)) + sections = page2.get_sections(include_headings=False) + sections[0].append("Lead!\n") + sections[1].append("\nFirst section!") + self.assertEqual("Lead!\n==Heading==\nFirst section!", page2) + page5 = parse("X\n== 
         section = page5.get_sections(matches="Foo")[0]
         section.replace("\nBar\n", "\nBarf ")
diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py
index 7851032..1bdc907 100644
--- a/tests/test_wikilink.py
+++ b/tests/test_wikilink.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic
+# Copyright (C) 2012-2014 Ben Kurtovic
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -21,12 +21,16 @@
 # SOFTWARE.
 
 from __future__ import unicode_literals
-import unittest
+
+try:
+    import unittest2 as unittest
+except ImportError:
+    import unittest
 
 from mwparserfromhell.compat import str
 from mwparserfromhell.nodes import Text, Wikilink
-from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext
+from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext
 
 class TestWikilink(TreeEqualityTestCase):
     """Test cases for the Wikilink node."""
@@ -38,20 +42,15 @@
         node2 = Wikilink(wraptext("foo"), wraptext("bar"))
         self.assertEqual("[[foo|bar]]", str(node2))
 
-    def test_iternodes(self):
-        """test Wikilink.__iternodes__()"""
-        node1n1 = Text("foobar")
-        node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("baz")
-        node1 = Wikilink(wrap([node1n1]))
-        node2 = Wikilink(wrap([node2n1]), wrap([node2n2, node2n3]))
-        gen1 = node1.__iternodes__(getnodes)
-        gen2 = node2.__iternodes__(getnodes)
-        self.assertEqual((None, node1), next(gen1))
-        self.assertEqual((None, node2), next(gen2))
-        self.assertEqual((node1.title, node1n1), next(gen1))
-        self.assertEqual((node2.title, node2n1), next(gen2))
-        self.assertEqual((node2.text, node2n2), next(gen2))
-        self.assertEqual((node2.text, node2n3), next(gen2))
+    def test_children(self):
+        """test Wikilink.__children__()"""
+        node1 = Wikilink(wraptext("foobar"))
+        node2 = Wikilink(wraptext("foo"), wrap([Text("bar"), Text("baz")]))
+        gen1 = node1.__children__()
+        gen2 = node2.__children__()
+        self.assertEqual(node1.title, next(gen1))
+        self.assertEqual(node2.title, next(gen2))
+        self.assertEqual(node2.text, next(gen2))
         self.assertRaises(StopIteration, next, gen1)
         self.assertRaises(StopIteration, next, gen2)
 
diff --git a/tests/tokenizer/integration.mwtest b/tests/tokenizer/integration.mwtest
index 37ef9f1..bf19f4d 100644
--- a/tests/tokenizer/integration.mwtest
+++ b/tests/tokenizer/integration.mwtest
@@ -150,3 +150,31 @@ name: comment_inside_bracketed_link
 label: an HTML comment inside a bracketed external link
 input: "[http://example.com/foo<!--comment-->bar]"
 output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com/foo"), CommentStart(), Text(text="comment"), CommentEnd(), Text(text="bar"), ExternalLinkClose()]
+
+---
+
+name: wikilink_inside_external_link
+label: a wikilink inside an external link, which the parser considers valid (see issue #61)
+input: "[http://example.com/foo Foo [[Bar]]]"
+output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com/foo"), ExternalLinkSeparator(), Text(text="Foo "), WikilinkOpen(), Text(text="Bar"), WikilinkClose(), ExternalLinkClose()]
+
+---
+
+name: external_link_inside_wikilink
+label: an external link inside a wikilink, valid in the case of images (see issue #62)
+input: "[[File:Example.png|thumb|http://example.com]]"
+output: [WikilinkOpen(), Text(text="File:Example.png"), WikilinkSeparator(), Text(text="thumb|"), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), WikilinkClose()]
+
+---
+
+name: external_link_inside_wikilink_brackets
+label: an external link with brackets inside a wikilink
+input: "[[File:Example.png|thumb|[http://example.com Example]]]"
+output: [WikilinkOpen(), Text(text="File:Example.png"), WikilinkSeparator(), Text(text="thumb|"), ExternalLinkOpen(brackets=True), Text(text="http://example.com"), ExternalLinkSeparator(), Text(text="Example"), ExternalLinkClose(), WikilinkClose()]
+
+---
+
+name: external_link_inside_wikilink_title
+label: an external link inside a wikilink title, which is invalid
+input: "[[File:Example.png http://example.com]]"
+output: [WikilinkOpen(), Text(text="File:Example.png http://example.com"), WikilinkClose()]
diff --git a/tests/tokenizer/wikilinks.mwtest b/tests/tokenizer/wikilinks.mwtest
index 8eb381a..ce0ec79 100644
--- a/tests/tokenizer/wikilinks.mwtest
+++ b/tests/tokenizer/wikilinks.mwtest
@@ -54,6 +54,20 @@ output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="bar[b
 
 ---
 
+name: nested
+label: a wikilink nested within another
+input: "[[foo|[[bar]]]]"
+output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), WikilinkOpen(), Text(text="bar"), WikilinkClose(), WikilinkClose()]
+
+---
+
+name: nested_padding
+label: a wikilink nested within another, separated by other data
+input: "[[foo|a[[b]]c]]"
+output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="a"), WikilinkOpen(), Text(text="b"), WikilinkClose(), Text(text="c"), WikilinkClose()]
+
+---
+
 name: invalid_newline
 label: invalid wikilink: newline as only content
 input: "[[\n]]"
@@ -103,27 +117,13 @@ output: [Text(text="[[foo"), WikilinkOpen(), Text(text="bar"), WikilinkClose(),
 
 ---
 
-name: invalid_nested_text
-label: invalid wikilink: a wikilink nested within the value of another
+name: invalid_nested_no_close
+label: invalid wikilink: a wikilink nested within the value of another, missing a pair of closing brackets
 input: "[[foo|[[bar]]"
 output: [Text(text="[[foo|"), WikilinkOpen(), Text(text="bar"), WikilinkClose()]
 
 ---
 
-name: invalid_nested_text_2
-label: invalid wikilink: a wikilink nested within the value of another, two pairs of closing brackets
-input: "[[foo|[[bar]]]]"
-output: [Text(text="[[foo|"), WikilinkOpen(), Text(text="bar"), WikilinkClose(), Text(text="]]")]
-
----
-
-name: invalid_nested_text_padding
-label: invalid wikilink: a wikilink nested within the value of another, separated by other data
-input: "[[foo|a[[b]]c]]"
-output: [Text(text="[[foo|a"), WikilinkOpen(), Text(text="b"), WikilinkClose(), Text(text="c]]")]
-
----
-
 name: incomplete_open_only
 label: incomplete wikilinks: just an open
 input: "[["