@@ -1,5 +1,6 @@
language: python
python:
  - "2.6"
  - "2.7"
  - "3.2"
  - "3.3"
@@ -1,3 +1,21 @@
v0.3.3 (released April 22, 2014):

- Added support for Python 2.6 and 3.4.
- Template.has() is now passed 'ignore_empty=False' by default instead of True.
  This fixes a bug when adding parameters to templates with empty fields, and
  is a breaking change if you rely on the default behavior (see the example
  after this list).
- The 'matches' argument of Wikicode's filter methods now accepts a function
  (taking one argument, a Node, and returning a bool) in addition to a regex.
- Re-added 'flat' argument to Wikicode.get_sections(), fixed the order in which
  it returns sections, and made it faster.
- Wikicode.matches() now accepts a tuple or list of strings/Wikicode objects
  instead of just a single string or Wikicode.
- Given the frequency of issues with the (admittedly insufficient) tag parser,
  there's a temporary skip_style_tags argument to parse() that ignores '' and
  ''' until these issues are corrected.
- Fixed a parser bug involving nested wikilinks and external links.
- C code cleanup and speed improvements.
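A minimal sketch of the new has() default (the template '{{foo|bar=}}' is
illustrative; the outputs follow from the ignore_empty=False default):

    import mwparserfromhell

    code = mwparserfromhell.parse("{{foo|bar=}}")
    template = code.filter_templates()[0]
    print(template.has("bar"))                     # True: empty fields now count
    print(template.has("bar", ignore_empty=True))  # False: the old default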
v0.3.2 (released September 1, 2013):

- Added support for Python 3.2 (along with current support for 3.3 and 2.7).
@@ -1,4 +1,4 @@ | |||
Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy | |||
of this software and associated documentation files (the "Software"), to deal | |||
@@ -1,6 +1,32 @@
Changelog
=========

v0.3.3
------

`Released April 22, 2014 <https://github.com/earwig/mwparserfromhell/tree/v0.3.3>`_
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.3.2...v0.3.3>`__):

- Added support for Python 2.6 and 3.4.
- :py:meth:`.Template.has` is now passed *ignore_empty=False* by default
  instead of *True*. This fixes a bug when adding parameters to templates with
  empty fields, **and is a breaking change if you rely on the default
  behavior.**
- The *matches* argument of :py:class:`Wikicode's <.Wikicode>`
  :py:meth:`.filter` methods now accepts a function (taking one argument, a
  :py:class:`.Node`, and returning a bool) in addition to a regex (see the
  sketch after this list).
- Re-added *flat* argument to :py:meth:`.Wikicode.get_sections`, fixed the
  order in which it returns sections, and made it faster.
- :py:meth:`.Wikicode.matches` now accepts a tuple or list of
  strings/:py:class:`.Wikicode` objects instead of just a single string or
  :py:class:`.Wikicode`.
- Given the frequency of issues with the (admittedly insufficient) tag parser,
  there's a temporary *skip_style_tags* argument to :py:meth:`~.Parser.parse`
  that ignores ``''`` and ``'''`` until these issues are corrected.
- Fixed a parser bug involving nested wikilinks and external links.
- C code cleanup and speed improvements.
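For illustration, a sketch of the two *matches* improvements (the template
names are made up)::

    import mwparserfromhell

    code = mwparserfromhell.parse("{{foo}} {{foobar}}")
    # *matches* may now be a function taking a Node and returning a bool:
    long_names = code.filter_templates(matches=lambda node: len(str(node.name)) > 3)
    # Wikicode.matches() now accepts a tuple or list of strings:
    print(code.filter_templates()[0].name.matches(("foo", "bar")))  # True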
v0.3.2
------
@@ -42,7 +42,7 @@ master_doc = 'index'

# General information about the project.
project = u'mwparserfromhell'
copyright = u'2012, 2013 Ben Kurtovic'
copyright = u'2012, 2013, 2014 Ben Kurtovic'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -29,10 +29,10 @@ outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode.

from __future__ import unicode_literals

__author__ = "Ben Kurtovic"
__copyright__ = "Copyright (C) 2012, 2013 Ben Kurtovic"
__copyright__ = "Copyright (C) 2012, 2013, 2014 Ben Kurtovic"
__license__ = "MIT License"
__version__ = "0.3.2"
__email__ = "ben.kurtovic@verizon.net"
__version__ = "0.3.3"
__email__ = "ben.kurtovic@gmail.com"

from . import (compat, definitions, nodes, parser, smart_list, string_mixin,
               utils, wikicode)
@@ -10,18 +10,21 @@ types are meant to be imported directly from within the parser's modules.

import sys

py3k = sys.version_info.major == 3
py32 = py3k and sys.version_info.minor == 2
py26 = (sys.version_info[0] == 2) and (sys.version_info[1] == 6)
py3k = (sys.version_info[0] == 3)
py32 = py3k and (sys.version_info[1] == 2)

if py3k:
    bytes = bytes
    str = str
    range = range
    maxsize = sys.maxsize
    import html.entities as htmlentities

else:
    bytes = str
    str = unicode
    range = xrange
    maxsize = sys.maxint
    import htmlentitydefs as htmlentities
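For illustration, a sketch of how these shims keep calling code
version-agnostic (name2codepoint exists in both the 2.x and 3.x entity
modules):

    from mwparserfromhell.compat import htmlentities, range, str

    print(str(htmlentities.name2codepoint["amp"]))  # 38, on 2.6 through 3.4
    indices = range(3)  # xrange on Python 2, range on Python 3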
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -42,21 +42,21 @@ class Node(StringMixIn):

    :py:meth:`__unicode__` must be overridden. It should return a ``unicode``
    or (``str`` in py3k) representation of the node. If the node contains
    :py:class:`~.Wikicode` objects inside of it, :py:meth:`__iternodes__`
    should be overridden to yield tuples of (``wikicode``,
    ``node_in_wikicode``) for each node in each wikicode, as well as the node
    itself (``None``, ``self``). If the node is printable, :py:meth:`__strip__`
    should be overridden to return the printable version of the node - it does
    not have to be a string, but something that can be converted to a string
    with ``str()``. Finally, :py:meth:`__showtree__` can be overridden to build
    a nice tree representation of the node, if desired, for
    :py:class:`~.Wikicode` objects inside of it, :py:meth:`__children__`
    should be a generator that iterates over them. If the node is printable
    (shown when the page is rendered), :py:meth:`__strip__` should return its
    printable version, stripping out any formatting marks. It does not have to
    return a string, but something that can be converted to a string with
    ``str()``. Finally, :py:meth:`__showtree__` can be overridden to build a
    nice tree representation of the node, if desired, for
    :py:meth:`~.Wikicode.get_tree`.
    """

    def __unicode__(self):
        raise NotImplementedError()

    def __iternodes__(self, getter):
        yield None, self

    def __children__(self):
        return  # Funny generator-that-yields-nothing syntax
        yield

    def __strip__(self, normalize, collapse):
        return None
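For illustration, a hypothetical subclass following the new contract (the
Marker class and its markup are made up, not part of the library):

    from mwparserfromhell.compat import str
    from mwparserfromhell.nodes import Node

    class Marker(Node):
        """Hypothetical node wrapping a single Wikicode value."""

        def __init__(self, contents):
            self.contents = contents  # a Wikicode object

        def __unicode__(self):
            return "%%" + str(self.contents) + "%%"

        def __children__(self):
            yield self.contents  # one yield per contained Wikicode

        def __strip__(self, normalize, collapse):
            return self.contents.strip_code(normalize, collapse)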
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -42,13 +42,10 @@ class Argument(Node): | |||
return start + "|" + str(self.default) + "}}}" | |||
return start + "}}}" | |||
def __iternodes__(self, getter): | |||
yield None, self | |||
for child in getter(self.name): | |||
yield self.name, child | |||
def __children__(self): | |||
yield self.name | |||
if self.default is not None: | |||
for child in getter(self.default): | |||
yield self.default, child | |||
yield self.default | |||
def __strip__(self, normalize, collapse): | |||
if self.default is not None: | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -44,13 +44,10 @@ class ExternalLink(Node): | |||
return "[" + str(self.url) + "]" | |||
return str(self.url) | |||
def __iternodes__(self, getter): | |||
yield None, self | |||
for child in getter(self.url): | |||
yield self.url, child | |||
def __children__(self): | |||
yield self.url | |||
if self.title is not None: | |||
for child in getter(self.title): | |||
yield self.title, child | |||
yield self.title | |||
def __strip__(self, normalize, collapse): | |||
if self.brackets: | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -39,10 +39,8 @@ class Heading(Node): | |||
def __unicode__(self): | |||
return ("=" * self.level) + str(self.title) + ("=" * self.level) | |||
def __iternodes__(self, getter): | |||
yield None, self | |||
for child in getter(self.title): | |||
yield self.title, child | |||
def __children__(self): | |||
yield self.title | |||
def __strip__(self, normalize, collapse): | |||
return self.title.strip_code(normalize, collapse) | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -70,23 +70,17 @@ class Tag(Node): | |||
result += "</" + str(self.closing_tag) + ">" | |||
return result | |||
def __iternodes__(self, getter): | |||
yield None, self | |||
def __children__(self): | |||
if not self.wiki_markup: | |||
for child in getter(self.tag): | |||
yield self.tag, child | |||
yield self.tag | |||
for attr in self.attributes: | |||
for child in getter(attr.name): | |||
yield attr.name, child | |||
if attr.value: | |||
for child in getter(attr.value): | |||
yield attr.value, child | |||
yield attr.name | |||
if attr.value is not None: | |||
yield attr.value | |||
if self.contents: | |||
for child in getter(self.contents): | |||
yield self.contents, child | |||
yield self.contents | |||
if not self.self_closing and not self.wiki_markup and self.closing_tag: | |||
for child in getter(self.closing_tag): | |||
yield self.closing_tag, child | |||
yield self.closing_tag | |||
def __strip__(self, normalize, collapse): | |||
if self.contents and is_visible(self.tag): | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -26,7 +26,7 @@ import re | |||
from . import HTMLEntity, Node, Text | |||
from .extras import Parameter | |||
from ..compat import str | |||
from ..compat import range, str | |||
from ..utils import parse_anything | |||
__all__ = ["Template"] | |||
@@ -51,16 +51,12 @@ class Template(Node): | |||
else: | |||
return "{{" + str(self.name) + "}}" | |||
def __iternodes__(self, getter): | |||
yield None, self | |||
for child in getter(self.name): | |||
yield self.name, child | |||
def __children__(self): | |||
yield self.name | |||
for param in self.params: | |||
if param.showkey: | |||
for child in getter(param.name): | |||
yield param.name, child | |||
for child in getter(param.value): | |||
yield param.value, child | |||
yield param.name | |||
yield param.value | |||
def __showtree__(self, write, get, mark): | |||
write("{{") | |||
@@ -174,7 +170,7 @@ class Template(Node):
    def name(self, value):
        self._name = parse_anything(value)

    def has(self, name, ignore_empty=True):
    def has(self, name, ignore_empty=False):
        """Return ``True`` if any parameter in the template is named *name*.

        With *ignore_empty*, ``False`` will be returned even if the template
@@ -190,7 +186,7 @@ class Template(Node):
                return True
        return False

    has_param = lambda self, name, ignore_empty=True: \
    has_param = lambda self, name, ignore_empty=False: \
        self.has(name, ignore_empty)
    has_param.__doc__ = "Alias for :py:meth:`has`."
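A sketch of the bug the new default fixes (the template markup is
illustrative; with ignore_empty=False, an existing-but-empty field is
detected and overwritten rather than duplicated):

    import mwparserfromhell

    template = mwparserfromhell.parse("{{foo|bar=}}").filter_templates()[0]
    template.add("bar", "baz")
    print(template)  # {{foo|bar=baz}}, not {{foo|bar=|bar=baz}}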
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -41,13 +41,10 @@ class Wikilink(Node): | |||
return "[[" + str(self.title) + "|" + str(self.text) + "]]" | |||
return "[[" + str(self.title) + "]]" | |||
def __iternodes__(self, getter): | |||
yield None, self | |||
for child in getter(self.title): | |||
yield self.title, child | |||
def __children__(self): | |||
yield self.title | |||
if self.text is not None: | |||
for child in getter(self.text): | |||
yield self.text, child | |||
yield self.text | |||
def __strip__(self, normalize, collapse): | |||
if self.text is not None: | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -53,8 +53,12 @@ class Parser(object):
        self._tokenizer = Tokenizer()
        self._builder = Builder()

    def parse(self, text, context=0):
        """Parse *text*, returning a :py:class:`~.Wikicode` object tree."""
        tokens = self._tokenizer.tokenize(text, context)
    def parse(self, text, context=0, skip_style_tags=False):
        """Parse *text*, returning a :py:class:`~.Wikicode` object tree.

        If *skip_style_tags* is ``True``, then ``''`` and ``'''`` will not be
        parsed, but instead be treated as plain text.
        """
        tokens = self._tokenizer.tokenize(text, context, skip_style_tags)
        code = self._builder.build(tokens)
        return code
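A usage sketch for the new argument (assuming Parser takes no constructor
arguments, as above):

    from mwparserfromhell.parser import Parser

    code = Parser().parse("''italic'' and '''bold'''", skip_style_tags=True)
    # The quote runs stay as plain text instead of being parsed into tags.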
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -55,7 +55,6 @@ Local (stack-specific) contexts: | |||
* :py:const:`EXT_LINK_URI` | |||
* :py:const:`EXT_LINK_TITLE` | |||
* :py:const:`EXT_LINK_BRACKETS` | |||
* :py:const:`HEADING` | |||
@@ -100,7 +99,8 @@ Aggregate contexts: | |||
* :py:const:`FAIL` | |||
* :py:const:`UNSAFE` | |||
* :py:const:`DOUBLE` | |||
* :py:const:`INVALID_LINK` | |||
* :py:const:`NO_WIKILINKS` | |||
* :py:const:`NO_EXT_LINKS` | |||
""" | |||
@@ -121,38 +121,37 @@ WIKILINK = WIKILINK_TITLE + WIKILINK_TEXT | |||
EXT_LINK_URI = 1 << 7 | |||
EXT_LINK_TITLE = 1 << 8 | |||
EXT_LINK_BRACKETS = 1 << 9 | |||
EXT_LINK = EXT_LINK_URI + EXT_LINK_TITLE + EXT_LINK_BRACKETS | |||
HEADING_LEVEL_1 = 1 << 10 | |||
HEADING_LEVEL_2 = 1 << 11 | |||
HEADING_LEVEL_3 = 1 << 12 | |||
HEADING_LEVEL_4 = 1 << 13 | |||
HEADING_LEVEL_5 = 1 << 14 | |||
HEADING_LEVEL_6 = 1 << 15 | |||
EXT_LINK = EXT_LINK_URI + EXT_LINK_TITLE | |||
HEADING_LEVEL_1 = 1 << 9 | |||
HEADING_LEVEL_2 = 1 << 10 | |||
HEADING_LEVEL_3 = 1 << 11 | |||
HEADING_LEVEL_4 = 1 << 12 | |||
HEADING_LEVEL_5 = 1 << 13 | |||
HEADING_LEVEL_6 = 1 << 14 | |||
HEADING = (HEADING_LEVEL_1 + HEADING_LEVEL_2 + HEADING_LEVEL_3 + | |||
HEADING_LEVEL_4 + HEADING_LEVEL_5 + HEADING_LEVEL_6) | |||
TAG_OPEN = 1 << 16 | |||
TAG_ATTR = 1 << 17 | |||
TAG_BODY = 1 << 18 | |||
TAG_CLOSE = 1 << 19 | |||
TAG_OPEN = 1 << 15 | |||
TAG_ATTR = 1 << 16 | |||
TAG_BODY = 1 << 17 | |||
TAG_CLOSE = 1 << 18 | |||
TAG = TAG_OPEN + TAG_ATTR + TAG_BODY + TAG_CLOSE | |||
STYLE_ITALICS = 1 << 20 | |||
STYLE_BOLD = 1 << 21 | |||
STYLE_PASS_AGAIN = 1 << 22 | |||
STYLE_SECOND_PASS = 1 << 23 | |||
STYLE_ITALICS = 1 << 19 | |||
STYLE_BOLD = 1 << 20 | |||
STYLE_PASS_AGAIN = 1 << 21 | |||
STYLE_SECOND_PASS = 1 << 22 | |||
STYLE = STYLE_ITALICS + STYLE_BOLD + STYLE_PASS_AGAIN + STYLE_SECOND_PASS | |||
DL_TERM = 1 << 24 | |||
DL_TERM = 1 << 23 | |||
HAS_TEXT = 1 << 25 | |||
FAIL_ON_TEXT = 1 << 26 | |||
FAIL_NEXT = 1 << 27 | |||
FAIL_ON_LBRACE = 1 << 28 | |||
FAIL_ON_RBRACE = 1 << 29 | |||
FAIL_ON_EQUALS = 1 << 30 | |||
HAS_TEXT = 1 << 24 | |||
FAIL_ON_TEXT = 1 << 25 | |||
FAIL_NEXT = 1 << 26 | |||
FAIL_ON_LBRACE = 1 << 27 | |||
FAIL_ON_RBRACE = 1 << 28 | |||
FAIL_ON_EQUALS = 1 << 29 | |||
SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE + | |||
FAIL_ON_RBRACE + FAIL_ON_EQUALS) | |||
@@ -163,7 +162,8 @@ GL_HEADING = 1 << 0 | |||
# Aggregate contexts: | |||
FAIL = TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK_TITLE + HEADING + TAG + STYLE | |||
UNSAFE = (TEMPLATE_NAME + WIKILINK + EXT_LINK_TITLE + TEMPLATE_PARAM_KEY + | |||
ARGUMENT_NAME + TAG_CLOSE) | |||
UNSAFE = (TEMPLATE_NAME + WIKILINK_TITLE + EXT_LINK_TITLE + | |||
TEMPLATE_PARAM_KEY + ARGUMENT_NAME + TAG_CLOSE) | |||
DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE | |||
INVALID_LINK = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK + EXT_LINK | |||
NO_WIKILINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK_URI | |||
NO_EXT_LINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK |
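For illustration, how an aggregate mask is consulted (a sketch mirroring the
checks the tokenizers perform):

    from mwparserfromhell.parser import contexts

    context = contexts.TEMPLATE_NAME
    if context & contexts.NO_WIKILINKS:
        pass  # e.g. "[[" here is emitted as plain text, not parsed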
@@ -1,6 +1,6 @@ | |||
/* | |||
Tokenizer for MWParserFromHell | |||
Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
this software and associated documentation files (the "Software"), to deal in | |||
@@ -31,7 +31,7 @@ static int is_marker(Py_UNICODE this) | |||
int i; | |||
for (i = 0; i < NUM_MARKERS; i++) { | |||
if (*MARKERS[i] == this) | |||
if (MARKERS[i] == this) | |||
return 1; | |||
} | |||
return 0; | |||
@@ -440,7 +440,7 @@ static int | |||
Tokenizer_emit_textbuffer(Tokenizer* self, Textbuffer* buffer, int reverse) | |||
{ | |||
Textbuffer *original = buffer; | |||
int i; | |||
long i; | |||
if (reverse) { | |||
do { | |||
@@ -642,7 +642,7 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) | |||
PyObject *tokenlist; | |||
self->head += 2; | |||
while (Tokenizer_READ(self, 0) == *"{" && braces < MAX_BRACES) { | |||
while (Tokenizer_READ(self, 0) == '{' && braces < MAX_BRACES) { | |||
self->head++; | |||
braces++; | |||
} | |||
@@ -674,8 +674,8 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) | |||
if (BAD_ROUTE) { | |||
char text[MAX_BRACES + 1]; | |||
RESET_ROUTE(); | |||
for (i = 0; i < braces; i++) text[i] = *"{"; | |||
text[braces] = *""; | |||
for (i = 0; i < braces; i++) text[i] = '{'; | |||
text[braces] = '\0'; | |||
if (Tokenizer_emit_text_then_stack(self, text)) { | |||
Py_XDECREF(text); | |||
return -1; | |||
@@ -872,7 +872,7 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self) | |||
if (Tokenizer_push(self, LC_EXT_LINK_URI)) | |||
return -1; | |||
if (Tokenizer_READ(self, 0) == *"/" && Tokenizer_READ(self, 1) == *"/") { | |||
if (Tokenizer_READ(self, 0) == '/' && Tokenizer_READ(self, 1) == '/') { | |||
if (Tokenizer_emit_text(self, "//")) | |||
return -1; | |||
self->head += 2; | |||
@@ -881,7 +881,7 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self) | |||
buffer = Textbuffer_new(); | |||
if (!buffer) | |||
return -1; | |||
while ((this = Tokenizer_READ(self, 0)) != *"") { | |||
while ((this = Tokenizer_READ(self, 0))) { | |||
i = 0; | |||
while (1) { | |||
if (!valid[i]) | |||
@@ -898,18 +898,18 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self) | |||
self->head++; | |||
} | |||
end_of_loop: | |||
if (this != *":") { | |||
if (this != ':') { | |||
Textbuffer_dealloc(buffer); | |||
Tokenizer_fail_route(self); | |||
return 0; | |||
} | |||
if (Tokenizer_emit_char(self, *":")) { | |||
if (Tokenizer_emit_char(self, ':')) { | |||
Textbuffer_dealloc(buffer); | |||
return -1; | |||
} | |||
self->head++; | |||
slashes = (Tokenizer_READ(self, 0) == *"/" && | |||
Tokenizer_READ(self, 1) == *"/"); | |||
slashes = (Tokenizer_READ(self, 0) == '/' && | |||
Tokenizer_READ(self, 1) == '/'); | |||
if (slashes) { | |||
if (Tokenizer_emit_text(self, "//")) { | |||
Textbuffer_dealloc(buffer); | |||
@@ -940,7 +940,8 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) | |||
Textbuffer *scheme_buffer = Textbuffer_new(), *temp_buffer; | |||
PyObject *scheme; | |||
Py_UNICODE chunk; | |||
int slashes, i, j; | |||
long i; | |||
int slashes, j; | |||
if (!scheme_buffer) | |||
return -1; | |||
@@ -973,8 +974,8 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) | |||
Textbuffer_dealloc(scheme_buffer); | |||
return -1; | |||
} | |||
slashes = (Tokenizer_READ(self, 0) == *"/" && | |||
Tokenizer_READ(self, 1) == *"/"); | |||
slashes = (Tokenizer_READ(self, 0) == '/' && | |||
Tokenizer_READ(self, 1) == '/'); | |||
if (!IS_SCHEME(scheme, slashes, 1)) { | |||
Py_DECREF(scheme); | |||
Textbuffer_dealloc(scheme_buffer); | |||
@@ -988,7 +989,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) | |||
} | |||
if (Tokenizer_emit_textbuffer(self, scheme_buffer, 1)) | |||
return -1; | |||
if (Tokenizer_emit_char(self, *":")) | |||
if (Tokenizer_emit_char(self, ':')) | |||
return -1; | |||
if (slashes) { | |||
if (Tokenizer_emit_text(self, "//")) | |||
@@ -1014,13 +1015,13 @@ Tokenizer_handle_free_link_text(Tokenizer* self, int* parens, | |||
return error; \ | |||
} | |||
if (this == *"(" && !(*parens)) { | |||
if (this == '(' && !(*parens)) { | |||
*parens = 1; | |||
PUSH_TAIL_BUFFER(*tail, -1) | |||
} | |||
else if (this == *"," || this == *";" || this == *"\\" || this == *"." || | |||
this == *":" || this == *"!" || this == *"?" || | |||
(!(*parens) && this == *")")) | |||
else if (this == ',' || this == ';' || this == '\\' || this == '.' || | |||
this == ':' || this == '!' || this == '?' || | |||
(!(*parens) && this == ')')) | |||
return Textbuffer_write(tail, this); | |||
else | |||
PUSH_TAIL_BUFFER(*tail, -1) | |||
@@ -1037,12 +1038,12 @@ Tokenizer_is_free_link(Tokenizer* self, Py_UNICODE this, Py_UNICODE next) | |||
Py_UNICODE after = Tokenizer_READ(self, 2); | |||
int ctx = self->topstack->context; | |||
return (this == *"" || this == *"\n" || this == *"[" || this == *"]" || | |||
this == *"<" || this == *">" || (this == *"'" && next == *"'") || | |||
(this == *"|" && ctx & LC_TEMPLATE) || | |||
(this == *"=" && ctx & (LC_TEMPLATE_PARAM_KEY | LC_HEADING)) || | |||
(this == *"}" && next == *"}" && | |||
(ctx & LC_TEMPLATE || (after == *"}" && ctx & LC_ARGUMENT)))); | |||
return (!this || this == '\n' || this == '[' || this == ']' || | |||
this == '<' || this == '>' || (this == '\'' && next == '\'') || | |||
(this == '|' && ctx & LC_TEMPLATE) || | |||
(this == '=' && ctx & (LC_TEMPLATE_PARAM_KEY | LC_HEADING)) || | |||
(this == '}' && next == '}' && | |||
(ctx & LC_TEMPLATE || (after == '}' && ctx & LC_ARGUMENT)))); | |||
} | |||
/* | |||
@@ -1061,21 +1062,21 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets, | |||
if (BAD_ROUTE) | |||
return NULL; | |||
this = Tokenizer_READ(self, 0); | |||
if (this == *"" || this == *"\n" || this == *" " || this == *"]") | |||
if (!this || this == '\n' || this == ' ' || this == ']') | |||
return Tokenizer_fail_route(self); | |||
if (!brackets && this == *"[") | |||
if (!brackets && this == '[') | |||
return Tokenizer_fail_route(self); | |||
while (1) { | |||
this = Tokenizer_READ(self, 0); | |||
next = Tokenizer_READ(self, 1); | |||
if (this == *"&") { | |||
if (this == '&') { | |||
PUSH_TAIL_BUFFER(*extra, NULL) | |||
if (Tokenizer_parse_entity(self)) | |||
return NULL; | |||
} | |||
else if (this == *"<" && next == *"!" | |||
&& Tokenizer_READ(self, 2) == *"-" | |||
&& Tokenizer_READ(self, 3) == *"-") { | |||
else if (this == '<' && next == '!' | |||
&& Tokenizer_READ(self, 2) == '-' | |||
&& Tokenizer_READ(self, 3) == '-') { | |||
PUSH_TAIL_BUFFER(*extra, NULL) | |||
if (Tokenizer_parse_comment(self)) | |||
return NULL; | |||
@@ -1084,16 +1085,16 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets, | |||
self->head--; | |||
return Tokenizer_pop(self); | |||
} | |||
else if (this == *"" || this == *"\n") | |||
else if (!this || this == '\n') | |||
return Tokenizer_fail_route(self); | |||
else if (this == *"{" && next == *"{" && Tokenizer_CAN_RECURSE(self)) { | |||
else if (this == '{' && next == '{' && Tokenizer_CAN_RECURSE(self)) { | |||
PUSH_TAIL_BUFFER(*extra, NULL) | |||
if (Tokenizer_parse_template_or_argument(self)) | |||
return NULL; | |||
} | |||
else if (this == *"]") | |||
else if (this == ']') | |||
return Tokenizer_pop(self); | |||
else if (this == *" ") { | |||
else if (this == ' ') { | |||
if (brackets) { | |||
if (Tokenizer_emit(self, ExternalLinkSeparator)) | |||
return NULL; | |||
@@ -1102,7 +1103,7 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets, | |||
self->head++; | |||
return Tokenizer_parse(self, 0, 0); | |||
} | |||
if (Textbuffer_write(extra, *" ")) | |||
if (Textbuffer_write(extra, ' ')) | |||
return NULL; | |||
return Tokenizer_pop(self); | |||
} | |||
@@ -1157,7 +1158,7 @@ Tokenizer_remove_uri_scheme_from_textbuffer(Tokenizer* self, PyObject* link) | |||
*/ | |||
static int Tokenizer_parse_external_link(Tokenizer* self, int brackets) | |||
{ | |||
#define INVALID_CONTEXT self->topstack->context & AGG_INVALID_LINK | |||
#define INVALID_CONTEXT self->topstack->context & AGG_NO_EXT_LINKS | |||
#define NOT_A_LINK \ | |||
if (!brackets && self->topstack->context & LC_DLTERM) \ | |||
return Tokenizer_handle_dl_term(self); \ | |||
@@ -1232,7 +1233,7 @@ static int Tokenizer_parse_heading(Tokenizer* self) | |||
self->global |= GL_HEADING; | |||
self->head += 1; | |||
while (Tokenizer_READ(self, 0) == *"=") { | |||
while (Tokenizer_READ(self, 0) == '=') { | |||
best++; | |||
self->head++; | |||
} | |||
@@ -1242,7 +1243,7 @@ static int Tokenizer_parse_heading(Tokenizer* self) | |||
RESET_ROUTE(); | |||
self->head = reset + best - 1; | |||
for (i = 0; i < best; i++) { | |||
if (Tokenizer_emit_char(self, *"=")) | |||
if (Tokenizer_emit_char(self, '=')) | |||
return -1; | |||
} | |||
self->global ^= GL_HEADING; | |||
@@ -1271,7 +1272,7 @@ static int Tokenizer_parse_heading(Tokenizer* self) | |||
if (heading->level < best) { | |||
diff = best - heading->level; | |||
for (i = 0; i < diff; i++) { | |||
if (Tokenizer_emit_char(self, *"=")) { | |||
if (Tokenizer_emit_char(self, '=')) { | |||
Py_DECREF(heading->title); | |||
free(heading); | |||
return -1; | |||
@@ -1296,14 +1297,14 @@ static int Tokenizer_parse_heading(Tokenizer* self) | |||
*/ | |||
static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self) | |||
{ | |||
Py_ssize_t reset = self->head, best; | |||
int i, current, level, diff; | |||
Py_ssize_t reset = self->head; | |||
int best, i, current, level, diff; | |||
HeadingData *after, *heading; | |||
PyObject *stack; | |||
self->head += 1; | |||
best = 1; | |||
while (Tokenizer_READ(self, 0) == *"=") { | |||
while (Tokenizer_READ(self, 0) == '=') { | |||
best++; | |||
self->head++; | |||
} | |||
@@ -1316,7 +1317,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self) | |||
if (level < best) { | |||
diff = best - level; | |||
for (i = 0; i < diff; i++) { | |||
if (Tokenizer_emit_char(self, *"=")) | |||
if (Tokenizer_emit_char(self, '=')) | |||
return NULL; | |||
} | |||
} | |||
@@ -1324,7 +1325,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self) | |||
} | |||
else { | |||
for (i = 0; i < best; i++) { | |||
if (Tokenizer_emit_char(self, *"=")) { | |||
if (Tokenizer_emit_char(self, '=')) { | |||
Py_DECREF(after->title); | |||
free(after); | |||
return NULL; | |||
@@ -1372,21 +1373,21 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) | |||
return -1; | |||
self->head++; | |||
this = Tokenizer_READ(self, 0); | |||
if (this == *"") { | |||
if (!this) { | |||
Tokenizer_fail_route(self); | |||
return 0; | |||
} | |||
if (this == *"#") { | |||
if (this == '#') { | |||
numeric = 1; | |||
if (Tokenizer_emit(self, HTMLEntityNumeric)) | |||
return -1; | |||
self->head++; | |||
this = Tokenizer_READ(self, 0); | |||
if (this == *"") { | |||
if (!this) { | |||
Tokenizer_fail_route(self); | |||
return 0; | |||
} | |||
if (this == *"x" || this == *"X") { | |||
if (this == 'x' || this == 'X') { | |||
hexadecimal = 1; | |||
kwargs = PyDict_New(); | |||
if (!kwargs) | |||
@@ -1416,22 +1417,20 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) | |||
zeroes = 0; | |||
while (1) { | |||
this = Tokenizer_READ(self, 0); | |||
if (this == *";") { | |||
if (this == ';') { | |||
if (i == 0) | |||
FAIL_ROUTE_AND_EXIT() | |||
break; | |||
} | |||
if (i == 0 && this == *"0") { | |||
if (i == 0 && this == '0') { | |||
zeroes++; | |||
self->head++; | |||
continue; | |||
} | |||
if (i >= MAX_ENTITY_SIZE) | |||
FAIL_ROUTE_AND_EXIT() | |||
for (j = 0; j < NUM_MARKERS; j++) { | |||
if (this == *MARKERS[j]) | |||
FAIL_ROUTE_AND_EXIT() | |||
} | |||
if (is_marker(this)) | |||
FAIL_ROUTE_AND_EXIT() | |||
j = 0; | |||
while (1) { | |||
if (!valid[j]) | |||
@@ -1508,7 +1507,7 @@ static int Tokenizer_parse_entity(Tokenizer* self) | |||
if (BAD_ROUTE) { | |||
RESET_ROUTE(); | |||
self->head = reset; | |||
if (Tokenizer_emit_char(self, *"&")) | |||
if (Tokenizer_emit_char(self, '&')) | |||
return -1; | |||
return 0; | |||
} | |||
@@ -1537,14 +1536,14 @@ static int Tokenizer_parse_comment(Tokenizer* self) | |||
return -1; | |||
while (1) { | |||
this = Tokenizer_READ(self, 0); | |||
if (this == *"") { | |||
if (!this) { | |||
comment = Tokenizer_pop(self); | |||
Py_XDECREF(comment); | |||
self->head = reset; | |||
return Tokenizer_emit_text(self, "<!--"); | |||
} | |||
if (this == *"-" && Tokenizer_READ(self, 1) == this && | |||
Tokenizer_READ(self, 2) == *">") { | |||
if (this == '-' && Tokenizer_READ(self, 1) == this && | |||
Tokenizer_READ(self, 2) == '>') { | |||
if (Tokenizer_emit_first(self, CommentStart)) | |||
return -1; | |||
if (Tokenizer_emit(self, CommentEnd)) | |||
@@ -1654,11 +1653,11 @@ static int Tokenizer_handle_tag_text(Tokenizer* self, Py_UNICODE text) | |||
if (!is_marker(text) || !Tokenizer_CAN_RECURSE(self)) | |||
return Tokenizer_emit_char(self, text); | |||
else if (text == next && next == *"{") | |||
else if (text == next && next == '{') | |||
return Tokenizer_parse_template_or_argument(self); | |||
else if (text == next && next == *"[") | |||
else if (text == next && next == '[') | |||
return Tokenizer_parse_wikilink(self); | |||
else if (text == *"<") | |||
else if (text == '<') | |||
return Tokenizer_parse_tag(self); | |||
return Tokenizer_emit_char(self, text); | |||
} | |||
@@ -1705,7 +1704,7 @@ Tokenizer_handle_tag_data(Tokenizer* self, TagData* data, Py_UNICODE chunk) | |||
return -1; | |||
} | |||
else if (data->context & TAG_ATTR_NAME) { | |||
if (chunk == *"=") { | |||
if (chunk == '=') { | |||
data->context = TAG_ATTR_VALUE | TAG_NOTE_QUOTE; | |||
if (Tokenizer_emit(self, TagAttrEquals)) | |||
return -1; | |||
@@ -1720,11 +1719,11 @@ Tokenizer_handle_tag_data(Tokenizer* self, TagData* data, Py_UNICODE chunk) | |||
} | |||
} | |||
else if (data->context & TAG_ATTR_VALUE) { | |||
escaped = (Tokenizer_READ_BACKWARDS(self, 1) == *"\\" && | |||
Tokenizer_READ_BACKWARDS(self, 2) != *"\\"); | |||
escaped = (Tokenizer_READ_BACKWARDS(self, 1) == '\\' && | |||
Tokenizer_READ_BACKWARDS(self, 2) != '\\'); | |||
if (data->context & TAG_NOTE_QUOTE) { | |||
data->context ^= TAG_NOTE_QUOTE; | |||
if (chunk == *"\"" && !escaped) { | |||
if (chunk == '"' && !escaped) { | |||
data->context |= TAG_QUOTED; | |||
if (Tokenizer_push(self, self->topstack->context)) | |||
return -1; | |||
@@ -1733,7 +1732,7 @@ Tokenizer_handle_tag_data(Tokenizer* self, TagData* data, Py_UNICODE chunk) | |||
} | |||
} | |||
else if (data->context & TAG_QUOTED) { | |||
if (chunk == *"\"" && !escaped) { | |||
if (chunk == '"' && !escaped) { | |||
data->context |= TAG_NOTE_SPACE; | |||
return 0; | |||
} | |||
@@ -1844,15 +1843,15 @@ static PyObject* Tokenizer_handle_blacklisted_tag(Tokenizer* self) | |||
while (1) { | |||
this = Tokenizer_READ(self, 0); | |||
next = Tokenizer_READ(self, 1); | |||
if (this == *"") | |||
if (!this) | |||
return Tokenizer_fail_route(self); | |||
else if (this == *"<" && next == *"/") { | |||
else if (this == '<' && next == '/') { | |||
if (Tokenizer_handle_tag_open_close(self)) | |||
return NULL; | |||
self->head++; | |||
return Tokenizer_parse(self, 0, 0); | |||
} | |||
else if (this == *"&") { | |||
else if (this == '&') { | |||
if (Tokenizer_parse_entity(self)) | |||
return NULL; | |||
} | |||
@@ -1957,7 +1956,7 @@ static PyObject* Tokenizer_really_parse_tag(Tokenizer* self) | |||
next = Tokenizer_READ(self, 1); | |||
can_exit = (!(data->context & (TAG_QUOTED | TAG_NAME)) || | |||
data->context & TAG_NOTE_SPACE); | |||
if (this == *"") { | |||
if (!this) { | |||
if (self->topstack->context & LC_TAG_ATTR) { | |||
if (data->context & TAG_QUOTED) { | |||
// Unclosed attribute quote: reset, don't die | |||
@@ -1973,7 +1972,7 @@ static PyObject* Tokenizer_really_parse_tag(Tokenizer* self) | |||
TagData_dealloc(data); | |||
return Tokenizer_fail_route(self); | |||
} | |||
else if (this == *">" && can_exit) { | |||
else if (this == '>' && can_exit) { | |||
if (Tokenizer_handle_tag_close_open(self, data, TagCloseOpen)) { | |||
TagData_dealloc(data); | |||
return NULL; | |||
@@ -1995,7 +1994,7 @@ static PyObject* Tokenizer_really_parse_tag(Tokenizer* self) | |||
Py_DECREF(text); | |||
return Tokenizer_handle_blacklisted_tag(self); | |||
} | |||
else if (this == *"/" && next == *">" && can_exit) { | |||
else if (this == '/' && next == '>' && can_exit) { | |||
if (Tokenizer_handle_tag_close_open(self, data, | |||
TagCloseSelfclose)) { | |||
TagData_dealloc(data); | |||
@@ -2078,7 +2077,7 @@ static int Tokenizer_parse_tag(Tokenizer* self) | |||
if (BAD_ROUTE) { | |||
RESET_ROUTE(); | |||
self->head = reset; | |||
return Tokenizer_emit_char(self, *"<"); | |||
return Tokenizer_emit_char(self, '<'); | |||
} | |||
if (!tag) { | |||
return -1; | |||
@@ -2165,12 +2164,12 @@ static int Tokenizer_parse_bold(Tokenizer* self) | |||
RESET_ROUTE(); | |||
self->head = reset; | |||
if (self->topstack->context & LC_STYLE_SECOND_PASS) | |||
return Tokenizer_emit_char(self, *"'") ? -1 : 1; | |||
return Tokenizer_emit_char(self, '\'') ? -1 : 1; | |||
if (self->topstack->context & LC_STYLE_ITALICS) { | |||
self->topstack->context |= LC_STYLE_PASS_AGAIN; | |||
return Tokenizer_emit_text(self, "'''"); | |||
} | |||
if (Tokenizer_emit_char(self, *"'")) | |||
if (Tokenizer_emit_char(self, '\'')) | |||
return -1; | |||
return Tokenizer_parse_italics(self); | |||
} | |||
@@ -2256,19 +2255,19 @@ static PyObject* Tokenizer_parse_style(Tokenizer* self) | |||
int context = self->topstack->context, ticks = 2, i; | |||
self->head += 2; | |||
while (Tokenizer_READ(self, 0) == *"'") { | |||
while (Tokenizer_READ(self, 0) == '\'') { | |||
self->head++; | |||
ticks++; | |||
} | |||
if (ticks > 5) { | |||
for (i = 0; i < ticks - 5; i++) { | |||
if (Tokenizer_emit_char(self, *"'")) | |||
if (Tokenizer_emit_char(self, '\'')) | |||
return NULL; | |||
} | |||
ticks = 5; | |||
} | |||
else if (ticks == 4) { | |||
if (Tokenizer_emit_char(self, *"'")) | |||
if (Tokenizer_emit_char(self, '\'')) | |||
return NULL; | |||
ticks = 3; | |||
} | |||
@@ -2281,7 +2280,7 @@ static PyObject* Tokenizer_parse_style(Tokenizer* self) | |||
if (!Tokenizer_CAN_RECURSE(self)) { | |||
if (ticks == 3) { | |||
if (context & LC_STYLE_SECOND_PASS) { | |||
if (Tokenizer_emit_char(self, *"'")) | |||
if (Tokenizer_emit_char(self, '\'')) | |||
return NULL; | |||
return Tokenizer_pop(self); | |||
} | |||
@@ -2289,7 +2288,7 @@ static PyObject* Tokenizer_parse_style(Tokenizer* self) | |||
self->topstack->context |= LC_STYLE_PASS_AGAIN; | |||
} | |||
for (i = 0; i < ticks; i++) { | |||
if (Tokenizer_emit_char(self, *"'")) | |||
if (Tokenizer_emit_char(self, '\'')) | |||
return NULL; | |||
} | |||
} | |||
@@ -2321,7 +2320,7 @@ static int Tokenizer_handle_list_marker(Tokenizer* self) | |||
PyObject *markup = Tokenizer_read(self, 0), *kwargs; | |||
Py_UNICODE code = *PyUnicode_AS_UNICODE(markup); | |||
if (code == *";") | |||
if (code == ';') | |||
self->topstack->context |= LC_DLTERM; | |||
kwargs = PyDict_New(); | |||
if (!kwargs) | |||
@@ -2345,8 +2344,8 @@ static int Tokenizer_handle_list(Tokenizer* self) | |||
if (Tokenizer_handle_list_marker(self)) | |||
return -1; | |||
while (marker == *"#" || marker == *"*" || marker == *";" || | |||
marker == *":") { | |||
while (marker == '#' || marker == '*' || marker == ';' || | |||
marker == ':') { | |||
self->head++; | |||
if (Tokenizer_handle_list_marker(self)) | |||
return -1; | |||
@@ -2368,11 +2367,11 @@ static int Tokenizer_handle_hr(Tokenizer* self) | |||
return -1; | |||
self->head += 3; | |||
for (i = 0; i < 4; i++) { | |||
if (Textbuffer_write(&buffer, *"-")) | |||
if (Textbuffer_write(&buffer, '-')) | |||
return -1; | |||
} | |||
while (Tokenizer_READ(self, 1) == *"-") { | |||
if (Textbuffer_write(&buffer, *"-")) | |||
while (Tokenizer_READ(self, 1) == '-') { | |||
if (Textbuffer_write(&buffer, '-')) | |||
return -1; | |||
self->head++; | |||
} | |||
@@ -2400,9 +2399,9 @@ static int Tokenizer_handle_hr(Tokenizer* self) | |||
static int Tokenizer_handle_dl_term(Tokenizer* self) | |||
{ | |||
self->topstack->context ^= LC_DLTERM; | |||
if (Tokenizer_READ(self, 0) == *":") | |||
if (Tokenizer_READ(self, 0) == ':') | |||
return Tokenizer_handle_list_marker(self); | |||
return Tokenizer_emit_char(self, *"\n"); | |||
return Tokenizer_emit_char(self, '\n'); | |||
} | |||
/* | |||
@@ -2441,28 +2440,26 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | |||
{ | |||
if (context & LC_FAIL_NEXT) | |||
return -1; | |||
if (context & LC_WIKILINK) { | |||
if (context & LC_WIKILINK_TEXT) | |||
return (data == *"[" && Tokenizer_READ(self, 1) == *"[") ? -1 : 0; | |||
else if (data == *"]" || data == *"{") | |||
if (context & LC_WIKILINK_TITLE) { | |||
if (data == ']' || data == '{') | |||
self->topstack->context |= LC_FAIL_NEXT; | |||
else if (data == *"\n" || data == *"[" || data == *"}") | |||
else if (data == '\n' || data == '[' || data == '}') | |||
return -1; | |||
return 0; | |||
} | |||
if (context & LC_EXT_LINK_TITLE) | |||
return (data == *"\n") ? -1 : 0; | |||
return (data == '\n') ? -1 : 0; | |||
if (context & LC_TAG_CLOSE) | |||
return (data == *"<") ? -1 : 0; | |||
return (data == '<') ? -1 : 0; | |||
if (context & LC_TEMPLATE_NAME) { | |||
if (data == *"{" || data == *"}" || data == *"[") { | |||
if (data == '{' || data == '}' || data == '[') { | |||
self->topstack->context |= LC_FAIL_NEXT; | |||
return 0; | |||
} | |||
if (data == *"]") { | |||
if (data == ']') { | |||
return -1; | |||
} | |||
if (data == *"|") | |||
if (data == '|') | |||
return 0; | |||
if (context & LC_HAS_TEXT) { | |||
if (context & LC_FAIL_ON_TEXT) { | |||
@@ -2470,7 +2467,7 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | |||
return -1; | |||
} | |||
else { | |||
if (data == *"\n") | |||
if (data == '\n') | |||
self->topstack->context |= LC_FAIL_ON_TEXT; | |||
} | |||
} | |||
@@ -2479,13 +2476,13 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | |||
} | |||
else { | |||
if (context & LC_FAIL_ON_EQUALS) { | |||
if (data == *"=") { | |||
if (data == '=') { | |||
return -1; | |||
} | |||
} | |||
else if (context & LC_FAIL_ON_LBRACE) { | |||
if (data == *"{" || (Tokenizer_READ(self, -1) == *"{" && | |||
Tokenizer_READ(self, -2) == *"{")) { | |||
if (data == '{' || (Tokenizer_READ(self, -1) == '{' && | |||
Tokenizer_READ(self, -2) == '{')) { | |||
if (context & LC_TEMPLATE) | |||
self->topstack->context |= LC_FAIL_ON_EQUALS; | |||
else | |||
@@ -2495,7 +2492,7 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | |||
self->topstack->context ^= LC_FAIL_ON_LBRACE; | |||
} | |||
else if (context & LC_FAIL_ON_RBRACE) { | |||
if (data == *"}") { | |||
if (data == '}') { | |||
if (context & LC_TEMPLATE) | |||
self->topstack->context |= LC_FAIL_ON_EQUALS; | |||
else | |||
@@ -2504,9 +2501,9 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | |||
} | |||
self->topstack->context ^= LC_FAIL_ON_RBRACE; | |||
} | |||
else if (data == *"{") | |||
else if (data == '{') | |||
self->topstack->context |= LC_FAIL_ON_LBRACE; | |||
else if (data == *"}") | |||
else if (data == '}') | |||
self->topstack->context |= LC_FAIL_ON_RBRACE; | |||
} | |||
return 0; | |||
@@ -2544,11 +2541,11 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||
self->head++; | |||
continue; | |||
} | |||
if (this == *"") | |||
if (!this) | |||
return Tokenizer_handle_end(self, this_context); | |||
next = Tokenizer_READ(self, 1); | |||
last = Tokenizer_READ_BACKWARDS(self, 1); | |||
if (this == next && next == *"{") { | |||
if (this == next && next == '{') { | |||
if (Tokenizer_CAN_RECURSE(self)) { | |||
if (Tokenizer_parse_template_or_argument(self)) | |||
return NULL; | |||
@@ -2556,84 +2553,83 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||
else if (Tokenizer_emit_char(self, this)) | |||
return NULL; | |||
} | |||
else if (this == *"|" && this_context & LC_TEMPLATE) { | |||
else if (this == '|' && this_context & LC_TEMPLATE) { | |||
if (Tokenizer_handle_template_param(self)) | |||
return NULL; | |||
} | |||
else if (this == *"=" && this_context & LC_TEMPLATE_PARAM_KEY) { | |||
else if (this == '=' && this_context & LC_TEMPLATE_PARAM_KEY) { | |||
if (Tokenizer_handle_template_param_value(self)) | |||
return NULL; | |||
} | |||
else if (this == next && next == *"}" && this_context & LC_TEMPLATE) | |||
else if (this == next && next == '}' && this_context & LC_TEMPLATE) | |||
return Tokenizer_handle_template_end(self); | |||
else if (this == *"|" && this_context & LC_ARGUMENT_NAME) { | |||
else if (this == '|' && this_context & LC_ARGUMENT_NAME) { | |||
if (Tokenizer_handle_argument_separator(self)) | |||
return NULL; | |||
} | |||
else if (this == next && next == *"}" && this_context & LC_ARGUMENT) { | |||
if (Tokenizer_READ(self, 2) == *"}") { | |||
else if (this == next && next == '}' && this_context & LC_ARGUMENT) { | |||
if (Tokenizer_READ(self, 2) == '}') { | |||
return Tokenizer_handle_argument_end(self); | |||
} | |||
if (Tokenizer_emit_char(self, this)) | |||
return NULL; | |||
} | |||
else if (this == next && next == *"[" && Tokenizer_CAN_RECURSE(self)) { | |||
if (!(this_context & AGG_INVALID_LINK)) { | |||
else if (this == next && next == '[' && Tokenizer_CAN_RECURSE(self)) { | |||
if (!(this_context & AGG_NO_WIKILINKS)) { | |||
if (Tokenizer_parse_wikilink(self)) | |||
return NULL; | |||
} | |||
else if (Tokenizer_emit_char(self, this)) | |||
return NULL; | |||
} | |||
else if (this == *"|" && this_context & LC_WIKILINK_TITLE) { | |||
else if (this == '|' && this_context & LC_WIKILINK_TITLE) { | |||
if (Tokenizer_handle_wikilink_separator(self)) | |||
return NULL; | |||
} | |||
else if (this == next && next == *"]" && this_context & LC_WIKILINK) | |||
else if (this == next && next == ']' && this_context & LC_WIKILINK) | |||
return Tokenizer_handle_wikilink_end(self); | |||
else if (this == *"[") { | |||
else if (this == '[') { | |||
if (Tokenizer_parse_external_link(self, 1)) | |||
return NULL; | |||
} | |||
else if (this == *":" && !is_marker(last)) { | |||
else if (this == ':' && !is_marker(last)) { | |||
if (Tokenizer_parse_external_link(self, 0)) | |||
return NULL; | |||
} | |||
else if (this == *"]" && this_context & LC_EXT_LINK_TITLE) | |||
else if (this == ']' && this_context & LC_EXT_LINK_TITLE) | |||
return Tokenizer_pop(self); | |||
else if (this == *"=" && !(self->global & GL_HEADING)) { | |||
if (last == *"\n" || last == *"") { | |||
else if (this == '=' && !(self->global & GL_HEADING)) { | |||
if (!last || last == '\n') { | |||
if (Tokenizer_parse_heading(self)) | |||
return NULL; | |||
} | |||
else if (Tokenizer_emit_char(self, this)) | |||
return NULL; | |||
} | |||
else if (this == *"=" && this_context & LC_HEADING) | |||
else if (this == '=' && this_context & LC_HEADING) | |||
return (PyObject*) Tokenizer_handle_heading_end(self); | |||
else if (this == *"\n" && this_context & LC_HEADING) | |||
else if (this == '\n' && this_context & LC_HEADING) | |||
return Tokenizer_fail_route(self); | |||
else if (this == *"&") { | |||
else if (this == '&') { | |||
if (Tokenizer_parse_entity(self)) | |||
return NULL; | |||
} | |||
else if (this == *"<" && next == *"!") { | |||
else if (this == '<' && next == '!') { | |||
next_next = Tokenizer_READ(self, 2); | |||
if (next_next == Tokenizer_READ(self, 3) && next_next == *"-") { | |||
if (next_next == Tokenizer_READ(self, 3) && next_next == '-') { | |||
if (Tokenizer_parse_comment(self)) | |||
return NULL; | |||
} | |||
else if (Tokenizer_emit_char(self, this)) | |||
return NULL; | |||
} | |||
else if (this == *"<" && next == *"/" && | |||
Tokenizer_READ(self, 2) != *"") { | |||
else if (this == '<' && next == '/' && Tokenizer_READ(self, 2)) { | |||
if (this_context & LC_TAG_BODY ? | |||
Tokenizer_handle_tag_open_close(self) : | |||
Tokenizer_handle_invalid_tag_start(self)) | |||
return NULL; | |||
} | |||
else if (this == *"<" && !(this_context & LC_TAG_CLOSE)) { | |||
else if (this == '<' && !(this_context & LC_TAG_CLOSE)) { | |||
if (Tokenizer_CAN_RECURSE(self)) { | |||
if (Tokenizer_parse_tag(self)) | |||
return NULL; | |||
@@ -2641,19 +2637,19 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||
else if (Tokenizer_emit_char(self, this)) | |||
return NULL; | |||
} | |||
else if (this == *">" && this_context & LC_TAG_CLOSE) | |||
else if (this == '>' && this_context & LC_TAG_CLOSE) | |||
return Tokenizer_handle_tag_close_close(self); | |||
else if (this == next && next == *"'") { | |||
else if (this == next && next == '\'' && !self->skip_style_tags) { | |||
temp = Tokenizer_parse_style(self); | |||
if (temp != Py_None) | |||
return temp; | |||
} | |||
else if (last == *"\n" || last == *"") { | |||
if (this == *"#" || this == *"*" || this == *";" || this == *":") { | |||
else if (!last || last == '\n') { | |||
if (this == '#' || this == '*' || this == ';' || this == ':') { | |||
if (Tokenizer_handle_list(self)) | |||
return NULL; | |||
} | |||
else if (this == *"-" && this == next && | |||
else if (this == '-' && this == next && | |||
this == Tokenizer_READ(self, 2) && | |||
this == Tokenizer_READ(self, 3)) { | |||
if (Tokenizer_handle_hr(self)) | |||
@@ -2662,7 +2658,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||
else if (Tokenizer_emit_char(self, this)) | |||
return NULL; | |||
} | |||
else if ((this == *"\n" || this == *":") && this_context & LC_DLTERM) { | |||
else if ((this == '\n' || this == ':') && this_context & LC_DLTERM) { | |||
if (Tokenizer_handle_dl_term(self)) | |||
return NULL; | |||
} | |||
@@ -2678,9 +2674,9 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||
static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) | |||
{ | |||
PyObject *text, *temp; | |||
int context = 0; | |||
int context = 0, skip_style_tags = 0; | |||
if (PyArg_ParseTuple(args, "U|i", &text, &context)) { | |||
if (PyArg_ParseTuple(args, "U|ii", &text, &context, &skip_style_tags)) { | |||
Py_XDECREF(self->text); | |||
self->text = PySequence_Fast(text, "expected a sequence"); | |||
} | |||
@@ -2689,7 +2685,8 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) | |||
Py_ssize_t size; | |||
/* Failed to parse a Unicode object; try a string instead. */ | |||
PyErr_Clear(); | |||
if (!PyArg_ParseTuple(args, "s#|i", &encoded, &size, &context)) | |||
if (!PyArg_ParseTuple(args, "s#|ii", &encoded, &size, &context, | |||
&skip_style_tags)) | |||
return NULL; | |||
temp = PyUnicode_FromStringAndSize(encoded, size); | |||
if (!text) | |||
@@ -2701,6 +2698,7 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) | |||
} | |||
self->head = self->global = self->depth = self->cycles = 0; | |||
self->length = PyList_GET_SIZE(self->text); | |||
self->skip_style_tags = skip_style_tags; | |||
return Tokenizer_parse(self, context, 1); | |||
} | |||
@@ -1,6 +1,6 @@ | |||
/* | |||
Tokenizer Header File for MWParserFromHell | |||
Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
this software and associated documentation files (the "Software"), to deal in | |||
@@ -41,9 +41,9 @@ SOFTWARE. | |||
#define HEXDIGITS "0123456789abcdefABCDEF" | |||
#define ALPHANUM "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |||
static const char* MARKERS[] = { | |||
"{", "}", "[", "]", "<", ">", "|", "=", "&", "'", "#", "*", ";", ":", "/", | |||
"-", "\n", ""}; | |||
static const char MARKERS[] = { | |||
'{', '}', '[', ']', '<', '>', '|', '=', '&', '\'', '#', '*', ';', ':', '/', | |||
'-', '\n', '\0'}; | |||
#define NUM_MARKERS 18 | |||
#define TEXTBUFFER_BLOCKSIZE 1024 | |||
@@ -121,40 +121,39 @@ static PyObject* TagCloseClose; | |||
#define LC_WIKILINK_TITLE 0x00000020 | |||
#define LC_WIKILINK_TEXT 0x00000040 | |||
#define LC_EXT_LINK 0x00000380 | |||
#define LC_EXT_LINK 0x00000180 | |||
#define LC_EXT_LINK_URI 0x00000080 | |||
#define LC_EXT_LINK_TITLE 0x00000100 | |||
#define LC_EXT_LINK_BRACKETS 0x00000200 | |||
#define LC_HEADING 0x0000FC00 | |||
#define LC_HEADING_LEVEL_1 0x00000400 | |||
#define LC_HEADING_LEVEL_2 0x00000800 | |||
#define LC_HEADING_LEVEL_3 0x00001000 | |||
#define LC_HEADING_LEVEL_4 0x00002000 | |||
#define LC_HEADING_LEVEL_5 0x00004000 | |||
#define LC_HEADING_LEVEL_6 0x00008000 | |||
#define LC_TAG 0x000F0000 | |||
#define LC_TAG_OPEN 0x00010000 | |||
#define LC_TAG_ATTR 0x00020000 | |||
#define LC_TAG_BODY 0x00040000 | |||
#define LC_TAG_CLOSE 0x00080000 | |||
#define LC_STYLE 0x00F00000 | |||
#define LC_STYLE_ITALICS 0x00100000 | |||
#define LC_STYLE_BOLD 0x00200000 | |||
#define LC_STYLE_PASS_AGAIN 0x00400000 | |||
#define LC_STYLE_SECOND_PASS 0x00800000 | |||
#define LC_DLTERM 0x01000000 | |||
#define LC_SAFETY_CHECK 0x7E000000 | |||
#define LC_HAS_TEXT 0x02000000 | |||
#define LC_FAIL_ON_TEXT 0x04000000 | |||
#define LC_FAIL_NEXT 0x08000000 | |||
#define LC_FAIL_ON_LBRACE 0x10000000 | |||
#define LC_FAIL_ON_RBRACE 0x20000000 | |||
#define LC_FAIL_ON_EQUALS 0x40000000 | |||
#define LC_HEADING 0x00007E00 | |||
#define LC_HEADING_LEVEL_1 0x00000200 | |||
#define LC_HEADING_LEVEL_2 0x00000400 | |||
#define LC_HEADING_LEVEL_3 0x00000800 | |||
#define LC_HEADING_LEVEL_4 0x00001000 | |||
#define LC_HEADING_LEVEL_5 0x00002000 | |||
#define LC_HEADING_LEVEL_6 0x00004000 | |||
#define LC_TAG 0x00078000 | |||
#define LC_TAG_OPEN 0x00008000 | |||
#define LC_TAG_ATTR 0x00010000 | |||
#define LC_TAG_BODY 0x00020000 | |||
#define LC_TAG_CLOSE 0x00040000 | |||
#define LC_STYLE 0x00780000 | |||
#define LC_STYLE_ITALICS 0x00080000 | |||
#define LC_STYLE_BOLD 0x00100000 | |||
#define LC_STYLE_PASS_AGAIN 0x00200000 | |||
#define LC_STYLE_SECOND_PASS 0x00400000 | |||
#define LC_DLTERM 0x00800000 | |||
#define LC_SAFETY_CHECK 0x3F000000 | |||
#define LC_HAS_TEXT 0x01000000 | |||
#define LC_FAIL_ON_TEXT 0x02000000 | |||
#define LC_FAIL_NEXT 0x04000000 | |||
#define LC_FAIL_ON_LBRACE 0x08000000 | |||
#define LC_FAIL_ON_RBRACE 0x10000000 | |||
#define LC_FAIL_ON_EQUALS 0x20000000 | |||
/* Global contexts: */ | |||
@@ -163,9 +162,10 @@ static PyObject* TagCloseClose; | |||
/* Aggregate contexts: */ | |||
#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE) | |||
#define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) | |||
#define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) | |||
#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE) | |||
#define AGG_INVALID_LINK (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK | LC_EXT_LINK) | |||
#define AGG_NO_WIKILINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI) | |||
#define AGG_NO_EXT_LINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK) | |||
/* Tag contexts: */ | |||
@@ -223,6 +223,7 @@ typedef struct { | |||
int global; /* global context */ | |||
int depth; /* stack recursion depth */ | |||
int cycles; /* total number of stack recursions */ | |||
int skip_style_tags; /* temporary fix for the sometimes broken tag parser */ | |||
} Tokenizer; | |||
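This new struct member backs the temporary *skip_style_tags* option described in the changelog; through the public entry point it looks like::

    import mwparserfromhell

    # '' and ''' are treated as plain text rather than style tags:
    code = mwparserfromhell.parse("''not italics''", skip_style_tags=True)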
@@ -241,7 +242,7 @@ typedef struct { | |||
/* Macros for accessing definitions: */ | |||
#define GET_HTML_TAG(markup) (markup == *":" ? "dd" : markup == *";" ? "dt" : "li") | |||
#define GET_HTML_TAG(markup) (markup == ':' ? "dd" : markup == ';' ? "dt" : "li") | |||
#define IS_PARSABLE(tag) (call_def_func("is_parsable", tag, NULL, NULL)) | |||
#define IS_SINGLE(tag) (call_def_func("is_single", tag, NULL, NULL)) | |||
#define IS_SINGLE_ONLY(tag) (call_def_func("is_single_only", tag, NULL, NULL)) | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -25,7 +25,7 @@ from math import log | |||
import re | |||
from . import contexts, tokens | |||
from ..compat import htmlentities | |||
from ..compat import htmlentities, range | |||
from ..definitions import (get_html_tag, is_parsable, is_single, | |||
is_single_only, is_scheme) | |||
@@ -467,7 +467,7 @@ class Tokenizer(object): | |||
reset = self._head | |||
self._head += 1 | |||
try: | |||
bad_context = self._context & contexts.INVALID_LINK | |||
bad_context = self._context & contexts.NO_EXT_LINKS | |||
if bad_context or not self._can_recurse(): | |||
raise BadRoute() | |||
link, extra, delta = self._really_parse_external_link(brackets) | |||
@@ -620,7 +620,8 @@ class Tokenizer(object): | |||
self._emit_first(tokens.TagAttrStart(pad_first=buf["first"], | |||
pad_before_eq=buf["before_eq"], pad_after_eq=buf["after_eq"])) | |||
self._emit_all(self._pop()) | |||
data.padding_buffer = {key: "" for key in data.padding_buffer} | |||
for key in data.padding_buffer: | |||
data.padding_buffer[key] = "" | |||
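For context: dict comprehensions require Python 2.7+, so the explicit loop keeps the pure-Python tokenizer importable on the newly supported 2.6. Had rebinding the attribute been acceptable, a 2.6-safe one-liner (an alternative sketch, not what the project chose) would be::

    data.padding_buffer = dict((key, "") for key in data.padding_buffer)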
def _handle_tag_space(self, data, text): | |||
"""Handle whitespace (*text*) inside of an HTML open tag.""" | |||
@@ -989,10 +990,8 @@ class Tokenizer(object): | |||
context = self._context | |||
if context & contexts.FAIL_NEXT: | |||
return False | |||
if context & contexts.WIKILINK: | |||
if context & contexts.WIKILINK_TEXT: | |||
return not (this == self._read(1) == "[") | |||
elif this == "]" or this == "{": | |||
if context & contexts.WIKILINK_TITLE: | |||
if this == "]" or this == "{": | |||
self._context |= contexts.FAIL_NEXT | |||
elif this == "\n" or this == "[" or this == "}": | |||
return False | |||
@@ -1082,7 +1081,7 @@ class Tokenizer(object): | |||
else: | |||
self._emit_text("}") | |||
elif this == next == "[" and self._can_recurse(): | |||
if not self._context & contexts.INVALID_LINK: | |||
if not self._context & contexts.NO_WIKILINKS: | |||
self._parse_wikilink() | |||
else: | |||
self._emit_text("[") | |||
@@ -1124,7 +1123,7 @@ class Tokenizer(object): | |||
self._emit_text("<") | |||
elif this == ">" and self._context & contexts.TAG_CLOSE: | |||
return self._handle_tag_close_close() | |||
elif this == next == "'": | |||
elif this == next == "'" and not self._skip_style_tags: | |||
result = self._parse_style() | |||
if result is not None: | |||
return result | |||
@@ -1141,8 +1140,9 @@ class Tokenizer(object): | |||
self._emit_text(this) | |||
self._head += 1 | |||
def tokenize(self, text, context=0): | |||
def tokenize(self, text, context=0, skip_style_tags=False): | |||
"""Build a list of tokens from a string of wikicode and return it.""" | |||
self._skip_style_tags = skip_style_tags | |||
split = self.regex.split(text) | |||
self._text = [segment for segment in split if segment] | |||
self._head = self._global = self._depth = self._cycles = 0 | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -34,15 +34,12 @@ from ..compat import py3k, str | |||
__all__ = ["Token"] | |||
class Token(object): | |||
class Token(dict): | |||
"""A token stores the semantic meaning of a unit of wikicode.""" | |||
def __init__(self, **kwargs): | |||
super(Token, self).__setattr__("_kwargs", kwargs) | |||
def __repr__(self): | |||
args = [] | |||
for key, value in self._kwargs.items(): | |||
for key, value in self.items(): | |||
if isinstance(value, str) and len(value) > 100: | |||
args.append(key + "=" + repr(value[:97] + "...")) | |||
else: | |||
@@ -50,18 +47,19 @@ class Token(object): | |||
return "{0}({1})".format(type(self).__name__, ", ".join(args)) | |||
def __eq__(self, other): | |||
if isinstance(other, type(self)): | |||
return self._kwargs == other._kwargs | |||
return False | |||
return isinstance(other, type(self)) and dict.__eq__(self, other) | |||
def __ne__(self, other): | |||
return not self.__eq__(other) | |||
def __getattr__(self, key): | |||
return self._kwargs.get(key) | |||
return self.get(key) | |||
def __setattr__(self, key, value): | |||
self._kwargs[key] = value | |||
self[key] = value | |||
def __delattr__(self, key): | |||
del self._kwargs[key] | |||
del self[key] | |||
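With ``Token`` now subclassing ``dict``, item access and attribute access are interchangeable, and absent attributes quietly return ``None`` through ``dict.get``. A sketch of the resulting behavior::

    from mwparserfromhell.parser import tokens

    tok = tokens.Text(text="foo")
    tok.text        # "foo", via __getattr__ -> self.get("text")
    tok["text"]     # "foo", via plain dict access
    tok.missing     # None: a missing key no longer raises AttributeError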
def make(name): | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -79,6 +79,11 @@ class SmartList(_SliceNormalizerMixIn, list): | |||
[2, 3, 4] | |||
>>> parent | |||
[0, 1, 2, 3, 4] | |||
The parent needs to keep a list of its children in order to update them, | |||
which prevents them from being garbage-collected. If you are keeping the | |||
parent around for a while but creating many children, it is advisable to | |||
call :py:meth:`~._ListProxy.destroy` when you're finished with them. | |||
""" | |||
def __init__(self, iterable=None): | |||
@@ -146,6 +151,11 @@ class SmartList(_SliceNormalizerMixIn, list): | |||
self.extend(other) | |||
return self | |||
def _release_children(self): | |||
copy = list(self) | |||
for child in self._children: | |||
child._parent = copy | |||
@inheritdoc | |||
def append(self, item): | |||
head = len(self) | |||
@@ -174,17 +184,13 @@ class SmartList(_SliceNormalizerMixIn, list): | |||
@inheritdoc | |||
def reverse(self): | |||
copy = list(self) | |||
for child in self._children: | |||
child._parent = copy | |||
self._release_children() | |||
super(SmartList, self).reverse() | |||
if py3k: | |||
@inheritdoc | |||
def sort(self, key=None, reverse=None): | |||
copy = list(self) | |||
for child in self._children: | |||
child._parent = copy | |||
self._release_children() | |||
kwargs = {} | |||
if key is not None: | |||
kwargs["key"] = key | |||
@@ -194,9 +200,7 @@ class SmartList(_SliceNormalizerMixIn, list): | |||
else: | |||
@inheritdoc | |||
def sort(self, cmp=None, key=None, reverse=None): | |||
copy = list(self) | |||
for child in self._children: | |||
child._parent = copy | |||
self._release_children() | |||
kwargs = {} | |||
if cmp is not None: | |||
kwargs["cmp"] = cmp | |||
@@ -448,5 +452,9 @@ class _ListProxy(_SliceNormalizerMixIn, list): | |||
item.sort(**kwargs) | |||
self._parent[self._start:self._stop:self._step] = item | |||
def destroy(self): | |||
"""Make the parent forget this child. The child will no longer work.""" | |||
self._parent._children.pop(id(self)) | |||
del inheritdoc |
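A usage sketch of the new ``destroy()`` method, following the docstring added above (import path assumed from this release's layout)::

    from mwparserfromhell.smart_list import SmartList

    parent = SmartList([0, 1, 2, 3, 4])
    child = parent[2:]   # a _ListProxy; the parent keeps a reference to it
    child.destroy()      # the parent forgets the child, so it can be collected
    # `child` must not be used after destroy(): it no longer tracks the parent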
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -26,8 +26,9 @@ interface for the ``unicode`` type (``str`` on py3k) in a dynamic manner. | |||
""" | |||
from __future__ import unicode_literals | |||
from sys import getdefaultencoding | |||
from .compat import py3k, py32, str | |||
from .compat import bytes, py26, py3k, str | |||
__all__ = ["StringMixIn"] | |||
@@ -55,10 +56,10 @@ class StringMixIn(object): | |||
return self.__unicode__() | |||
def __bytes__(self): | |||
return self.__unicode__().encode("utf8") | |||
return bytes(self.__unicode__(), getdefaultencoding()) | |||
else: | |||
def __str__(self): | |||
return self.__unicode__().encode("utf8") | |||
return bytes(self.__unicode__()) | |||
def __unicode__(self): | |||
raise NotImplementedError() | |||
@@ -67,33 +68,21 @@ class StringMixIn(object): | |||
return repr(self.__unicode__()) | |||
def __lt__(self, other): | |||
if isinstance(other, StringMixIn): | |||
return self.__unicode__() < other.__unicode__() | |||
return self.__unicode__() < other | |||
def __le__(self, other): | |||
if isinstance(other, StringMixIn): | |||
return self.__unicode__() <= other.__unicode__() | |||
return self.__unicode__() <= other | |||
def __eq__(self, other): | |||
if isinstance(other, StringMixIn): | |||
return self.__unicode__() == other.__unicode__() | |||
return self.__unicode__() == other | |||
def __ne__(self, other): | |||
if isinstance(other, StringMixIn): | |||
return self.__unicode__() != other.__unicode__() | |||
return self.__unicode__() != other | |||
def __gt__(self, other): | |||
if isinstance(other, StringMixIn): | |||
return self.__unicode__() > other.__unicode__() | |||
return self.__unicode__() > other | |||
def __ge__(self, other): | |||
if isinstance(other, StringMixIn): | |||
return self.__unicode__() >= other.__unicode__() | |||
return self.__unicode__() >= other | |||
if py3k: | |||
@@ -117,250 +106,22 @@ class StringMixIn(object): | |||
return reversed(self.__unicode__()) | |||
def __contains__(self, item): | |||
if isinstance(item, StringMixIn): | |||
return str(item) in self.__unicode__() | |||
return item in self.__unicode__() | |||
return str(item) in self.__unicode__() | |||
@inheritdoc | |||
def capitalize(self): | |||
return self.__unicode__().capitalize() | |||
if py3k and not py32: | |||
@inheritdoc | |||
def casefold(self): | |||
return self.__unicode__().casefold() | |||
@inheritdoc | |||
def center(self, width, fillchar=None): | |||
if fillchar is None: | |||
return self.__unicode__().center(width) | |||
return self.__unicode__().center(width, fillchar) | |||
@inheritdoc | |||
def count(self, sub, start=None, end=None): | |||
return self.__unicode__().count(sub, start, end) | |||
if not py3k: | |||
@inheritdoc | |||
def decode(self, encoding=None, errors=None): | |||
kwargs = {} | |||
if encoding is not None: | |||
kwargs["encoding"] = encoding | |||
if errors is not None: | |||
kwargs["errors"] = errors | |||
return self.__unicode__().decode(**kwargs) | |||
@inheritdoc | |||
def encode(self, encoding=None, errors=None): | |||
kwargs = {} | |||
if encoding is not None: | |||
kwargs["encoding"] = encoding | |||
if errors is not None: | |||
kwargs["errors"] = errors | |||
return self.__unicode__().encode(**kwargs) | |||
@inheritdoc | |||
def endswith(self, prefix, start=None, end=None): | |||
return self.__unicode__().endswith(prefix, start, end) | |||
@inheritdoc | |||
def expandtabs(self, tabsize=None): | |||
if tabsize is None: | |||
return self.__unicode__().expandtabs() | |||
return self.__unicode__().expandtabs(tabsize) | |||
@inheritdoc | |||
def find(self, sub, start=None, end=None): | |||
return self.__unicode__().find(sub, start, end) | |||
@inheritdoc | |||
def format(self, *args, **kwargs): | |||
return self.__unicode__().format(*args, **kwargs) | |||
if py3k: | |||
@inheritdoc | |||
def format_map(self, mapping): | |||
return self.__unicode__().format_map(mapping) | |||
@inheritdoc | |||
def index(self, sub, start=None, end=None): | |||
return self.__unicode__().index(sub, start, end) | |||
@inheritdoc | |||
def isalnum(self): | |||
return self.__unicode__().isalnum() | |||
@inheritdoc | |||
def isalpha(self): | |||
return self.__unicode__().isalpha() | |||
@inheritdoc | |||
def isdecimal(self): | |||
return self.__unicode__().isdecimal() | |||
@inheritdoc | |||
def isdigit(self): | |||
return self.__unicode__().isdigit() | |||
if py3k: | |||
@inheritdoc | |||
def isidentifier(self): | |||
return self.__unicode__().isidentifier() | |||
@inheritdoc | |||
def islower(self): | |||
return self.__unicode__().islower() | |||
@inheritdoc | |||
def isnumeric(self): | |||
return self.__unicode__().isnumeric() | |||
if py3k: | |||
@inheritdoc | |||
def isprintable(self): | |||
return self.__unicode__().isprintable() | |||
@inheritdoc | |||
def isspace(self): | |||
return self.__unicode__().isspace() | |||
@inheritdoc | |||
def istitle(self): | |||
return self.__unicode__().istitle() | |||
@inheritdoc | |||
def isupper(self): | |||
return self.__unicode__().isupper() | |||
@inheritdoc | |||
def join(self, iterable): | |||
return self.__unicode__().join(iterable) | |||
@inheritdoc | |||
def ljust(self, width, fillchar=None): | |||
if fillchar is None: | |||
return self.__unicode__().ljust(width) | |||
return self.__unicode__().ljust(width, fillchar) | |||
@inheritdoc | |||
def lower(self): | |||
return self.__unicode__().lower() | |||
@inheritdoc | |||
def lstrip(self, chars=None): | |||
return self.__unicode__().lstrip(chars) | |||
def __getattr__(self, attr): | |||
return getattr(self.__unicode__(), attr) | |||
if py3k: | |||
@staticmethod | |||
@inheritdoc | |||
def maketrans(x, y=None, z=None): | |||
if z is None: | |||
if y is None: | |||
return str.maketrans(x) | |||
return str.maketrans(x, y) | |||
return str.maketrans(x, y, z) | |||
@inheritdoc | |||
def partition(self, sep): | |||
return self.__unicode__().partition(sep) | |||
maketrans = str.maketrans # Static method can't rely on __getattr__ | |||
@inheritdoc | |||
def replace(self, old, new, count=None): | |||
if count is None: | |||
return self.__unicode__().replace(old, new) | |||
return self.__unicode__().replace(old, new, count) | |||
@inheritdoc | |||
def rfind(self, sub, start=None, end=None): | |||
return self.__unicode__().rfind(sub, start, end) | |||
@inheritdoc | |||
def rindex(self, sub, start=None, end=None): | |||
return self.__unicode__().rindex(sub, start, end) | |||
@inheritdoc | |||
def rjust(self, width, fillchar=None): | |||
if fillchar is None: | |||
return self.__unicode__().rjust(width) | |||
return self.__unicode__().rjust(width, fillchar) | |||
@inheritdoc | |||
def rpartition(self, sep): | |||
return self.__unicode__().rpartition(sep) | |||
if py3k and not py32: | |||
@inheritdoc | |||
def rsplit(self, sep=None, maxsplit=None): | |||
kwargs = {} | |||
if sep is not None: | |||
kwargs["sep"] = sep | |||
if maxsplit is not None: | |||
kwargs["maxsplit"] = maxsplit | |||
return self.__unicode__().rsplit(**kwargs) | |||
else: | |||
if py26: | |||
@inheritdoc | |||
def rsplit(self, sep=None, maxsplit=None): | |||
if maxsplit is None: | |||
if sep is None: | |||
return self.__unicode__().rsplit() | |||
return self.__unicode__().rsplit(sep) | |||
return self.__unicode__().rsplit(sep, maxsplit) | |||
@inheritdoc | |||
def rstrip(self, chars=None): | |||
return self.__unicode__().rstrip(chars) | |||
if py3k and not py32: | |||
@inheritdoc | |||
def split(self, sep=None, maxsplit=None): | |||
kwargs = {} | |||
if sep is not None: | |||
kwargs["sep"] = sep | |||
if maxsplit is not None: | |||
kwargs["maxsplit"] = maxsplit | |||
return self.__unicode__().split(**kwargs) | |||
else: | |||
@inheritdoc | |||
def split(self, sep=None, maxsplit=None): | |||
if maxsplit is None: | |||
if sep is None: | |||
return self.__unicode__().split() | |||
return self.__unicode__().split(sep) | |||
return self.__unicode__().split(sep, maxsplit) | |||
@inheritdoc | |||
def splitlines(self, keepends=None): | |||
if keepends is None: | |||
return self.__unicode__().splitlines() | |||
return self.__unicode__().splitlines(keepends) | |||
@inheritdoc | |||
def startswith(self, prefix, start=None, end=None): | |||
return self.__unicode__().startswith(prefix, start, end) | |||
@inheritdoc | |||
def strip(self, chars=None): | |||
return self.__unicode__().strip(chars) | |||
@inheritdoc | |||
def swapcase(self): | |||
return self.__unicode__().swapcase() | |||
@inheritdoc | |||
def title(self): | |||
return self.__unicode__().title() | |||
@inheritdoc | |||
def translate(self, table): | |||
return self.__unicode__().translate(table) | |||
@inheritdoc | |||
def upper(self): | |||
return self.__unicode__().upper() | |||
@inheritdoc | |||
def zfill(self, width): | |||
return self.__unicode__().zfill(width) | |||
def encode(self, encoding=None, errors=None): | |||
if encoding is None: | |||
encoding = getdefaultencoding() | |||
if errors is not None: | |||
return self.__unicode__().encode(encoding, errors) | |||
return self.__unicode__().encode(encoding) | |||
del inheritdoc |
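The dozens of per-method wrappers removed above are replaced by a single ``__getattr__`` that forwards any unresolved attribute to the underlying string; only methods needing special treatment (``encode`` with its default encoding, the static ``maketrans``, ``rsplit`` on older Pythons) stay explicit. A sketch of the effect::

    from mwparserfromhell.string_mixin import StringMixIn

    class Example(StringMixIn):
        def __unicode__(self):
            return "Hello World"

    ex = Example()
    ex.upper()   # "HELLO WORLD": resolved through __getattr__
    ex.split()   # ["Hello", "World"]: no wrapper method needed anymore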
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -21,8 +21,8 @@ | |||
# SOFTWARE. | |||
""" | |||
This module contains accessory functions that wrap around existing ones to | |||
provide additional functionality. | |||
This module contains accessory functions for other parts of the library. Parser | |||
users generally won't need stuff from here. | |||
""" | |||
from __future__ import unicode_literals | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -21,9 +21,10 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from itertools import chain | |||
import re | |||
from .compat import maxsize, py3k, str | |||
from .compat import py3k, range, str | |||
from .nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, | |||
Node, Tag, Template, Text, Wikilink) | |||
from .string_mixin import StringMixIn | |||
@@ -51,96 +52,130 @@ class Wikicode(StringMixIn): | |||
def __unicode__(self): | |||
return "".join([str(node) for node in self.nodes]) | |||
def _get_children(self, node): | |||
"""Iterate over all descendants of a given *node*, including itself. | |||
This is implemented by the ``__iternodes__()`` generator of ``Node`` | |||
classes, which by default yields itself and nothing more. | |||
""" | |||
for context, child in node.__iternodes__(self._get_all_nodes): | |||
yield child | |||
def _get_all_nodes(self, code): | |||
"""Iterate over all of our descendant nodes. | |||
This is implemented by calling :py:meth:`_get_children` on every node | |||
in our node list (:py:attr:`self.nodes <nodes>`). | |||
@staticmethod | |||
def _get_children(node, contexts=False, parent=None): | |||
"""Iterate over all child :py:class:`.Node`\ s of a given *node*.""" | |||
yield (parent, node) if contexts else node | |||
for code in node.__children__(): | |||
for child in code.nodes: | |||
for result in Wikicode._get_children(child, contexts, code): | |||
yield result | |||
@staticmethod | |||
def _slice_replace(code, index, old, new): | |||
"""Replace the string *old* with *new* across *index* in *code*.""" | |||
nodes = [str(node) for node in code.get(index)] | |||
substring = "".join(nodes).replace(old, new) | |||
code.nodes[index] = parse_anything(substring).nodes | |||
@staticmethod | |||
def _build_matcher(matches, flags): | |||
"""Helper for :py:meth:`_indexed_ifilter` and others. | |||
If *matches* is a function, return it. If it's a regex, return a | |||
wrapper around it that can be called with a node to do a search. If | |||
it's ``None``, return a function that always returns ``True``. | |||
""" | |||
for node in code.nodes: | |||
for child in self._get_children(node): | |||
yield child | |||
def _is_equivalent(self, obj, node): | |||
"""Return ``True`` if *obj* and *node* are equivalent, else ``False``. | |||
If *obj* is a ``Node``, the function will test whether they are the | |||
same object, otherwise it will compare them with ``==``. | |||
if matches: | |||
if callable(matches): | |||
return matches | |||
return lambda obj: re.search(matches, str(obj), flags) | |||
return lambda obj: True | |||
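A sketch of the three accepted forms, per the docstring (``FLAGS`` is the module-level default flag set in ``wikicode.py``)::

    from mwparserfromhell.wikicode import FLAGS, Wikicode

    match = Wikicode._build_matcher(None, FLAGS)               # always True
    match = Wikicode._build_matcher(r"\{\{", FLAGS)            # regex, via re.search
    match = Wikicode._build_matcher(lambda node: True, FLAGS)  # callable, passed through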
def _indexed_ifilter(self, recursive=True, matches=None, flags=FLAGS, | |||
forcetype=None): | |||
"""Iterate over nodes and their corresponding indices in the node list. | |||
The arguments are interpreted as for :py:meth:`ifilter`. For each tuple | |||
``(i, node)`` yielded by this method, ``self.index(node) == i``. Note | |||
that if *recursive* is ``True``, ``self.nodes[i]`` might not be the | |||
node itself, but will still contain it. | |||
""" | |||
return (node is obj) if isinstance(obj, Node) else (node == obj) | |||
def _contains(self, nodes, obj): | |||
"""Return ``True`` if *obj* is inside of *nodes*, else ``False``. | |||
If *obj* is a ``Node``, we will only return ``True`` if *obj* is | |||
actually in the list (and not just a node that equals it). Otherwise, | |||
the test is simply ``obj in nodes``. | |||
match = self._build_matcher(matches, flags) | |||
if recursive: | |||
def getter(i, node): | |||
for ch in self._get_children(node): | |||
yield (i, ch) | |||
inodes = chain(*(getter(i, n) for i, n in enumerate(self.nodes))) | |||
else: | |||
inodes = enumerate(self.nodes) | |||
for i, node in inodes: | |||
if (not forcetype or isinstance(node, forcetype)) and match(node): | |||
yield (i, node) | |||
def _do_strong_search(self, obj, recursive=True): | |||
"""Search for the specific element *obj* within the node list. | |||
*obj* can be either a :py:class:`.Node` or a :py:class:`.Wikicode` | |||
object. If found, we return a tuple (*context*, *index*) where | |||
*context* is the :py:class:`.Wikicode` that contains *obj* and *index* | |||
is its index there, as a :py:class:`slice`. Note that if *recursive* is | |||
``False``, *context* will always be ``self`` (since we only look for | |||
*obj* among immediate descendants), but if *recursive* is ``True``, | |||
then it could be any :py:class:`.Wikicode` contained by a node within | |||
``self``. If *obj* is not found, :py:exc:`ValueError` is raised. | |||
""" | |||
mkslice = lambda i: slice(i, i + 1) | |||
if isinstance(obj, Node): | |||
for node in nodes: | |||
if node is obj: | |||
return True | |||
return False | |||
return obj in nodes | |||
def _do_search(self, obj, recursive, context=None, literal=None): | |||
"""Return some info about the location of *obj* within *context*. | |||
If *recursive* is ``True``, we'll look within *context* (``self`` by | |||
default) and its descendants, otherwise just *context*. We raise | |||
:py:exc:`ValueError` if *obj* isn't found. The return data is a list of | |||
3-tuples (*type*, *context*, *data*) where *type* is *obj*\ 's best | |||
type resolution (either ``Node``, ``Wikicode``, or ``str``), *context* | |||
is the closest ``Wikicode`` encompassing it, and *data* is either a | |||
``Node``, a list of ``Node``\ s, or ``None`` depending on *type*. | |||
""" | |||
if not context: | |||
context = self | |||
literal = isinstance(obj, (Node, Wikicode)) | |||
obj = parse_anything(obj) | |||
if not obj or obj not in self: | |||
raise ValueError(obj) | |||
if len(obj.nodes) == 1: | |||
obj = obj.get(0) | |||
if not recursive: | |||
return self, mkslice(self.index(obj)) | |||
for i, node in enumerate(self.nodes): | |||
for context, child in self._get_children(node, contexts=True): | |||
if obj is child: | |||
if not context: | |||
context = self | |||
return context, mkslice(context.index(child)) | |||
else: | |||
context, ind = self._do_strong_search(obj.get(0), recursive) | |||
for i in range(1, len(obj.nodes)): | |||
if obj.get(i) is not context.get(ind.start + i): | |||
break | |||
else: | |||
return context, slice(ind.start, ind.start + len(obj.nodes)) | |||
raise ValueError(obj) | |||
compare = lambda a, b: (a is b) if literal else (a == b) | |||
results = [] | |||
i = 0 | |||
while i < len(context.nodes): | |||
node = context.get(i) | |||
if isinstance(obj, Node) and compare(obj, node): | |||
results.append((Node, context, node)) | |||
elif isinstance(obj, Wikicode) and compare(obj.get(0), node): | |||
for j in range(1, len(obj.nodes)): | |||
if not compare(obj.get(j), context.get(i + j)): | |||
break | |||
else: | |||
nodes = list(context.nodes[i:i + len(obj.nodes)]) | |||
results.append((Wikicode, context, nodes)) | |||
i += len(obj.nodes) - 1 | |||
elif recursive: | |||
contexts = node.__iternodes__(self._get_all_nodes) | |||
processed = [] | |||
for code in (ctx for ctx, child in contexts): | |||
if code and code not in processed and obj in code: | |||
search = self._do_search(obj, recursive, code, literal) | |||
results.extend(search) | |||
processed.append(code) | |||
i += 1 | |||
if not results and not literal and recursive: | |||
results.append((str, context, None)) | |||
if not results and context is self: | |||
def _do_weak_search(self, obj, recursive): | |||
"""Search for an element that looks like *obj* within the node list. | |||
This follows the same rules as :py:meth:`_do_strong_search` with some | |||
differences. *obj* is treated as a string that might represent any | |||
:py:class:`.Node`, :py:class:`.Wikicode`, or combination of the two | |||
present in the node list. Thus, matching is weak (using string | |||
comparisons) rather than strong (using ``is``). Because multiple nodes | |||
can match *obj*, the result is a list of tuples instead of just one | |||
(however, :py:exc:`ValueError` is still raised if nothing is found). | |||
Individual matches will never overlap. | |||
The tuples contain a new first element, *exact*, which is ``True`` if | |||
we were able to match *obj* exactly to one or more adjacent nodes, or | |||
``False`` if we found *obj* inside a node or incompletely spanning | |||
multiple nodes. | |||
""" | |||
obj = parse_anything(obj) | |||
if not obj or obj not in self: | |||
raise ValueError(obj) | |||
results = [] | |||
contexts = [self] | |||
while contexts: | |||
context = contexts.pop() | |||
i = len(context.nodes) - 1 | |||
while i >= 0: | |||
node = context.get(i) | |||
if obj.get(-1) == node: | |||
for j in range(-len(obj.nodes), -1): | |||
if obj.get(j) != context.get(i + j + 1): | |||
break | |||
else: | |||
i -= len(obj.nodes) - 1 | |||
index = slice(i, i + len(obj.nodes)) | |||
results.append((True, context, index)) | |||
elif recursive and obj in node: | |||
contexts.extend(node.__children__()) | |||
i -= 1 | |||
if not results: | |||
if not recursive: | |||
raise ValueError(obj) | |||
results.append((False, self, slice(0, len(self.nodes)))) | |||
return results | |||
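Taken together, the two private searches define the contract used by the mutation methods below; a sketch of the return shapes (``code``, ``node``, and ``obj`` are placeholders)::

    context, index = code._do_strong_search(node)  # exactly one (Wikicode, slice)
    for exact, context, index in code._do_weak_search(obj, True):
        pass  # one or more non-overlapping (bool, Wikicode, slice) tuples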
def _get_tree(self, code, lines, marker, indent): | |||
@@ -245,14 +280,14 @@ class Wikicode(StringMixIn): | |||
return the index of our direct descendant node within *our* list of | |||
nodes. Otherwise, the lookup is done only on direct descendants. | |||
""" | |||
if recursive: | |||
for i, node in enumerate(self.nodes): | |||
if self._contains(self._get_children(node), obj): | |||
return i | |||
raise ValueError(obj) | |||
strict = isinstance(obj, Node) | |||
equivalent = (lambda o, n: o is n) if strict else (lambda o, n: o == n) | |||
for i, node in enumerate(self.nodes): | |||
if self._is_equivalent(obj, node): | |||
if recursive: | |||
for child in self._get_children(node): | |||
if equivalent(obj, child): | |||
return i | |||
elif equivalent(obj, node): | |||
return i | |||
raise ValueError(obj) | |||
@@ -268,66 +303,79 @@ class Wikicode(StringMixIn): | |||
self.nodes.insert(index, node) | |||
def insert_before(self, obj, value, recursive=True): | |||
"""Insert *value* immediately before *obj* in the list of nodes. | |||
"""Insert *value* immediately before *obj*. | |||
*obj* can be either a string, a :py:class:`~.Node`, or other | |||
*obj* can be either a string, a :py:class:`~.Node`, or another | |||
:py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, | |||
for example). *value* can be anything parsable by | |||
:py:func:`.parse_anything`. If *recursive* is ``True``, we will try to | |||
find *obj* within our child nodes even if it is not a direct descendant | |||
of this :py:class:`~.Wikicode` object. If *obj* is not found, | |||
for example). If *obj* is a string, we will operate on all instances | |||
of that string within the code, otherwise only on the specific instance | |||
given. *value* can be anything parsable by :py:func:`.parse_anything`. | |||
If *recursive* is ``True``, we will try to find *obj* within our child | |||
nodes even if it is not a direct descendant of this | |||
:py:class:`~.Wikicode` object. If *obj* is not found, | |||
:py:exc:`ValueError` is raised. | |||
""" | |||
for restype, context, data in self._do_search(obj, recursive): | |||
if restype in (Node, Wikicode): | |||
i = context.index(data if restype is Node else data[0], False) | |||
context.insert(i, value) | |||
else: | |||
obj = str(obj) | |||
context.nodes = str(context).replace(obj, str(value) + obj) | |||
if isinstance(obj, (Node, Wikicode)): | |||
context, index = self._do_strong_search(obj, recursive) | |||
context.insert(index.start, value) | |||
else: | |||
for exact, context, index in self._do_weak_search(obj, recursive): | |||
if exact: | |||
context.insert(index.start, value) | |||
else: | |||
obj = str(obj) | |||
self._slice_replace(context, index, obj, str(value) + obj) | |||
def insert_after(self, obj, value, recursive=True): | |||
"""Insert *value* immediately after *obj* in the list of nodes. | |||
"""Insert *value* immediately after *obj*. | |||
*obj* can be either a string, a :py:class:`~.Node`, or other | |||
*obj* can be either a string, a :py:class:`~.Node`, or another | |||
:py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, | |||
for example). *value* can be anything parsable by | |||
:py:func:`.parse_anything`. If *recursive* is ``True``, we will try to | |||
find *obj* within our child nodes even if it is not a direct descendant | |||
of this :py:class:`~.Wikicode` object. If *obj* is not found, | |||
for example). If *obj* is a string, we will operate on all instances | |||
of that string within the code, otherwise only on the specific instance | |||
given. *value* can be anything parsable by :py:func:`.parse_anything`. | |||
If *recursive* is ``True``, we will try to find *obj* within our child | |||
nodes even if it is not a direct descendant of this | |||
:py:class:`~.Wikicode` object. If *obj* is not found, | |||
:py:exc:`ValueError` is raised. | |||
""" | |||
for restype, context, data in self._do_search(obj, recursive): | |||
if restype in (Node, Wikicode): | |||
i = context.index(data if restype is Node else data[-1], False) | |||
context.insert(i + 1, value) | |||
else: | |||
obj = str(obj) | |||
context.nodes = str(context).replace(obj, obj + str(value)) | |||
if isinstance(obj, (Node, Wikicode)): | |||
context, index = self._do_strong_search(obj, recursive) | |||
context.insert(index.stop, value) | |||
else: | |||
for exact, context, index in self._do_weak_search(obj, recursive): | |||
if exact: | |||
context.insert(index.stop, value) | |||
else: | |||
obj = str(obj) | |||
self._slice_replace(context, index, obj, obj + str(value)) | |||
def replace(self, obj, value, recursive=True): | |||
"""Replace *obj* with *value* in the list of nodes. | |||
"""Replace *obj* with *value*. | |||
*obj* can be either a string, a :py:class:`~.Node`, or other | |||
*obj* can be either a string, a :py:class:`~.Node`, or another | |||
:py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, | |||
for example). *value* can be anything parsable by | |||
:py:func:`.parse_anything`. If *recursive* is ``True``, we will try to | |||
find *obj* within our child nodes even if it is not a direct descendant | |||
of this :py:class:`~.Wikicode` object. If *obj* is not found, | |||
for example). If *obj* is a string, we will operate on all instances | |||
of that string within the code, otherwise only on the specific instance | |||
given. *value* can be anything parsable by :py:func:`.parse_anything`. | |||
If *recursive* is ``True``, we will try to find *obj* within our child | |||
nodes even if it is not a direct descendant of this | |||
:py:class:`~.Wikicode` object. If *obj* is not found, | |||
:py:exc:`ValueError` is raised. | |||
""" | |||
for restype, context, data in self._do_search(obj, recursive): | |||
if restype is Node: | |||
i = context.index(data, False) | |||
context.nodes.pop(i) | |||
context.insert(i, value) | |||
elif restype is Wikicode: | |||
i = context.index(data[0], False) | |||
for _ in data: | |||
context.nodes.pop(i) | |||
context.insert(i, value) | |||
else: | |||
context.nodes = str(context).replace(str(obj), str(value)) | |||
if isinstance(obj, (Node, Wikicode)): | |||
context, index = self._do_strong_search(obj, recursive) | |||
for i in range(index.start, index.stop): | |||
context.nodes.pop(index.start) | |||
context.insert(index.start, value) | |||
else: | |||
for exact, context, index in self._do_weak_search(obj, recursive): | |||
if exact: | |||
for i in range(index.start, index.stop): | |||
context.nodes.pop(index.start) | |||
context.insert(index.start, value) | |||
else: | |||
self._slice_replace(context, index, str(obj), str(value)) | |||
def append(self, value): | |||
"""Insert *value* at the end of the list of nodes. | |||
@@ -341,55 +389,65 @@ class Wikicode(StringMixIn): | |||
def remove(self, obj, recursive=True): | |||
"""Remove *obj* from the list of nodes. | |||
*obj* can be either a string, a :py:class:`~.Node`, or other | |||
*obj* can be either a string, a :py:class:`~.Node`, or another | |||
:py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, | |||
for example). If *recursive* is ``True``, we will try to find *obj* | |||
within our child nodes even if it is not a direct descendant of this | |||
for example). If *obj* is a string, we will operate on all instances | |||
of that string within the code, otherwise only on the specific instance | |||
given. If *recursive* is ``True``, we will try to find *obj* within our | |||
child nodes even if it is not a direct descendant of this | |||
:py:class:`~.Wikicode` object. If *obj* is not found, | |||
:py:exc:`ValueError` is raised. | |||
""" | |||
for restype, context, data in self._do_search(obj, recursive): | |||
if restype is Node: | |||
context.nodes.pop(context.index(data, False)) | |||
elif restype is Wikicode: | |||
i = context.index(data[0], False) | |||
for _ in data: | |||
context.nodes.pop(i) | |||
else: | |||
context.nodes = str(context).replace(str(obj), "") | |||
if isinstance(obj, (Node, Wikicode)): | |||
context, index = self._do_strong_search(obj, recursive) | |||
for i in range(index.start, index.stop): | |||
context.nodes.pop(index.start) | |||
else: | |||
for exact, context, index in self._do_weak_search(obj, recursive): | |||
if exact: | |||
for i in range(index.start, index.stop): | |||
context.nodes.pop(index.start) | |||
else: | |||
self._slice_replace(context, index, str(obj), "") | |||
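The string-versus-node distinction spelled out in the four docstrings above, in one short example::

    import mwparserfromhell

    code = mwparserfromhell.parse("{{foo}} bar {{foo}}")
    code.replace("{{foo}}", "{{baz}}")  # a string: every instance is replaced
    node = code.filter_templates()[0]
    code.remove(node)                   # a Node: only that exact node is removed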
def matches(self, other): | |||
"""Do a loose equivalency test suitable for comparing page names. | |||
*other* can be any string-like object, including | |||
:py:class:`~.Wikicode`. This operation is symmetric; both sides are | |||
adjusted. Specifically, whitespace and markup are stripped and the first | |||
letter's case is normalized. Typical usage is | |||
:py:class:`~.Wikicode`, or a tuple of these. This operation is | |||
symmetric; both sides are adjusted. Specifically, whitespace and markup | |||
are stripped and the first letter's case is normalized. Typical usage is | |||
``if template.name.matches("stub"): ...``. | |||
""" | |||
cmp = lambda a, b: (a[0].upper() + a[1:] == b[0].upper() + b[1:] | |||
if a and b else a == b) | |||
this = self.strip_code().strip() | |||
if isinstance(other, (tuple, list)): | |||
for obj in other: | |||
that = parse_anything(obj).strip_code().strip() | |||
if cmp(this, that): | |||
return True | |||
return False | |||
that = parse_anything(other).strip_code().strip() | |||
if not this or not that: | |||
return this == that | |||
return this[0].upper() + this[1:] == that[0].upper() + that[1:] | |||
return cmp(this, that) | |||
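A short example of the widened signature (tuple/list support is new in this release)::

    import mwparserfromhell

    name = mwparserfromhell.parse("{{Stub}}").filter_templates()[0].name
    name.matches("stub")           # True: the first letter's case is normalized
    name.matches(("foo", "stub"))  # True: any member of the tuple may match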
def ifilter(self, recursive=True, matches=None, flags=FLAGS, | |||
forcetype=None): | |||
"""Iterate over nodes in our list matching certain conditions. | |||
If *recursive* is ``True``, we will iterate over our children and all | |||
descendants of our children, otherwise just our immediate children. If | |||
*matches* is given, we will only yield the nodes that match the given | |||
regular expression (with :py:func:`re.search`). The default flags used | |||
are :py:const:`re.IGNORECASE`, :py:const:`re.DOTALL`, and | |||
:py:const:`re.UNICODE`, but custom flags can be specified by passing | |||
*flags*. If *forcetype* is given, only nodes that are instances of this | |||
type are yielded. | |||
of their descendants, otherwise just our immediate children. If | |||
*forcetype* is given, only nodes that are instances of this type are | |||
yielded. *matches* can be used to further restrict the nodes, either as | |||
a function (taking a single :py:class:`.Node` and returning a boolean) | |||
or a regular expression (matched against the node's string | |||
representation with :py:func:`re.search`). If *matches* is a regex, the | |||
flags passed to :py:func:`re.search` are :py:const:`re.IGNORECASE`, | |||
:py:const:`re.DOTALL`, and :py:const:`re.UNICODE`, but custom flags can | |||
be specified by passing *flags*. | |||
""" | |||
for node in (self._get_all_nodes(self) if recursive else self.nodes): | |||
if not forcetype or isinstance(node, forcetype): | |||
if not matches or re.search(matches, str(node), flags): | |||
yield node | |||
return (node for i, node in | |||
self._indexed_ifilter(recursive, matches, flags, forcetype)) | |||
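The callable form of *matches* composes naturally with the type-specific filter methods; a sketch::

    is_stub = lambda node: node.name.matches("stub")
    stubs = code.filter_templates(matches=is_stub)  # only templates named "stub"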
def filter(self, recursive=True, matches=None, flags=FLAGS, | |||
forcetype=None): | |||
@@ -399,7 +457,7 @@ class Wikicode(StringMixIn): | |||
""" | |||
return list(self.ifilter(recursive, matches, flags, forcetype)) | |||
def get_sections(self, levels=None, matches=None, flags=FLAGS, | |||
def get_sections(self, levels=None, matches=None, flags=FLAGS, flat=False, | |||
include_lead=None, include_headings=True): | |||
"""Return a list of sections within the page. | |||
@@ -407,13 +465,13 @@ class Wikicode(StringMixIn): | |||
node list (implemented using :py:class:`~.SmartList`) so that changes | |||
to sections are reflected in the parent Wikicode object. | |||
Each section contains all of its subsections. If *levels* is given, it | |||
should be an iterable of integers; only sections whose heading levels | |||
are within it will be returned. If *matches* is given, it should be a | |||
regex to be matched against the titles of section headings; only | |||
sections whose headings match the regex will be included. *flags* can | |||
be used to override the default regex flags (see :py:meth:`ifilter`) if | |||
*matches* is used. | |||
Each section contains all of its subsections, unless *flat* is | |||
``True``. If *levels* is given, it should be an iterable of integers; | |||
only sections whose heading levels are within it will be returned. If | |||
*matches* is given, it should be either a function or a regex; only | |||
sections whose headings match it (without the surrounding equal signs) | |||
will be included. *flags* can be used to override the default regex | |||
flags (see :py:meth:`ifilter`) if a regex *matches* is used. | |||
If *include_lead* is ``True``, the first, lead section (without a | |||
heading) will be included in the list; ``False`` will not include it; | |||
@@ -422,47 +480,58 @@ class Wikicode(StringMixIn): | |||
:py:class:`~.Heading` object will be included; otherwise, this is | |||
skipped. | |||
""" | |||
if matches: | |||
matches = r"^(=+?)\s*" + matches + r"\s*\1$" | |||
headings = self.filter_headings() | |||
filtered = self.filter_headings(matches=matches, flags=flags) | |||
if levels: | |||
filtered = [head for head in filtered if head.level in levels] | |||
if matches or include_lead is False or (not include_lead and levels): | |||
buffers = [] | |||
else: | |||
buffers = [(maxsize, 0)] | |||
sections = [] | |||
i = 0 | |||
while i < len(self.nodes): | |||
if self.nodes[i] in headings: | |||
this = self.nodes[i].level | |||
for (level, start) in buffers: | |||
if this <= level: | |||
sections.append(Wikicode(self.nodes[start:i])) | |||
buffers = [buf for buf in buffers if buf[0] < this] | |||
if self.nodes[i] in filtered: | |||
if not include_headings: | |||
i += 1 | |||
if i >= len(self.nodes): | |||
break | |||
buffers.append((this, i)) | |||
i += 1 | |||
for (level, start) in buffers: | |||
if start != i: | |||
sections.append(Wikicode(self.nodes[start:i])) | |||
return sections | |||
title_matcher = self._build_matcher(matches, flags) | |||
matcher = lambda heading: (title_matcher(heading.title) and | |||
(not levels or heading.level in levels)) | |||
iheadings = self._indexed_ifilter(recursive=False, forcetype=Heading) | |||
sections = [] # Tuples of (index_of_first_node, section) | |||
open_headings = [] # Tuples of (index, heading), where index and | |||
# heading.level are both monotonically increasing | |||
# Add the lead section if appropriate: | |||
if include_lead or not (include_lead is not None or matches or levels): | |||
itr = self._indexed_ifilter(recursive=False, forcetype=Heading) | |||
try: | |||
first = next(itr)[0] | |||
sections.append((0, Wikicode(self.nodes[:first]))) | |||
except StopIteration: # No headings in page | |||
sections.append((0, Wikicode(self.nodes[:]))) | |||
# Iterate over headings, adding sections to the list as they end: | |||
for i, heading in iheadings: | |||
if flat: # With flat, all sections close at the next heading | |||
newly_closed, open_headings = open_headings, [] | |||
else: # Otherwise, figure out which sections have closed, if any | |||
closed_start_index = len(open_headings) | |||
for j, (start, last_heading) in enumerate(open_headings): | |||
if heading.level <= last_heading.level: | |||
closed_start_index = j | |||
break | |||
newly_closed = open_headings[closed_start_index:] | |||
del open_headings[closed_start_index:] | |||
for start, closed_heading in newly_closed: | |||
if matcher(closed_heading): | |||
sections.append((start, Wikicode(self.nodes[start:i]))) | |||
start = i if include_headings else (i + 1) | |||
open_headings.append((start, heading)) | |||
# Add any remaining open headings to the list of sections: | |||
for start, heading in open_headings: | |||
if matcher(heading): | |||
sections.append((start, Wikicode(self.nodes[start:]))) | |||
# Ensure that earlier sections are earlier in the returned list: | |||
return [section for i, section in sorted(sections)] | |||
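A usage sketch of the rewritten method; exact section counts depend on the page, so only each argument's behavior is annotated::

    import mwparserfromhell

    code = mwparserfromhell.parse("intro\n== A ==\n=== A1 ===\nbody\n== B ==\n")
    code.get_sections(levels=[2])    # "A" (with "A1" nested inside) and "B"
    code.get_sections(matches="A1")  # titles are matched without their ='s
    code.get_sections(flat=True)     # every section closes at the next heading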
def strip_code(self, normalize=True, collapse=True): | |||
"""Return a rendered string without unprintable code such as templates. | |||
The way a node is stripped is handled by the | |||
:py:meth:`~.Node.__showtree__` method of :py:class:`~.Node` objects, | |||
which generally return a subset of their nodes or ``None``. For | |||
example, templates and tags are removed completely, links are stripped | |||
to just their display part, headings are stripped to just their title. | |||
If *normalize* is ``True``, various things may be done to strip code | |||
:py:meth:`~.Node.__strip__` method of :py:class:`~.Node` objects, which | |||
generally return a subset of their nodes or ``None``. For example, | |||
templates and tags are removed completely, links are stripped to just | |||
their display part, headings are stripped to just their title. If | |||
*normalize* is ``True``, various things may be done to strip code | |||
further, such as converting HTML entities like ``&Sigma;``, ``&#931;``, | |||
and ``&#x3a3;`` to ``Σ``. If *collapse* is ``True``, we will try to | |||
remove excess whitespace as well (three or more newlines are converted | |||
@@ -1,7 +1,7 @@ | |||
#! /usr/bin/env python | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -21,10 +21,16 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import sys | |||
if (sys.version_info[0] == 2 and sys.version_info[1] < 6) or \ | |||
    (sys.version_info[0] == 3 and sys.version_info[1] < 2): | |||
    raise Exception("mwparserfromhell needs Python 2.6+ or 3.2+") | |||
from setuptools import setup, find_packages, Extension | |||
from mwparserfromhell import __version__ | |||
from mwparserfromhell.compat import py3k | |||
from mwparserfromhell.compat import py26, py3k | |||
with open("README.rst") as fp: | |||
long_docs = fp.read() | |||
@@ -36,10 +42,11 @@ setup( | |||
name = "mwparserfromhell", | |||
packages = find_packages(exclude=("tests",)), | |||
ext_modules = [tokenizer], | |||
test_suite = "tests", | |||
tests_require = ["unittest2"] if py26 else [], | |||
test_suite = "tests.discover", | |||
version = __version__, | |||
author = "Ben Kurtovic", | |||
author_email = "ben.kurtovic@verizon.net", | |||
author_email = "ben.kurtovic@gmail.com", | |||
url = "https://github.com/earwig/mwparserfromhell", | |||
description = "MWParserFromHell is a parser for MediaWiki wikicode.", | |||
long_description = long_docs, | |||
@@ -52,10 +59,12 @@ setup( | |||
"Intended Audience :: Developers", | |||
"License :: OSI Approved :: MIT License", | |||
"Operating System :: OS Independent", | |||
"Programming Language :: Python :: 2.6", | |||
"Programming Language :: Python :: 2.7", | |||
"Programming Language :: Python :: 3", | |||
"Programming Language :: Python :: 3.2", | |||
"Programming Language :: Python :: 3.3", | |||
"Programming Language :: Python :: 3.4", | |||
"Topic :: Text Processing :: Markup" | |||
], | |||
) |
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -21,8 +21,13 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from unittest import TestCase | |||
try: | |||
from unittest2 import TestCase | |||
except ImportError: | |||
from unittest import TestCase | |||
from mwparserfromhell.compat import range | |||
from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, | |||
Tag, Template, Text, Wikilink) | |||
from mwparserfromhell.nodes.extras import Attribute, Parameter | |||
@@ -32,15 +37,6 @@ from mwparserfromhell.wikicode import Wikicode | |||
wrap = lambda L: Wikicode(SmartList(L)) | |||
wraptext = lambda *args: wrap([Text(t) for t in args]) | |||
def getnodes(code): | |||
"""Iterate over all child nodes of a given parent node. | |||
Imitates Wikicode._get_all_nodes(). | |||
""" | |||
for node in code.nodes: | |||
for context, child in node.__iternodes__(getnodes): | |||
yield child | |||
class TreeEqualityTestCase(TestCase): | |||
"""A base test case with support for comparing the equality of node trees. | |||
@@ -106,7 +102,7 @@ class TreeEqualityTestCase(TestCase): | |||
self.assertEqual(exp_attr.pad_first, act_attr.pad_first) | |||
self.assertEqual(exp_attr.pad_before_eq, act_attr.pad_before_eq) | |||
self.assertEqual(exp_attr.pad_after_eq, act_attr.pad_after_eq) | |||
self.assertIs(expected.wiki_markup, actual.wiki_markup) | |||
self.assertEqual(expected.wiki_markup, actual.wiki_markup) | |||
self.assertIs(expected.self_closing, actual.self_closing) | |||
self.assertIs(expected.invalid, actual.invalid) | |||
self.assertIs(expected.implicit, actual.implicit) | |||
@@ -9,12 +9,10 @@ the main library. | |||
from mwparserfromhell.compat import py3k | |||
if py3k: | |||
range = range | |||
from io import StringIO | |||
from urllib.parse import urlencode | |||
from urllib.request import urlopen | |||
else: | |||
range = xrange | |||
from StringIO import StringIO | |||
from urllib import urlencode, urlopen |
@@ -0,0 +1,24 @@ | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Discover tests using ``unittest2`` for Python 2.6. | |||
It appears the default distutils test suite doesn't play nice with | |||
``setUpClass``, thereby making some tests fail. Using ``unittest2`` to load | |||
tests seems to work around that issue. | |||
http://stackoverflow.com/a/17004409/753501 | |||
""" | |||
import os.path | |||
from mwparserfromhell.compat import py26 | |||
if py26: | |||
import unittest2 as unittest | |||
else: | |||
import unittest | |||
def additional_tests(): | |||
project_root = os.path.split(os.path.dirname(__file__))[0] | |||
return unittest.defaultTestLoader.discover(project_root) |
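Assumed invocation path, given ``test_suite = "tests.discover"`` in setup.py above: setuptools imports this module and runs the suite returned by ``additional_tests()``. Run by hand, that would be::

    if __name__ == "__main__":
        unittest.TextTestRunner().run(additional_tests())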
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -21,12 +21,16 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import Argument, Text | |||
from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext | |||
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | |||
class TestArgument(TreeEqualityTestCase): | |||
"""Test cases for the Argument node.""" | |||
@@ -38,20 +42,15 @@ class TestArgument(TreeEqualityTestCase): | |||
node2 = Argument(wraptext("foo"), wraptext("bar")) | |||
self.assertEqual("{{{foo|bar}}}", str(node2)) | |||
def test_iternodes(self): | |||
"""test Argument.__iternodes__()""" | |||
node1n1 = Text("foobar") | |||
node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("baz") | |||
node1 = Argument(wrap([node1n1])) | |||
node2 = Argument(wrap([node2n1]), wrap([node2n2, node2n3])) | |||
gen1 = node1.__iternodes__(getnodes) | |||
gen2 = node2.__iternodes__(getnodes) | |||
self.assertEqual((None, node1), next(gen1)) | |||
self.assertEqual((None, node2), next(gen2)) | |||
self.assertEqual((node1.name, node1n1), next(gen1)) | |||
self.assertEqual((node2.name, node2n1), next(gen2)) | |||
self.assertEqual((node2.default, node2n2), next(gen2)) | |||
self.assertEqual((node2.default, node2n3), next(gen2)) | |||
def test_children(self): | |||
"""test Argument.__children__()""" | |||
node1 = Argument(wraptext("foobar")) | |||
node2 = Argument(wraptext("foo"), wrap([Text("bar"), Text("baz")])) | |||
gen1 = node1.__children__() | |||
gen2 = node2.__children__() | |||
self.assertIs(node1.name, next(gen1)) | |||
self.assertIs(node2.name, next(gen2)) | |||
self.assertIs(node2.default, next(gen2)) | |||
self.assertRaises(StopIteration, next, gen1) | |||
self.assertRaises(StopIteration, next, gen2) | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -21,7 +21,11 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import Template | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -21,7 +21,11 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
from mwparserfromhell.nodes import (Argument, Comment, ExternalLink, Heading, | |||
HTMLEntity, Tag, Template, Text, Wikilink) | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -21,7 +21,11 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import Comment | |||
@@ -36,11 +40,10 @@ class TestComment(TreeEqualityTestCase): | |||
node = Comment("foobar") | |||
self.assertEqual("<!--foobar-->", str(node)) | |||
def test_iternodes(self): | |||
"""test Comment.__iternodes__()""" | |||
def test_children(self): | |||
"""test Comment.__children__()""" | |||
node = Comment("foobar") | |||
gen = node.__iternodes__(None) | |||
self.assertEqual((None, node), next(gen)) | |||
gen = node.__children__() | |||
self.assertRaises(StopIteration, next, gen) | |||
    def test_strip(self):

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal

@@ -21,7 +21,11 @@
# SOFTWARE.
from __future__ import unicode_literals
import unittest
try:
    import unittest2 as unittest
except ImportError:
    import unittest
try:
    from mwparserfromhell.parser._tokenizer import CTokenizer

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal

@@ -22,7 +22,11 @@
from __future__ import print_function, unicode_literals
import json
import unittest
try:
    import unittest2 as unittest
except ImportError:
    import unittest
import mwparserfromhell
from mwparserfromhell.compat import py3k, str

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal

@@ -21,12 +21,16 @@
# SOFTWARE.
from __future__ import unicode_literals
import unittest
try:
    import unittest2 as unittest
except ImportError:
    import unittest
from mwparserfromhell.compat import str
from mwparserfromhell.nodes import ExternalLink, Text
from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext

class TestExternalLink(TreeEqualityTestCase):
    """Test cases for the ExternalLink node."""

@@ -43,21 +47,16 @@ class TestExternalLink(TreeEqualityTestCase):
                             wraptext("Example Web Page"))
        self.assertEqual("[http://example.com/ Example Web Page]", str(node4))

    def test_iternodes(self):
        """test ExternalLink.__iternodes__()"""
        node1n1 = Text("http://example.com/")
        node2n1 = Text("http://example.com/")
        node2n2, node2n3 = Text("Example"), Text("Page")
        node1 = ExternalLink(wrap([node1n1]), brackets=False)
        node2 = ExternalLink(wrap([node2n1]), wrap([node2n2, node2n3]))
        gen1 = node1.__iternodes__(getnodes)
        gen2 = node2.__iternodes__(getnodes)
        self.assertEqual((None, node1), next(gen1))
        self.assertEqual((None, node2), next(gen2))
        self.assertEqual((node1.url, node1n1), next(gen1))
        self.assertEqual((node2.url, node2n1), next(gen2))
        self.assertEqual((node2.title, node2n2), next(gen2))
        self.assertEqual((node2.title, node2n3), next(gen2))
    def test_children(self):
        """test ExternalLink.__children__()"""
        node1 = ExternalLink(wraptext("http://example.com/"), brackets=False)
        node2 = ExternalLink(wraptext("http://example.com/"),
                             wrap([Text("Example"), Text("Page")]))
        gen1 = node1.__children__()
        gen2 = node2.__children__()
        self.assertEqual(node1.url, next(gen1))
        self.assertEqual(node2.url, next(gen2))
        self.assertEqual(node2.title, next(gen2))
        self.assertRaises(StopIteration, next, gen1)
        self.assertRaises(StopIteration, next, gen2)

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal

@@ -21,12 +21,16 @@
# SOFTWARE.
from __future__ import unicode_literals
import unittest
try:
    import unittest2 as unittest
except ImportError:
    import unittest
from mwparserfromhell.compat import str
from mwparserfromhell.nodes import Heading, Text
from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext

class TestHeading(TreeEqualityTestCase):
    """Test cases for the Heading node."""

@@ -38,14 +42,11 @@ class TestHeading(TreeEqualityTestCase):
        node2 = Heading(wraptext(" zzz "), 5)
        self.assertEqual("===== zzz =====", str(node2))

    def test_iternodes(self):
        """test Heading.__iternodes__()"""
        text1, text2 = Text("foo"), Text("bar")
        node = Heading(wrap([text1, text2]), 3)
        gen = node.__iternodes__(getnodes)
        self.assertEqual((None, node), next(gen))
        self.assertEqual((node.title, text1), next(gen))
        self.assertEqual((node.title, text2), next(gen))
    def test_children(self):
        """test Heading.__children__()"""
        node = Heading(wrap([Text("foo"), Text("bar")]), 3)
        gen = node.__children__()
        self.assertEqual(node.title, next(gen))
        self.assertRaises(StopIteration, next, gen)

    def test_strip(self):

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal

@@ -21,7 +21,11 @@
# SOFTWARE.
from __future__ import unicode_literals
import unittest
try:
    import unittest2 as unittest
except ImportError:
    import unittest
from mwparserfromhell.compat import str
from mwparserfromhell.nodes import HTMLEntity

@@ -42,11 +46,10 @@ class TestHTMLEntity(TreeEqualityTestCase):
        self.assertEqual("k", str(node3))
        self.assertEqual("l", str(node4))

    def test_iternodes(self):
        """test HTMLEntity.__iternodes__()"""
    def test_children(self):
        """test HTMLEntity.__children__()"""
        node = HTMLEntity("nbsp", named=True, hexadecimal=False)
        gen = node.__iternodes__(None)
        self.assertEqual((None, node), next(gen))
        gen = node.__children__()
        self.assertRaises(StopIteration, next, gen)

    def test_strip(self):

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal

@@ -21,7 +21,11 @@
# SOFTWARE.
from __future__ import unicode_literals
import unittest
try:
    import unittest2 as unittest
except ImportError:
    import unittest
from mwparserfromhell.compat import str
from mwparserfromhell.nodes import Text

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal

@@ -21,24 +21,30 @@
# SOFTWARE.
from __future__ import unicode_literals
import unittest
try:
    import unittest2 as unittest
except ImportError:
    import unittest
from mwparserfromhell import parser
from mwparserfromhell.nodes import Template, Text, Wikilink
from mwparserfromhell.compat import range
from mwparserfromhell.nodes import Tag, Template, Text, Wikilink
from mwparserfromhell.nodes.extras import Parameter
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext
from .compat import range

class TestParser(TreeEqualityTestCase):
    """Tests for the Parser class itself, which tokenizes and builds nodes."""

    def test_use_c(self):
        """make sure the correct tokenizer is used"""
        restore = parser.use_c
        if parser.use_c:
            self.assertTrue(parser.Parser()._tokenizer.USES_C)
            parser.use_c = False
            self.assertFalse(parser.Parser()._tokenizer.USES_C)
        parser.use_c = restore

    def test_parsing(self):
        """integration test for parsing overall"""

@@ -62,5 +68,26 @@ class TestParser(TreeEqualityTestCase):
        actual = parser.Parser().parse(text)
        self.assertWikicodeEqual(expected, actual)

    def test_skip_style_tags(self):
        """test Parser.parse(skip_style_tags=True)"""
        def test():
            with_style = parser.Parser().parse(text, skip_style_tags=False)
            without_style = parser.Parser().parse(text, skip_style_tags=True)
            self.assertWikicodeEqual(a, with_style)
            self.assertWikicodeEqual(b, without_style)

        text = "This is an example with ''italics''!"
        a = wrap([Text("This is an example with "),
                  Tag(wraptext("i"), wraptext("italics"), wiki_markup="''"),
                  Text("!")])
        b = wraptext("This is an example with ''italics''!")

        restore = parser.use_c
        if parser.use_c:
            test()
            parser.use_c = False
        test()
        parser.use_c = restore

if __name__ == "__main__":
    unittest.main(verbosity=2)
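The new test_skip_style_tags test exercises the temporary workaround described in the changelog. At the API surface it looks like this (a sketch mirroring the test's inputs):

from mwparserfromhell.parser import Parser

text = "This is an example with ''italics''!"
with_style = Parser().parse(text)  # '' becomes a wiki-markup <i> Tag node
without_style = Parser().parse(text, skip_style_tags=True)  # '' kept as plain text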
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal

@@ -21,7 +21,11 @@
# SOFTWARE.
from __future__ import unicode_literals
import unittest
try:
    import unittest2 as unittest
except ImportError:
    import unittest
from mwparserfromhell.parser.tokenizer import Tokenizer

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal

@@ -21,12 +21,14 @@
# SOFTWARE.
from __future__ import unicode_literals
import unittest
from mwparserfromhell.compat import py3k
from mwparserfromhell.smart_list import SmartList, _ListProxy
try:
    import unittest2 as unittest
except ImportError:
    import unittest
from .compat import range
from mwparserfromhell.compat import py3k, range
from mwparserfromhell.smart_list import SmartList, _ListProxy

class TestSmartList(unittest.TestCase):
    """Test cases for the SmartList class and its child, _ListProxy."""

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal

@@ -23,12 +23,14 @@
from __future__ import unicode_literals
from sys import getdefaultencoding
from types import GeneratorType
import unittest
from mwparserfromhell.compat import bytes, py3k, py32, str
from mwparserfromhell.string_mixin import StringMixIn
try:
    import unittest2 as unittest
except ImportError:
    import unittest
from .compat import range
from mwparserfromhell.compat import bytes, py3k, py32, range, str
from mwparserfromhell.string_mixin import StringMixIn

class _FakeString(StringMixIn):
    def __init__(self, data):

@@ -59,8 +61,8 @@ class TestStringMixIn(unittest.TestCase):
        else:
            methods.append("decode")
        for meth in methods:
            expected = getattr(str, meth).__doc__
            actual = getattr(StringMixIn, meth).__doc__
            expected = getattr("foo", meth).__doc__
            actual = getattr(_FakeString("foo"), meth).__doc__
            self.assertEqual(expected, actual)

    def test_types(self):

@@ -109,12 +111,12 @@ class TestStringMixIn(unittest.TestCase):
        self.assertFalse(str1 < str4)
        self.assertTrue(str1 <= str4)
        self.assertTrue(str1 > str5)
        self.assertTrue(str1 >= str5)
        self.assertFalse(str1 == str5)
        self.assertTrue(str1 != str5)
        self.assertFalse(str1 < str5)
        self.assertFalse(str1 <= str5)
        self.assertFalse(str5 > str1)
        self.assertFalse(str5 >= str1)
        self.assertFalse(str5 == str1)
        self.assertTrue(str5 != str1)
        self.assertTrue(str5 < str1)
        self.assertTrue(str5 <= str1)

    def test_other_magics(self):
        """test other magically implemented features, like len() and iter()"""

@@ -376,7 +378,7 @@ class TestStringMixIn(unittest.TestCase):
        self.assertEqual(actual, str25.rsplit(None, 3))
        actual = [" this is a sentence with", "", "whitespace", ""]
        self.assertEqual(actual, str25.rsplit(" ", 3))
        if py3k:
        if py3k and not py32:
            actual = [" this is a", "sentence", "with", "whitespace"]
            self.assertEqual(actual, str25.rsplit(maxsplit=3))

@@ -394,7 +396,7 @@ class TestStringMixIn(unittest.TestCase):
        self.assertEqual(actual, str25.split(None, 3))
        actual = ["", "", "", "this is a sentence with whitespace "]
        self.assertEqual(actual, str25.split(" ", 3))
        if py3k:
        if py3k and not py32:
            actual = ["this", "is", "a", "sentence with whitespace "]
            self.assertEqual(actual, str25.split(maxsplit=3))

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal

@@ -21,12 +21,16 @@
# SOFTWARE.
from __future__ import unicode_literals
import unittest
try:
    import unittest2 as unittest
except ImportError:
    import unittest
from mwparserfromhell.compat import str
from mwparserfromhell.nodes import Tag, Template, Text
from mwparserfromhell.nodes.extras import Attribute
from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext

agen = lambda name, value: Attribute(wraptext(name), wraptext(value))
agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), False)

@@ -64,37 +68,30 @@ class TestTag(TreeEqualityTestCase):
        self.assertEqual("----", str(node8))
        self.assertEqual("''italics!''", str(node9))

    def test_iternodes(self):
        """test Tag.__iternodes__()"""
        node1n1, node1n2 = Text("ref"), Text("foobar")
        node2n1, node3n1, node3n2 = Text("bold text"), Text("img"), Text("id")
        node3n3, node3n4, node3n5 = Text("foo"), Text("class"), Text("bar")
    def test_children(self):
        """test Tag.__children__()"""
        # <ref>foobar</ref>
        node1 = Tag(wrap([node1n1]), wrap([node1n2]))
        node1 = Tag(wraptext("ref"), wraptext("foobar"))
        # '''bold text'''
        node2 = Tag(wraptext("b"), wrap([node2n1]), wiki_markup="'''")
        node2 = Tag(wraptext("b"), wraptext("bold text"), wiki_markup="'''")
        # <img id="foo" class="bar" />
        node3 = Tag(wrap([node3n1]),
                    attrs=[Attribute(wrap([node3n2]), wrap([node3n3])),
                           Attribute(wrap([node3n4]), wrap([node3n5]))],
        node3 = Tag(wraptext("img"),
                    attrs=[Attribute(wraptext("id"), wraptext("foo")),
                           Attribute(wraptext("class"), wraptext("bar"))],
                    self_closing=True, padding=" ")
        gen1 = node1.__iternodes__(getnodes)
        gen2 = node2.__iternodes__(getnodes)
        gen3 = node3.__iternodes__(getnodes)
        self.assertEqual((None, node1), next(gen1))
        self.assertEqual((None, node2), next(gen2))
        self.assertEqual((None, node3), next(gen3))
        self.assertEqual((node1.tag, node1n1), next(gen1))
        self.assertEqual((node3.tag, node3n1), next(gen3))
        self.assertEqual((node3.attributes[0].name, node3n2), next(gen3))
        self.assertEqual((node3.attributes[0].value, node3n3), next(gen3))
        self.assertEqual((node3.attributes[1].name, node3n4), next(gen3))
        self.assertEqual((node3.attributes[1].value, node3n5), next(gen3))
        self.assertEqual((node1.contents, node1n2), next(gen1))
        self.assertEqual((node2.contents, node2n1), next(gen2))
        self.assertEqual((node1.closing_tag, node1n1), next(gen1))
        gen1 = node1.__children__()
        gen2 = node2.__children__()
        gen3 = node3.__children__()
        self.assertEqual(node1.tag, next(gen1))
        self.assertEqual(node3.tag, next(gen3))
        self.assertEqual(node3.attributes[0].name, next(gen3))
        self.assertEqual(node3.attributes[0].value, next(gen3))
        self.assertEqual(node3.attributes[1].name, next(gen3))
        self.assertEqual(node3.attributes[1].value, next(gen3))
        self.assertEqual(node1.contents, next(gen1))
        self.assertEqual(node2.contents, next(gen2))
        self.assertEqual(node1.closing_tag, next(gen1))
        self.assertRaises(StopIteration, next, gen1)
        self.assertRaises(StopIteration, next, gen2)
        self.assertRaises(StopIteration, next, gen3)

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal

@@ -21,12 +21,16 @@
# SOFTWARE.
from __future__ import unicode_literals
import unittest
try:
    import unittest2 as unittest
except ImportError:
    import unittest
from mwparserfromhell.compat import str
from mwparserfromhell.nodes import HTMLEntity, Template, Text
from mwparserfromhell.nodes.extras import Parameter
from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext

pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True)
pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False)

@@ -42,27 +46,21 @@ class TestTemplate(TreeEqualityTestCase):
                         [pgenh("1", "bar"), pgens("abc", "def")])
        self.assertEqual("{{foo|bar|abc=def}}", str(node2))

    def test_iternodes(self):
        """test Template.__iternodes__()"""
        node1n1 = Text("foobar")
        node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("abc")
        node2n4, node2n5 = Text("def"), Text("ghi")
        node2p1 = Parameter(wraptext("1"), wrap([node2n2]), showkey=False)
        node2p2 = Parameter(wrap([node2n3]), wrap([node2n4, node2n5]),
    def test_children(self):
        """test Template.__children__()"""
        node2p1 = Parameter(wraptext("1"), wraptext("bar"), showkey=False)
        node2p2 = Parameter(wraptext("abc"), wrap([Text("def"), Text("ghi")]),
                            showkey=True)
        node1 = Template(wrap([node1n1]))
        node2 = Template(wrap([node2n1]), [node2p1, node2p2])
        node1 = Template(wraptext("foobar"))
        node2 = Template(wraptext("foo"), [node2p1, node2p2])
        gen1 = node1.__iternodes__(getnodes)
        gen2 = node2.__iternodes__(getnodes)
        self.assertEqual((None, node1), next(gen1))
        self.assertEqual((None, node2), next(gen2))
        self.assertEqual((node1.name, node1n1), next(gen1))
        self.assertEqual((node2.name, node2n1), next(gen2))
        self.assertEqual((node2.params[0].value, node2n2), next(gen2))
        self.assertEqual((node2.params[1].name, node2n3), next(gen2))
        self.assertEqual((node2.params[1].value, node2n4), next(gen2))
        self.assertEqual((node2.params[1].value, node2n5), next(gen2))
        gen1 = node1.__children__()
        gen2 = node2.__children__()
        self.assertEqual(node1.name, next(gen1))
        self.assertEqual(node2.name, next(gen2))
        self.assertEqual(node2.params[0].value, next(gen2))
        self.assertEqual(node2.params[1].name, next(gen2))
        self.assertEqual(node2.params[1].value, next(gen2))
        self.assertRaises(StopIteration, next, gen1)
        self.assertRaises(StopIteration, next, gen2)

@@ -123,15 +121,15 @@ class TestTemplate(TreeEqualityTestCase):
        node3 = Template(wraptext("foo"),
                         [pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")])
        node4 = Template(wraptext("foo"), [pgenh("1", "a"), pgens("b", " ")])
        self.assertFalse(node1.has("foobar"))
        self.assertTrue(node2.has(1))
        self.assertTrue(node2.has("abc"))
        self.assertFalse(node2.has("def"))
        self.assertTrue(node3.has("1"))
        self.assertTrue(node3.has(" b "))
        self.assertFalse(node4.has("b"))
        self.assertTrue(node3.has("b", False))
        self.assertFalse(node1.has("foobar", False))
        self.assertTrue(node2.has(1, False))
        self.assertTrue(node2.has("abc", False))
        self.assertFalse(node2.has("def", False))
        self.assertTrue(node3.has("1", False))
        self.assertTrue(node3.has(" b ", False))
        self.assertTrue(node4.has("b", False))
        self.assertTrue(node3.has("b", True))
        self.assertFalse(node4.has("b", True))
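These rewritten assertions pin down the breaking change called out in the changelog: Template.has() now defaults to ignore_empty=False, so a parameter whose value is only whitespace still counts as present. Restated at the API level (a sketch mirroring node4 above):

import mwparserfromhell

tpl = mwparserfromhell.parse("{{foo|a|b= }}").filter_templates()[0]
tpl.has("b")                     # True: empty values count under the new default
tpl.has("b", ignore_empty=True)  # False: the old behavior is now opt-in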
    def test_get(self):
        """test Template.get()"""

@@ -223,6 +221,7 @@ class TestTemplate(TreeEqualityTestCase):
                                          pgenh("1", "c"), pgenh("2", "d")])
        node40 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"),
                                          pgens("f", "g")])
        node41 = Template(wraptext("a"), [pgenh("1", "")])

        node1.add("e", "f", showkey=True)
        node2.add(2, "g", showkey=False)

@@ -266,6 +265,7 @@ class TestTemplate(TreeEqualityTestCase):
        node38.add("1", "e")
        node39.add("1", "e")
        node40.add("d", "h", before="b")
        node41.add(1, "b")

        self.assertEqual("{{a|b=c|d|e=f}}", node1)
        self.assertEqual("{{a|b=c|d|g}}", node2)

@@ -312,6 +312,7 @@ class TestTemplate(TreeEqualityTestCase):
        self.assertEqual("{{a|1=e|x=y|2=d}}", node38)
        self.assertEqual("{{a|x=y|e|d}}", node39)
        self.assertEqual("{{a|b=c|d=h|f=g}}", node40)
        self.assertEqual("{{a|b}}", node41)

    def test_remove(self):
        """test Template.remove()"""

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal

@@ -21,7 +21,11 @@
# SOFTWARE.
from __future__ import unicode_literals
import unittest
try:
    import unittest2 as unittest
except ImportError:
    import unittest
from mwparserfromhell.compat import str
from mwparserfromhell.nodes import Text

@@ -36,11 +40,10 @@ class TestText(unittest.TestCase):
        node2 = Text("fóóbar")
        self.assertEqual("fóóbar", str(node2))

    def test_iternodes(self):
        """test Text.__iternodes__()"""
    def test_children(self):
        """test Text.__children__()"""
        node = Text("foobar")
        gen = node.__iternodes__(None)
        self.assertEqual((None, node), next(gen))
        gen = node.__children__()
        self.assertRaises(StopIteration, next, gen)

    def test_strip(self):

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal

@@ -21,7 +21,11 @@
# SOFTWARE.
from __future__ import unicode_literals
import unittest
try:
    import unittest2 as unittest
except ImportError:
    import unittest
from mwparserfromhell.compat import py3k
from mwparserfromhell.parser import tokens

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal

@@ -21,7 +21,11 @@
# SOFTWARE.
from __future__ import unicode_literals
import unittest
try:
    import unittest2 as unittest
except ImportError:
    import unittest
from mwparserfromhell.nodes import Template, Text
from mwparserfromhell.utils import parse_anything

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal

@@ -24,14 +24,18 @@ from __future__ import unicode_literals
from functools import partial
import re
from types import GeneratorType
import unittest
try:
    import unittest2 as unittest
except ImportError:
    import unittest
from mwparserfromhell.compat import py3k, str
from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity,
                                    Node, Tag, Template, Text, Wikilink)
from mwparserfromhell.smart_list import SmartList
from mwparserfromhell.wikicode import Wikicode
from mwparserfromhell import parse
from mwparserfromhell.compat import py3k, str
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext

@@ -242,6 +246,7 @@ class TestWikicode(TreeEqualityTestCase):
        """test Wikicode.matches()"""
        code1 = parse("Cleanup")
        code2 = parse("\nstub<!-- TODO: make more specific -->")
        code3 = parse("")
        self.assertTrue(code1.matches("Cleanup"))
        self.assertTrue(code1.matches("cleanup"))
        self.assertTrue(code1.matches(" cleanup\n"))

@@ -250,13 +255,22 @@ class TestWikicode(TreeEqualityTestCase):
        self.assertTrue(code2.matches("stub"))
        self.assertTrue(code2.matches("Stub<!-- no, it's fine! -->"))
        self.assertFalse(code2.matches("StuB"))
        self.assertTrue(code1.matches(("cleanup", "stub")))
        self.assertTrue(code2.matches(("cleanup", "stub")))
        self.assertFalse(code2.matches(("StuB", "sTUb", "foobar")))
        self.assertFalse(code2.matches(["StuB", "sTUb", "foobar"]))
        self.assertTrue(code2.matches(("StuB", "sTUb", "foo", "bar", "Stub")))
        self.assertTrue(code2.matches(["StuB", "sTUb", "foo", "bar", "Stub"]))
        self.assertTrue(code3.matches(""))
        self.assertTrue(code3.matches("<!-- nothing -->"))
        self.assertTrue(code3.matches(("a", "b", "")))
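The added cases cover the extension of Wikicode.matches() to tuples and lists of candidates: the call succeeds if any one of them matches after the usual MediaWiki-style normalization (surrounding whitespace and the first letter's case are ignored). A sketch restating the tests:

import mwparserfromhell

code = mwparserfromhell.parse("\nstub<!-- TODO: make more specific -->")
code.matches("Stub")                    # True: first letter and whitespace normalized
code.matches("StuB")                    # False: the remaining letters must match exactly
code.matches(("StuB", "sTUb", "Stub"))  # True: any-of semantics for tuples/lists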
    def test_filter_family(self):
        """test the Wikicode.i?filter() family of functions"""
        def genlist(gen):
            self.assertIsInstance(gen, GeneratorType)
            return list(gen)
        ifilter = lambda code: (lambda **kw: genlist(code.ifilter(**kw)))
        ifilter = lambda code: (lambda *a, **k: genlist(code.ifilter(*a, **k)))

        code = parse("a{{b}}c[[d]]{{{e}}}{{f}}[[g]]")
        for func in (code.filter, ifilter(code)):

@@ -292,21 +306,27 @@ class TestWikicode(TreeEqualityTestCase):
                          "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"],
                         func(recursive=True, forcetype=Template))

        code3 = parse("{{foobar}}{{FOO}}{{baz}}{{bz}}")
        code3 = parse("{{foobar}}{{FOO}}{{baz}}{{bz}}{{barfoo}}")
        for func in (code3.filter, ifilter(code3)):
            self.assertEqual(["{{foobar}}", "{{FOO}}"], func(recursive=False, matches=r"foo"))
            self.assertEqual(["{{foobar}}", "{{barfoo}}"],
                             func(False, matches=lambda node: "foo" in node))
            self.assertEqual(["{{foobar}}", "{{FOO}}", "{{barfoo}}"],
                             func(False, matches=r"foo"))
            self.assertEqual(["{{foobar}}", "{{FOO}}"],
                             func(recursive=False, matches=r"^{{foo.*?}}"))
                             func(matches=r"^{{foo.*?}}"))
            self.assertEqual(["{{foobar}}"],
                             func(recursive=False, matches=r"^{{foo.*?}}", flags=re.UNICODE))
            self.assertEqual(["{{baz}}", "{{bz}}"], func(recursive=False, matches=r"^{{b.*?z"))
            self.assertEqual(["{{baz}}"], func(recursive=False, matches=r"^{{b.+?z}}"))
                             func(matches=r"^{{foo.*?}}", flags=re.UNICODE))
            self.assertEqual(["{{baz}}", "{{bz}}"], func(matches=r"^{{b.*?z"))
            self.assertEqual(["{{baz}}"], func(matches=r"^{{b.+?z}}"))

        self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"],
                         code2.filter_templates(recursive=False))
        self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}",
                          "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"],
                         code2.filter_templates(recursive=True))
        self.assertEqual(["{{foobar}}"], code3.filter_templates(
            matches=lambda node: node.name.matches("Foobar")))
        self.assertEqual(["{{baz}}", "{{bz}}"],
                         code3.filter_templates(matches=r"^{{b.*?z"))
        self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z"))
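The filter changes above track the changelog entry letting matches be a function from Node to bool as an alternative to a regex. Both forms side by side (a sketch; the expected lists follow the assertions above):

import mwparserfromhell

code3 = mwparserfromhell.parse("{{foobar}}{{FOO}}{{baz}}{{bz}}{{barfoo}}")
code3.filter_templates(matches=r"^{{b.*?z")  # regex form: ["{{baz}}", "{{bz}}"]
code3.filter_templates(matches=lambda node: node.name.matches("Foobar"))
# callable form: ["{{foobar}}"], reusing Wikicode.matches() normalization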
@@ -335,35 +355,43 @@
        p4_III = "== Section III ==\n" + p4_IIIA
        page4 = parse(p4_lead + p4_I + p4_II + p4_III)

        self.assertEqual([], page1.get_sections())
        self.assertEqual([""], page1.get_sections())
        self.assertEqual(["", "==Heading=="], page2.get_sections())
        self.assertEqual(["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n",
                          "====Gnidaeh====\n"], page3.get_sections())
        self.assertEqual([p4_lead, p4_IA, p4_I, p4_IB, p4_IB1, p4_II,
                          p4_IIIA1a, p4_III, p4_IIIA, p4_IIIA2, p4_IIIA2ai1],
        self.assertEqual([p4_lead, p4_I, p4_IA, p4_IB, p4_IB1, p4_II,
                          p4_III, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1],
                         page4.get_sections())

        self.assertEqual(["====Gnidaeh====\n"], page3.get_sections(levels=[4]))
        self.assertEqual(["===Heading===\nFoo bar baz\n====Gnidaeh====\n"],
                         page3.get_sections(levels=(2, 3)))
        self.assertEqual(["===Heading===\nFoo bar baz\n"],
                         page3.get_sections(levels=(2, 3), flat=True))
        self.assertEqual([], page3.get_sections(levels=[0]))
        self.assertEqual(["", "====Gnidaeh====\n"],
                         page3.get_sections(levels=[4], include_lead=True))
        self.assertEqual(["===Heading===\nFoo bar baz\n====Gnidaeh====\n",
                          "====Gnidaeh====\n"],
                         page3.get_sections(include_lead=False))
        self.assertEqual(["===Heading===\nFoo bar baz\n", "====Gnidaeh====\n"],
                         page3.get_sections(flat=True, include_lead=False))

        self.assertEqual([p4_IB1, p4_IIIA2], page4.get_sections(levels=[4]))
        self.assertEqual([""], page2.get_sections(include_headings=False))
        self.assertEqual([p4_IA, p4_IB, p4_IIIA], page4.get_sections(levels=[3]))
        self.assertEqual([p4_IA, "=== Section I.B ===\n",
                          "=== Section III.A ===\nText.\n"],
                         page4.get_sections(levels=[3], flat=True))
        self.assertEqual(["", ""], page2.get_sections(include_headings=False))
        self.assertEqual(["\nSection I.B.1 body.\n\n•Some content.\n\n",
                          "\nEven more text.\n" + p4_IIIA2ai1],
                         page4.get_sections(levels=[4],
                                            include_headings=False))

        self.assertEqual([], page4.get_sections(matches=r"body"))
        self.assertEqual([p4_IA, p4_I, p4_IB, p4_IB1],
        self.assertEqual([p4_I, p4_IA, p4_IB, p4_IB1],
                         page4.get_sections(matches=r"Section\sI[.\s].*?"))
        self.assertEqual([p4_IA, p4_IIIA1a, p4_IIIA, p4_IIIA2, p4_IIIA2ai1],
        self.assertEqual([p4_IA, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1],
                         page4.get_sections(matches=r".*?a.*?"))
        self.assertEqual([p4_IIIA1a, p4_IIIA2ai1],
                         page4.get_sections(matches=r".*?a.*?", flags=re.U))

@@ -371,6 +399,11 @@ class TestWikicode(TreeEqualityTestCase):
                         page4.get_sections(matches=r".*?a.*?", flags=re.U,
                                            include_headings=False))

        sections = page2.get_sections(include_headings=False)
        sections[0].append("Lead!\n")
        sections[1].append("\nFirst section!")
        self.assertEqual("Lead!\n==Heading==\nFirst section!", page2)

        page5 = parse("X\n== Foo ==\nBar\n== Baz ==\nBuzz")
        section = page5.get_sections(matches="Foo")[0]
        section.replace("\nBar\n", "\nBarf ")
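Two behaviors pinned down by the updated section tests deserve emphasis: get_sections() now returns subsections nested inside their parents in document order (flat=True recovers the old non-nested slices), and the returned sections are live views, so edits write through to the parent page. A sketch of the write-through property, mirroring the page2 assertions above:

import mwparserfromhell

page = mwparserfromhell.parse("==Heading==")
sections = page.get_sections(include_headings=False)
sections[0].append("Lead!\n")           # the lead, before the heading
sections[1].append("\nFirst section!")  # the body of ==Heading==
str(page)  # "Lead!\n==Heading==\nFirst section!"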
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal

@@ -21,12 +21,16 @@
# SOFTWARE.
from __future__ import unicode_literals
import unittest
try:
    import unittest2 as unittest
except ImportError:
    import unittest
from mwparserfromhell.compat import str
from mwparserfromhell.nodes import Text, Wikilink
from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext

class TestWikilink(TreeEqualityTestCase):
    """Test cases for the Wikilink node."""

@@ -38,20 +42,15 @@ class TestWikilink(TreeEqualityTestCase):
        node2 = Wikilink(wraptext("foo"), wraptext("bar"))
        self.assertEqual("[[foo|bar]]", str(node2))

    def test_iternodes(self):
        """test Wikilink.__iternodes__()"""
        node1n1 = Text("foobar")
        node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("baz")
        node1 = Wikilink(wrap([node1n1]))
        node2 = Wikilink(wrap([node2n1]), wrap([node2n2, node2n3]))
        gen1 = node1.__iternodes__(getnodes)
        gen2 = node2.__iternodes__(getnodes)
        self.assertEqual((None, node1), next(gen1))
        self.assertEqual((None, node2), next(gen2))
        self.assertEqual((node1.title, node1n1), next(gen1))
        self.assertEqual((node2.title, node2n1), next(gen2))
        self.assertEqual((node2.text, node2n2), next(gen2))
        self.assertEqual((node2.text, node2n3), next(gen2))
    def test_children(self):
        """test Wikilink.__children__()"""
        node1 = Wikilink(wraptext("foobar"))
        node2 = Wikilink(wraptext("foo"), wrap([Text("bar"), Text("baz")]))
        gen1 = node1.__children__()
        gen2 = node2.__children__()
        self.assertEqual(node1.title, next(gen1))
        self.assertEqual(node2.title, next(gen2))
        self.assertEqual(node2.text, next(gen2))
        self.assertRaises(StopIteration, next, gen1)
        self.assertRaises(StopIteration, next, gen2)

@@ -150,3 +150,31 @@ name: comment_inside_bracketed_link
label: an HTML comment inside a bracketed external link
input: "[http://example.com/foo<!--comment-->bar]"
output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com/foo"), CommentStart(), Text(text="comment"), CommentEnd(), Text(text="bar"), ExternalLinkClose()]

---

name: wikilink_inside_external_link
label: a wikilink inside an external link, which the parser considers valid (see issue #61)
input: "[http://example.com/foo Foo [[Bar]]]"
output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com/foo"), ExternalLinkSeparator(), Text(text="Foo "), WikilinkOpen(), Text(text="Bar"), WikilinkClose(), ExternalLinkClose()]

---

name: external_link_inside_wikilink
label: an external link inside a wikilink, valid in the case of images (see issue #62)
input: "[[File:Example.png|thumb|http://example.com]]"
output: [WikilinkOpen(), Text(text="File:Example.png"), WikilinkSeparator(), Text(text="thumb|"), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), WikilinkClose()]

---

name: external_link_inside_wikilink_brackets
label: an external link with brackets inside a wikilink
input: "[[File:Example.png|thumb|[http://example.com Example]]]"
output: [WikilinkOpen(), Text(text="File:Example.png"), WikilinkSeparator(), Text(text="thumb|"), ExternalLinkOpen(brackets=True), Text(text="http://example.com"), ExternalLinkSeparator(), Text(text="Example"), ExternalLinkClose(), WikilinkClose()]

---

name: external_link_inside_wikilink_title
label: an external link inside a wikilink title, which is invalid
input: "[[File:Example.png http://example.com]]"
output: [WikilinkOpen(), Text(text="File:Example.png http://example.com"), WikilinkClose()]
@@ -54,6 +54,20 @@ output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="bar[b

---

name: nested
label: a wikilink nested within another
input: "[[foo|[[bar]]]]"
output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), WikilinkOpen(), Text(text="bar"), WikilinkClose(), WikilinkClose()]

---

name: nested_padding
label: a wikilink nested within another, separated by other data
input: "[[foo|a[[b]]c]]"
output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="a"), WikilinkOpen(), Text(text="b"), WikilinkClose(), Text(text="c"), WikilinkClose()]

---

name: invalid_newline
label: invalid wikilink: newline as only content
input: "[[\n]]"

@@ -103,27 +117,13 @@ output: [Text(text="[[foo"), WikilinkOpen(), Text(text="bar"), WikilinkClose(),

---

name: invalid_nested_text
label: invalid wikilink: a wikilink nested within the value of another
name: invalid_nested_no_close
label: invalid wikilink: a wikilink nested within the value of another, missing a pair of closing brackets
input: "[[foo|[[bar]]"
output: [Text(text="[[foo|"), WikilinkOpen(), Text(text="bar"), WikilinkClose()]

---

name: invalid_nested_text_2
label: invalid wikilink: a wikilink nested within the value of another, two pairs of closing brackets
input: "[[foo|[[bar]]]]"
output: [Text(text="[[foo|"), WikilinkOpen(), Text(text="bar"), WikilinkClose(), Text(text="]]")]

name: invalid_nested_text_padding
label: invalid wikilink: a wikilink nested within the value of another, separated by other data
input: "[[foo|a[[b]]c]]"
output: [Text(text="[[foo|a"), WikilinkOpen(), Text(text="b"), WikilinkClose(), Text(text="c]]")]

name: incomplete_open_only
label: incomplete wikilinks: just an open
input: "[["