@@ -4,4 +4,5 @@ | |||
.DS_Store | |||
__pycache__ | |||
build | |||
dist | |||
docs/_build |
@@ -28,9 +28,9 @@ Normal usage is rather straightforward (where ``text`` is page text):: | |||
>>> import mwparserfromhell | |||
>>> wikicode = mwparserfromhell.parse(text) | |||
``wikicode`` is a ``mwparserfromhell.wikicode.Wikicode`` object, which acts | |||
like an ordinary ``unicode`` object (or ``str`` in Python 3) with some extra | |||
methods. For example:: | |||
``wikicode`` is a ``mwparserfromhell.Wikicode`` object, which acts like an | |||
ordinary ``unicode`` object (or ``str`` in Python 3) with some extra methods. | |||
For example:: | |||
>>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" | |||
>>> wikicode = mwparserfromhell.parse(text) | |||
@@ -70,7 +70,7 @@ passing ``recursive=True``:: | |||
>>> mwparserfromhell.parse(text).filter_templates(recursive=True) | |||
['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}'] | |||
Templates can be easily modified to add, remove, alter or params. ``Wikicode`` | |||
Templates can be easily modified to add, remove, or alter params. ``Wikicode`` | |||
can also be treated like a list with ``append()``, ``insert()``, ``remove()``, | |||
``replace()``, and more:: | |||
@@ -131,7 +131,7 @@ following code (via the API_):: | |||
.. _MediaWiki: http://mediawiki.org | |||
.. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig | |||
.. _Σ: http://en.wikipedia.org/wiki/User:Σ | |||
.. _Σ: http://en.wikipedia.org/wiki/User:%CE%A3 | |||
.. _Python Package Index: http://pypi.python.org | |||
.. _get pip: http://pypi.python.org/pypi/pip | |||
.. _EarwigBot: https://github.com/earwig/earwigbot | |||
@@ -17,6 +17,14 @@ nodes Package | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`comment` Module | |||
--------------------- | |||
.. automodule:: mwparserfromhell.nodes.comment | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`heading` Module | |||
--------------------- | |||
@@ -56,6 +64,14 @@ nodes Package | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`wikilink` Module | |||
---------------------- | |||
.. automodule:: mwparserfromhell.nodes.wikilink | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
Subpackages | |||
----------- | |||
@@ -50,7 +50,7 @@ copyright = u'2012 Ben Kurtovic' | |||
# The short X.Y version. | |||
version = '0.1' | |||
# The full version, including alpha/beta/rc tags. | |||
release = '0.1' | |||
release = '0.1.1' | |||
# The language for content autogenerated by Sphinx. Refer to documentation | |||
# for a list of supported languages. | |||
@@ -9,7 +9,7 @@ Developed by Earwig_ with help from `Σ`_. | |||
.. _MediaWiki: http://mediawiki.org | |||
.. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig | |||
.. _Σ: http://en.wikipedia.org/wiki/User:Σ | |||
.. _Σ: http://en.wikipedia.org/wiki/User:%CE%A3 | |||
Installation | |||
------------ | |||
@@ -48,7 +48,7 @@ by passing *recursive=True*:: | |||
>>> mwparserfromhell.parse(text).filter_templates(recursive=True) | |||
['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}'] | |||
Templates can be easily modified to add, remove alter or params. | |||
Templates can be easily modified to add, remove, or alter params. | |||
:py:class:`~.Wikicode` can also be treated like a list with | |||
:py:meth:`~.Wikicode.append`, :py:meth:`~.Wikicode.insert`, | |||
:py:meth:`~.Wikicode.remove`, :py:meth:`~.Wikicode.replace`, and more:: | |||
@@ -31,7 +31,7 @@ from __future__ import unicode_literals | |||
__author__ = "Ben Kurtovic" | |||
__copyright__ = "Copyright (C) 2012 Ben Kurtovic" | |||
__license__ = "MIT License" | |||
__version__ = "0.1" | |||
__version__ = "0.1.1" | |||
__email__ = "ben.kurtovic@verizon.net" | |||
from . import nodes, parser, smart_list, string_mixin, wikicode | |||
@@ -68,7 +68,9 @@ class Node(StringMixIn): | |||
from . import extras | |||
from .text import Text | |||
from .argument import Argument | |||
from .comment import Comment | |||
from .heading import Heading | |||
from .html_entity import HTMLEntity | |||
from .tag import Tag | |||
from .template import Template | |||
from .wikilink import Wikilink |
@@ -0,0 +1,46 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from . import Node | |||
from ..compat import str | |||
__all__ = ["Comment"] | |||
class Comment(Node):
    """A node for a hidden HTML comment such as ``<!-- foobar -->``.

    The text between the ``<!--`` and ``-->`` delimiters is available
    through :py:attr:`contents`.
    """

    def __init__(self, contents):
        super(Comment, self).__init__()
        # Stored as given; only the ``contents`` setter coerces to ``str``.
        self._contents = contents

    def __unicode__(self):
        # Re-wrap the stored contents in the comment delimiters.
        return "".join(("<!--", str(self.contents), "-->"))

    @property
    def contents(self):
        """The hidden text found between ``<!--`` and ``-->``."""
        return self._contents

    @contents.setter
    def contents(self, value):
        self._contents = str(value)
@@ -45,7 +45,7 @@ class Heading(Node): | |||
yield self.title, child | |||
def __strip__(self, normalize, collapse):
    """Return the heading's title with its markup stripped.

    Delegates to ``self.title.strip_code()``, forwarding *normalize* and
    *collapse* unchanged, so nested markup inside the title is stripped as
    well (the same approach ``Wikilink.__strip__`` takes) instead of
    returning the title object untouched.
    """
    return self.title.strip_code(normalize, collapse)
def __showtree__(self, write, get, mark): | |||
write("=" * self.level) | |||
@@ -0,0 +1,81 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from . import Node | |||
from ..compat import str | |||
from ..utils import parse_anything | |||
__all__ = ["Wikilink"] | |||
class Wikilink(Node):
    """Represents an internal wikilink, like ``[[Foo|Bar]]``.

    *title* is the link target and *text* is the optional display text.
    A *text* of ``None`` means the link has no ``|text`` part at all, which
    is different from an empty display text.
    """

    def __init__(self, title, text=None):
        super(Wikilink, self).__init__()
        self._title = title
        self._text = text

    def __unicode__(self):
        # Only emit the "|" separator when a display text is present.
        if self.text is not None:
            return "[[" + str(self.title) + "|" + str(self.text) + "]]"
        return "[[" + str(self.title) + "]]"

    def __iternodes__(self, getter):
        """Yield ``(parent, node)`` pairs for this node and its children.

        The link itself is yielded first with a parent of ``None``, then
        everything *getter* produces for the title and, if present, for
        the display text.
        """
        yield None, self
        for child in getter(self.title):
            yield self.title, child
        if self.text is not None:
            for child in getter(self.text):
                yield self.text, child

    def __strip__(self, normalize, collapse):
        """Return the stripped display text, or the stripped title if none."""
        if self.text is not None:
            return self.text.strip_code(normalize, collapse)
        return self.title.strip_code(normalize, collapse)

    def __showtree__(self, write, get, mark):
        """Emit this link through the *write*, *get* and *mark* callbacks.

        Output order: ``[[``, the title, then (only when display text is
        set) `` | ``, a call to *mark*, and the text, and finally ``]]``.
        """
        write("[[")
        get(self.title)
        if self.text is not None:
            write(" | ")
            mark()
            get(self.text)
        write("]]")

    @property
    def title(self):
        """The title of the linked page, as a :py:class:`~.Wikicode` object."""
        return self._title

    @title.setter
    def title(self, value):
        self._title = parse_anything(value)

    @property
    def text(self):
        """The text to display (if any), as a :py:class:`~.Wikicode` object.

        Assign ``None`` to remove the display text entirely.
        """
        return self._text

    @text.setter
    def text(self, value):
        # ``None`` is the sentinel every other method checks for "no display
        # text"; keep it as-is rather than handing it to parse_anything(), so
        # that a link can be turned back into the plain ``[[title]]`` form.
        if value is None:
            self._text = None
        else:
            self._text = parse_anything(value)
@@ -24,7 +24,8 @@ from __future__ import unicode_literals | |||
from . import tokens | |||
from ..compat import str | |||
from ..nodes import Argument, Heading, HTMLEntity, Tag, Template, Text | |||
from ..nodes import (Argument, Comment, Heading, HTMLEntity, Tag, Template, | |||
Text, Wikilink) | |||
from ..nodes.extras import Attribute, Parameter | |||
from ..smart_list import SmartList | |||
from ..wikicode import Wikicode | |||
@@ -125,8 +126,24 @@ class Builder(object): | |||
else: | |||
self._write(self._handle_token(token)) | |||
def _handle_wikilink(self):
    """Build a ``Wikilink`` node from the tokens after a ``WikilinkOpen``.

    Tokens are consumed until a ``WikilinkClose`` is reached. If a
    ``WikilinkSeparator`` was seen on the way, what had been collected up
    to that point becomes the title and the remainder becomes the text.
    Falls off the end (returning ``None``) if the tokens run out first.
    """
    link_title = None
    self._push()
    while self._tokens:
        current = self._tokens.pop()
        if isinstance(current, tokens.WikilinkSeparator):
            # Title is finished; start collecting the display text.
            link_title = self._pop()
            self._push()
            continue
        if isinstance(current, tokens.WikilinkClose):
            if link_title is None:
                return Wikilink(self._pop())
            return Wikilink(link_title, self._pop())
        self._write(self._handle_token(current))
def _handle_entity(self): | |||
"""Handle a case where a HTML entity is at the head of the tokens.""" | |||
"""Handle a case where an HTML entity is at the head of the tokens.""" | |||
token = self._tokens.pop() | |||
if isinstance(token, tokens.HTMLEntityNumeric): | |||
token = self._tokens.pop() | |||
@@ -152,6 +169,17 @@ class Builder(object): | |||
else: | |||
self._write(self._handle_token(token)) | |||
def _handle_comment(self):
    """Build a ``Comment`` node from the tokens after a ``CommentStart``.

    Every token up to the matching ``CommentEnd`` is handled and written;
    the popped result becomes the comment's contents. Falls off the end
    (returning ``None``) if no ``CommentEnd`` token is found.
    """
    self._push()
    while self._tokens:
        current = self._tokens.pop()
        if not isinstance(current, tokens.CommentEnd):
            self._write(self._handle_token(current))
            continue
        return Comment(self._pop())
def _handle_attribute(self): | |||
"""Handle a case where a tag attribute is at the head of the tokens.""" | |||
name, quoted = None, False | |||
@@ -205,10 +233,14 @@ class Builder(object): | |||
return self._handle_template() | |||
elif isinstance(token, tokens.ArgumentOpen): | |||
return self._handle_argument() | |||
elif isinstance(token, tokens.WikilinkOpen): | |||
return self._handle_wikilink() | |||
elif isinstance(token, tokens.HTMLEntityStart): | |||
return self._handle_entity() | |||
elif isinstance(token, tokens.HeadingStart): | |||
return self._handle_heading(token) | |||
elif isinstance(token, tokens.CommentStart): | |||
return self._handle_comment() | |||
elif isinstance(token, tokens.TagOpenOpen): | |||
return self._handle_tag(token) | |||
@@ -35,49 +35,62 @@ will cover ``BAR == 0b10`` and ``BAZ == 0b01``). | |||
Local (stack-specific) contexts: | |||
* :py:const:`TEMPLATE` (``0b00000000111``) | |||
* :py:const:`TEMPLATE` | |||
* :py:const:`TEMPLATE_NAME` (``0b00000000001``) | |||
* :py:const:`TEMPLATE_PARAM_KEY` (``0b00000000010``) | |||
* :py:const:`TEMPLATE_PARAM_VALUE` (``0b00000000100``) | |||
* :py:const:`TEMPLATE_NAME` | |||
* :py:const:`TEMPLATE_PARAM_KEY` | |||
* :py:const:`TEMPLATE_PARAM_VALUE` | |||
* :py:const:`ARGUMENT` (``0b00000011000``) | |||
* :py:const:`ARGUMENT` | |||
* :py:const:`ARGUMENT_NAME` (``0b00000001000``) | |||
* :py:const:`ARGUMENT_DEFAULT` (``0b00000010000``) | |||
* :py:const:`ARGUMENT_NAME` | |||
* :py:const:`ARGUMENT_DEFAULT` | |||
* :py:const:`HEADING` (``0b111111000``) | |||
* :py:const:`WIKILINK` | |||
* :py:const:`HEADING_LEVEL_1` (``0b00000100000``) | |||
* :py:const:`HEADING_LEVEL_2` (``0b00001000000``) | |||
* :py:const:`HEADING_LEVEL_3` (``0b00010000000``) | |||
* :py:const:`HEADING_LEVEL_4` (``0b00100000000``) | |||
* :py:const:`HEADING_LEVEL_5` (``0b01000000000``) | |||
* :py:const:`HEADING_LEVEL_6` (``0b10000000000``) | |||
* :py:const:`WIKILINK_TITLE` | |||
* :py:const:`WIKILINK_TEXT` | |||
* :py:const:`HEADING` | |||
* :py:const:`HEADING_LEVEL_1` | |||
* :py:const:`HEADING_LEVEL_2` | |||
* :py:const:`HEADING_LEVEL_3` | |||
* :py:const:`HEADING_LEVEL_4` | |||
* :py:const:`HEADING_LEVEL_5` | |||
* :py:const:`HEADING_LEVEL_6` | |||
* :py:const:`COMMENT` | |||
Global contexts: | |||
* :py:const:`GL_HEADING` (``0b1``) | |||
* :py:const:`GL_HEADING` | |||
""" | |||
# Local contexts: | |||
TEMPLATE = 0b00000000111 | |||
TEMPLATE_NAME = 0b00000000001 | |||
TEMPLATE_PARAM_KEY = 0b00000000010 | |||
TEMPLATE_PARAM_VALUE = 0b00000000100 | |||
ARGUMENT = 0b00000011000 | |||
ARGUMENT_NAME = 0b00000001000 | |||
ARGUMENT_DEFAULT = 0b00000010000 | |||
HEADING = 0b11111100000 | |||
HEADING_LEVEL_1 = 0b00000100000 | |||
HEADING_LEVEL_2 = 0b00001000000 | |||
HEADING_LEVEL_3 = 0b00010000000 | |||
HEADING_LEVEL_4 = 0b00100000000 | |||
HEADING_LEVEL_5 = 0b01000000000 | |||
HEADING_LEVEL_6 = 0b10000000000 | |||
TEMPLATE = 0b00000000000111 | |||
TEMPLATE_NAME = 0b00000000000001 | |||
TEMPLATE_PARAM_KEY = 0b00000000000010 | |||
TEMPLATE_PARAM_VALUE = 0b00000000000100 | |||
ARGUMENT = 0b00000000011000 | |||
ARGUMENT_NAME = 0b00000000001000 | |||
ARGUMENT_DEFAULT = 0b00000000010000 | |||
WIKILINK = 0b00000001100000 | |||
WIKILINK_TITLE = 0b00000000100000 | |||
WIKILINK_TEXT = 0b00000001000000 | |||
HEADING = 0b01111110000000 | |||
HEADING_LEVEL_1 = 0b00000010000000 | |||
HEADING_LEVEL_2 = 0b00000100000000 | |||
HEADING_LEVEL_3 = 0b00001000000000 | |||
HEADING_LEVEL_4 = 0b00010000000000 | |||
HEADING_LEVEL_5 = 0b00100000000000 | |||
HEADING_LEVEL_6 = 0b01000000000000 | |||
COMMENT = 0b10000000000000 | |||
# Global contexts: | |||
@@ -41,8 +41,8 @@ class Tokenizer(object): | |||
START = object() | |||
END = object() | |||
MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", | |||
"/", "-", "\n", END] | |||
regex = re.compile(r"([{}\[\]<>|=&#*;:/\-\n])", flags=re.IGNORECASE) | |||
"/", "-", "!", "\n", END] | |||
regex = re.compile(r"([{}\[\]<>|=&#*;:/\-!\n])", flags=re.IGNORECASE) | |||
def __init__(self): | |||
self._text = None | |||
@@ -83,9 +83,18 @@ class Tokenizer(object): | |||
self._stack.append(tokens.Text(text="".join(self._textbuffer))) | |||
self._textbuffer = [] | |||
def _pop(self, keep_context=False):
    """Pop the current stack/context/textbuffer, returning the stack.

    The pending text buffer is flushed with ``_push_textbuffer()`` before
    the top entry is removed from ``self._stacks``.

    If *keep_context* is ``True``, then we will replace the underlying
    stack's context with the current stack's.
    """
    self._push_textbuffer()
    if keep_context:
        # Read the context before popping; assign it only afterwards, so
        # the value lands on whatever stack is current once the top is gone.
        context = self._context
        stack = self._stacks.pop()[0]
        self._context = context
        return stack
    return self._stacks.pop()[0]
def _fail_route(self): | |||
@@ -225,14 +234,23 @@ class Tokenizer(object): | |||
if self._context & contexts.TEMPLATE_NAME: | |||
self._verify_safe(["\n", "{", "}", "[", "]"]) | |||
self._context ^= contexts.TEMPLATE_NAME | |||
if self._context & contexts.TEMPLATE_PARAM_VALUE: | |||
elif self._context & contexts.TEMPLATE_PARAM_VALUE: | |||
self._context ^= contexts.TEMPLATE_PARAM_VALUE | |||
elif self._context & contexts.TEMPLATE_PARAM_KEY: | |||
self._write_all(self._pop(keep_context=True)) | |||
self._context |= contexts.TEMPLATE_PARAM_KEY | |||
self._write(tokens.TemplateParamSeparator()) | |||
self._push(self._context) | |||
def _handle_template_param_value(self): | |||
"""Handle a template parameter's value at the head of the string.""" | |||
self._verify_safe(["\n", "{{", "}}"]) | |||
try: | |||
self._verify_safe(["\n", "{{", "}}"]) | |||
except BadRoute: | |||
self._pop() | |||
raise | |||
else: | |||
self._write_all(self._pop(keep_context=True)) | |||
self._context ^= contexts.TEMPLATE_PARAM_KEY | |||
self._context |= contexts.TEMPLATE_PARAM_VALUE | |||
self._write(tokens.TemplateParamEquals()) | |||
@@ -241,6 +259,8 @@ class Tokenizer(object): | |||
"""Handle the end of a template at the head of the string.""" | |||
if self._context & contexts.TEMPLATE_NAME: | |||
self._verify_safe(["\n", "{", "}", "[", "]"]) | |||
elif self._context & contexts.TEMPLATE_PARAM_KEY: | |||
self._write_all(self._pop(keep_context=True)) | |||
self._head += 1 | |||
return self._pop() | |||
@@ -258,6 +278,34 @@ class Tokenizer(object): | |||
self._head += 2 | |||
return self._pop() | |||
def _parse_wikilink(self):
    """Try to tokenize an internal wikilink starting at the head.

    On success the parsed tokens are written between a ``WikilinkOpen``
    and a ``WikilinkClose``. If parsing raises ``BadRoute``, the head is
    rewound and a literal ``[[`` is written as text instead.
    """
    # Rewind target on failure: one position past where we started.
    reset = self._head + 1
    self._head += 2
    try:
        wikilink = self._parse(contexts.WIKILINK_TITLE)
    except BadRoute:
        self._head = reset
        self._write_text("[[")
        return
    self._write(tokens.WikilinkOpen())
    self._write_all(wikilink)
    self._write(tokens.WikilinkClose())
def _handle_wikilink_separator(self):
    """Switch from a wikilink's title to its display text.

    The title collected so far is first checked with ``_verify_safe()``;
    then the context moves from ``WIKILINK_TITLE`` to ``WIKILINK_TEXT``
    and a ``WikilinkSeparator`` token is written.
    """
    unsafe = ["\n", "{", "}", "[", "]"]
    self._verify_safe(unsafe)
    # Toggle the title bit, then set the text bit.
    self._context = (self._context ^ contexts.WIKILINK_TITLE) | contexts.WIKILINK_TEXT
    self._write(tokens.WikilinkSeparator())
def _handle_wikilink_end(self):
    """Finish the wikilink at the head and return the popped stack.

    When still in the title (no separator was seen), the title is checked
    with ``_verify_safe()`` first. The head is then advanced by one before
    popping.
    """
    still_in_title = self._context & contexts.WIKILINK_TITLE
    if still_in_title:
        self._verify_safe(["\n", "{", "}", "[", "]"])
    self._head += 1
    return self._pop()
def _parse_heading(self): | |||
"""Parse a section heading at the head of the wikicode string.""" | |||
self._global |= contexts.GL_HEADING | |||
@@ -307,7 +355,7 @@ class Tokenizer(object): | |||
return self._pop(), after_level | |||
def _really_parse_entity(self): | |||
"""Actually parse a HTML entity and ensure that it is valid.""" | |||
"""Actually parse an HTML entity and ensure that it is valid.""" | |||
self._write(tokens.HTMLEntityStart()) | |||
self._head += 1 | |||
@@ -349,7 +397,7 @@ class Tokenizer(object): | |||
self._write(tokens.HTMLEntityEnd()) | |||
def _parse_entity(self): | |||
"""Parse a HTML entity at the head of the wikicode string.""" | |||
"""Parse an HTML entity at the head of the wikicode string.""" | |||
reset = self._head | |||
self._push() | |||
try: | |||
@@ -360,6 +408,21 @@ class Tokenizer(object): | |||
else: | |||
self._write_all(self._pop()) | |||
def _parse_comment(self):
    """Try to tokenize an HTML comment starting at the head.

    On success the parsed tokens are written between a ``CommentStart``
    and a ``CommentEnd`` and the head is advanced by two more positions.
    If parsing raises ``BadRoute``, the head is rewound and a literal
    ``<!--`` is written as text instead.
    """
    # Rewind target on failure: three positions past where we started.
    reset = self._head + 3
    self._head += 4
    try:
        comment = self._parse(contexts.COMMENT)
    except BadRoute:
        self._head = reset
        self._write_text("<!--")
        return
    self._write(tokens.CommentStart())
    self._write_all(comment)
    self._write(tokens.CommentEnd())
    self._head += 2
def _parse(self, context=0): | |||
"""Parse the wikicode string, using *context* for when to stop.""" | |||
self._push(context) | |||
@@ -370,12 +433,18 @@ class Tokenizer(object): | |||
self._head += 1 | |||
continue | |||
if this is self.END: | |||
fail = contexts.TEMPLATE | contexts.ARGUMENT | contexts.HEADING | |||
fail = (contexts.TEMPLATE | contexts.ARGUMENT | | |||
contexts.HEADING | contexts.COMMENT) | |||
if self._context & fail: | |||
self._fail_route() | |||
return self._pop() | |||
next = self._read(1) | |||
if this == next == "{": | |||
if self._context & contexts.COMMENT: | |||
if this == next == "-" and self._read(2) == ">": | |||
return self._pop() | |||
else: | |||
self._write_text(this) | |||
elif this == next == "{": | |||
self._parse_template_or_argument() | |||
elif this == "|" and self._context & contexts.TEMPLATE: | |||
self._handle_template_param() | |||
@@ -390,6 +459,15 @@ class Tokenizer(object): | |||
return self._handle_argument_end() | |||
else: | |||
self._write_text("}") | |||
elif this == next == "[": | |||
if not self._context & contexts.WIKILINK_TITLE: | |||
self._parse_wikilink() | |||
else: | |||
self._write_text("[") | |||
elif this == "|" and self._context & contexts.WIKILINK_TITLE: | |||
self._handle_wikilink_separator() | |||
elif this == next == "]" and self._context & contexts.WIKILINK: | |||
return self._handle_wikilink_end() | |||
elif this == "=" and not self._global & contexts.GL_HEADING: | |||
if self._read(-1) in ("\n", self.START): | |||
self._parse_heading() | |||
@@ -401,6 +479,11 @@ class Tokenizer(object): | |||
self._fail_route() | |||
elif this == "&": | |||
self._parse_entity() | |||
elif this == "<" and next == "!": | |||
if self._read(2) == self._read(3) == "-": | |||
self._parse_comment() | |||
else: | |||
self._write_text(this) | |||
else: | |||
self._write_text(this) | |||
self._head += 1 | |||
@@ -63,6 +63,7 @@ class Token(object): | |||
def __delattr__(self, key): | |||
del self._kwargs[key] | |||
def make(name): | |||
"""Create a new Token class using ``type()`` and add it to ``__all__``.""" | |||
__all__.append(name) | |||
@@ -79,6 +80,10 @@ ArgumentOpen = make("ArgumentOpen") # {{{ | |||
ArgumentSeparator = make("ArgumentSeparator") # | | |||
ArgumentClose = make("ArgumentClose") # }}} | |||
WikilinkOpen = make("WikilinkOpen") # [[ | |||
WikilinkSeparator = make("WikilinkSeparator") # | | |||
WikilinkClose = make("WikilinkClose") # ]] | |||
HTMLEntityStart = make("HTMLEntityStart") # & | |||
HTMLEntityNumeric = make("HTMLEntityNumeric") # # | |||
HTMLEntityHex = make("HTMLEntityHex") # x | |||
@@ -87,6 +92,9 @@ HTMLEntityEnd = make("HTMLEntityEnd") # ; | |||
HeadingStart = make("HeadingStart") # =... | |||
HeadingEnd = make("HeadingEnd") # =... | |||
CommentStart = make("CommentStart") # <!-- | |||
CommentEnd = make("CommentEnd") # --> | |||
TagOpenOpen = make("TagOpenOpen") # < | |||
TagAttrStart = make("TagAttrStart") | |||
TagAttrEquals = make("TagAttrEquals") # = | |||
@@ -24,7 +24,7 @@ from __future__ import unicode_literals | |||
import re | |||
from .compat import maxsize, str | |||
from .nodes import Heading, Node, Tag, Template, Text | |||
from .nodes import Heading, Node, Tag, Template, Text, Wikilink | |||
from .string_mixin import StringMixIn | |||
from .utils import parse_anything | |||
@@ -303,6 +303,14 @@ class Wikicode(StringMixIn): | |||
if not matches or re.search(matches, str(node), flags): | |||
yield node | |||
def ifilter_links(self, recursive=False, matches=None, flags=FLAGS):
    """Iterate over the wikilink nodes in this object.

    Shortcut for :py:meth:`ifilter` with *forcetype* fixed to
    :py:class:`~.Wikilink`; *recursive*, *matches* and *flags* are passed
    through unchanged.
    """
    links = self.ifilter(recursive, matches, flags, forcetype=Wikilink)
    return links
def ifilter_templates(self, recursive=False, matches=None, flags=FLAGS): | |||
"""Iterate over template nodes. | |||
@@ -335,6 +343,14 @@ class Wikicode(StringMixIn): | |||
""" | |||
return list(self.ifilter(recursive, matches, flags, forcetype)) | |||
def filter_links(self, recursive=False, matches=None, flags=FLAGS):
    """Return the wikilink nodes in this object as a list.

    Collects everything :py:meth:`ifilter_links` yields for the same
    arguments.
    """
    return [link for link in self.ifilter_links(recursive, matches, flags)]
def filter_templates(self, recursive=False, matches=None, flags=FLAGS): | |||
"""Return a list of template nodes. | |||