@@ -4,4 +4,5 @@ | |||||
.DS_Store | .DS_Store | ||||
__pycache__ | __pycache__ | ||||
build | build | ||||
dist | |||||
docs/_build | docs/_build |
@@ -28,9 +28,9 @@ Normal usage is rather straightforward (where ``text`` is page text):: | |||||
>>> import mwparserfromhell | >>> import mwparserfromhell | ||||
>>> wikicode = mwparserfromhell.parse(text) | >>> wikicode = mwparserfromhell.parse(text) | ||||
``wikicode`` is a ``mwparserfromhell.wikicode.Wikicode`` object, which acts | |||||
like an ordinary ``unicode`` object (or ``str`` in Python 3) with some extra | |||||
methods. For example:: | |||||
``wikicode`` is a ``mwparserfromhell.Wikicode`` object, which acts like an | |||||
ordinary ``unicode`` object (or ``str`` in Python 3) with some extra methods. | |||||
For example:: | |||||
>>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" | >>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" | ||||
>>> wikicode = mwparserfromhell.parse(text) | >>> wikicode = mwparserfromhell.parse(text) | ||||
@@ -70,7 +70,7 @@ passing ``recursive=True``:: | |||||
>>> mwparserfromhell.parse(text).filter_templates(recursive=True) | >>> mwparserfromhell.parse(text).filter_templates(recursive=True) | ||||
['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}'] | ['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}'] | ||||
Templates can be easily modified to add, remove, alter or params. ``Wikicode`` | |||||
Templates can be easily modified to add, remove, or alter params. ``Wikicode`` | |||||
can also be treated like a list with ``append()``, ``insert()``, ``remove()``, | can also be treated like a list with ``append()``, ``insert()``, ``remove()``, | ||||
``replace()``, and more:: | ``replace()``, and more:: | ||||
@@ -131,7 +131,7 @@ following code (via the API_):: | |||||
.. _MediaWiki: http://mediawiki.org | .. _MediaWiki: http://mediawiki.org | ||||
.. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig | .. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig | ||||
.. _Σ: http://en.wikipedia.org/wiki/User:Σ | |||||
.. _Σ: http://en.wikipedia.org/wiki/User:%CE%A3 | |||||
.. _Python Package Index: http://pypi.python.org | .. _Python Package Index: http://pypi.python.org | ||||
.. _get pip: http://pypi.python.org/pypi/pip | .. _get pip: http://pypi.python.org/pypi/pip | ||||
.. _EarwigBot: https://github.com/earwig/earwigbot | .. _EarwigBot: https://github.com/earwig/earwigbot | ||||
@@ -17,6 +17,14 @@ nodes Package | |||||
:undoc-members: | :undoc-members: | ||||
:show-inheritance: | :show-inheritance: | ||||
:mod:`comment` Module | |||||
--------------------- | |||||
.. automodule:: mwparserfromhell.nodes.comment | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:mod:`heading` Module | :mod:`heading` Module | ||||
--------------------- | --------------------- | ||||
@@ -56,6 +64,14 @@ nodes Package | |||||
:undoc-members: | :undoc-members: | ||||
:show-inheritance: | :show-inheritance: | ||||
:mod:`wikilink` Module | |||||
---------------------- | |||||
.. automodule:: mwparserfromhell.nodes.wikilink | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
Subpackages | Subpackages | ||||
----------- | ----------- | ||||
@@ -50,7 +50,7 @@ copyright = u'2012 Ben Kurtovic' | |||||
# The short X.Y version. | # The short X.Y version. | ||||
version = '0.1' | version = '0.1' | ||||
# The full version, including alpha/beta/rc tags. | # The full version, including alpha/beta/rc tags. | ||||
release = '0.1' | |||||
release = '0.1.1' | |||||
# The language for content autogenerated by Sphinx. Refer to documentation | # The language for content autogenerated by Sphinx. Refer to documentation | ||||
# for a list of supported languages. | # for a list of supported languages. | ||||
@@ -9,7 +9,7 @@ Developed by Earwig_ with help from `Σ`_. | |||||
.. _MediaWiki: http://mediawiki.org | .. _MediaWiki: http://mediawiki.org | ||||
.. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig | .. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig | ||||
.. _Σ: http://en.wikipedia.org/wiki/User:Σ | |||||
.. _Σ: http://en.wikipedia.org/wiki/User:%CE%A3 | |||||
Installation | Installation | ||||
------------ | ------------ | ||||
@@ -48,7 +48,7 @@ by passing *recursive=True*:: | |||||
>>> mwparserfromhell.parse(text).filter_templates(recursive=True) | >>> mwparserfromhell.parse(text).filter_templates(recursive=True) | ||||
['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}'] | ['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}'] | ||||
Templates can be easily modified to add, remove alter or params. | |||||
Templates can be easily modified to add, remove, or alter params. | |||||
:py:class:`~.Wikicode` can also be treated like a list with | :py:class:`~.Wikicode` can also be treated like a list with | ||||
:py:meth:`~.Wikicode.append`, :py:meth:`~.Wikicode.insert`, | :py:meth:`~.Wikicode.append`, :py:meth:`~.Wikicode.insert`, | ||||
:py:meth:`~.Wikicode.remove`, :py:meth:`~.Wikicode.replace`, and more:: | :py:meth:`~.Wikicode.remove`, :py:meth:`~.Wikicode.replace`, and more:: | ||||
@@ -31,7 +31,7 @@ from __future__ import unicode_literals | |||||
__author__ = "Ben Kurtovic" | __author__ = "Ben Kurtovic" | ||||
__copyright__ = "Copyright (C) 2012 Ben Kurtovic" | __copyright__ = "Copyright (C) 2012 Ben Kurtovic" | ||||
__license__ = "MIT License" | __license__ = "MIT License" | ||||
__version__ = "0.1" | |||||
__version__ = "0.1.1" | |||||
__email__ = "ben.kurtovic@verizon.net" | __email__ = "ben.kurtovic@verizon.net" | ||||
from . import nodes, parser, smart_list, string_mixin, wikicode | from . import nodes, parser, smart_list, string_mixin, wikicode | ||||
@@ -68,7 +68,9 @@ class Node(StringMixIn): | |||||
from . import extras | from . import extras | ||||
from .text import Text | from .text import Text | ||||
from .argument import Argument | from .argument import Argument | ||||
from .comment import Comment | |||||
from .heading import Heading | from .heading import Heading | ||||
from .html_entity import HTMLEntity | from .html_entity import HTMLEntity | ||||
from .tag import Tag | from .tag import Tag | ||||
from .template import Template | from .template import Template | ||||
from .wikilink import Wikilink |
@@ -0,0 +1,46 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from __future__ import unicode_literals | |||||
from . import Node | |||||
from ..compat import str | |||||
__all__ = ["Comment"] | |||||
class Comment(Node):
    """A hidden HTML comment node, e.g. ``<!-- foobar -->``."""

    def __init__(self, contents):
        super(Comment, self).__init__()
        # Stored as given; only the property setter coerces to a string.
        self._contents = contents

    def __unicode__(self):
        # Re-wrap the stored contents in the comment delimiters.
        return "".join(("<!--", str(self.contents), "-->"))

    @property
    def contents(self):
        """The text found between the ``<!--`` and ``-->`` delimiters."""
        return self._contents

    @contents.setter
    def contents(self, value):
        self._contents = str(value)
@@ -45,7 +45,7 @@ class Heading(Node): | |||||
yield self.title, child | yield self.title, child | ||||
def __strip__(self, normalize, collapse): | def __strip__(self, normalize, collapse): | ||||
return self.title | |||||
return self.title.strip_code(normalize, collapse) | |||||
def __showtree__(self, write, get, mark): | def __showtree__(self, write, get, mark): | ||||
write("=" * self.level) | write("=" * self.level) | ||||
@@ -0,0 +1,81 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from __future__ import unicode_literals | |||||
from . import Node | |||||
from ..compat import str | |||||
from ..utils import parse_anything | |||||
__all__ = ["Wikilink"] | |||||
class Wikilink(Node):
    """Represents an internal wikilink, like ``[[Foo|Bar]]``.

    *title* is the link target. *text* is the optional display text; a value
    of ``None`` means the link has no separate display text and is rendered
    as ``[[title]]``.
    """

    def __init__(self, title, text=None):
        super(Wikilink, self).__init__()
        # Both values are stored as given; only the setters parse input.
        self._title = title
        self._text = text

    def __unicode__(self):
        if self.text is not None:
            return "[[" + str(self.title) + "|" + str(self.text) + "]]"
        return "[[" + str(self.title) + "]]"

    def __iternodes__(self, getter):
        # Yield this node first (with no parent), then every node that
        # *getter* produces for the title and, when present, the text.
        yield None, self
        for child in getter(self.title):
            yield self.title, child
        if self.text is not None:
            for child in getter(self.text):
                yield self.text, child

    def __strip__(self, normalize, collapse):
        # The display text wins when there is one; otherwise use the title.
        if self.text is not None:
            return self.text.strip_code(normalize, collapse)
        return self.title.strip_code(normalize, collapse)

    def __showtree__(self, write, get, mark):
        write("[[")
        get(self.title)
        if self.text is not None:
            write("    | ")
            mark()
            get(self.text)
        write("]]")

    @property
    def title(self):
        """The title of the linked page, as a :py:class:`~.Wikicode` object."""
        return self._title

    @property
    def text(self):
        """The text to display (if any), as a :py:class:`~.Wikicode` object."""
        return self._text

    @title.setter
    def title(self, value):
        self._title = parse_anything(value)

    @text.setter
    def text(self, value):
        # Every reader in this class treats ``None`` as "no display text",
        # so keep ``None`` as-is instead of parsing it; this lets callers
        # remove the display text again with ``link.text = None``.
        if value is None:
            self._text = None
        else:
            self._text = parse_anything(value)
@@ -24,7 +24,8 @@ from __future__ import unicode_literals | |||||
from . import tokens | from . import tokens | ||||
from ..compat import str | from ..compat import str | ||||
from ..nodes import Argument, Heading, HTMLEntity, Tag, Template, Text | |||||
from ..nodes import (Argument, Comment, Heading, HTMLEntity, Tag, Template, | |||||
Text, Wikilink) | |||||
from ..nodes.extras import Attribute, Parameter | from ..nodes.extras import Attribute, Parameter | ||||
from ..smart_list import SmartList | from ..smart_list import SmartList | ||||
from ..wikicode import Wikicode | from ..wikicode import Wikicode | ||||
@@ -125,8 +126,24 @@ class Builder(object): | |||||
else: | else: | ||||
self._write(self._handle_token(token)) | self._write(self._handle_token(token)) | ||||
def _handle_wikilink(self):
    """Build a Wikilink node from the tokens following a wikilink opener.

    Tokens are consumed until a ``WikilinkClose`` is found. Whatever was
    collected before a ``WikilinkSeparator`` becomes the title and whatever
    follows it becomes the text; with no separator, everything collected is
    the title.
    """
    link_title = None
    self._push()
    while self._tokens:
        current = self._tokens.pop()
        if isinstance(current, tokens.WikilinkSeparator):
            # The title is complete; start a fresh stack for the text.
            link_title = self._pop()
            self._push()
            continue
        if isinstance(current, tokens.WikilinkClose):
            if link_title is None:
                return Wikilink(self._pop())
            return Wikilink(link_title, self._pop())
        # Anything else is link content: build its node and store it.
        self._write(self._handle_token(current))
def _handle_entity(self): | def _handle_entity(self): | ||||
"""Handle a case where a HTML entity is at the head of the tokens.""" | |||||
"""Handle a case where an HTML entity is at the head of the tokens.""" | |||||
token = self._tokens.pop() | token = self._tokens.pop() | ||||
if isinstance(token, tokens.HTMLEntityNumeric): | if isinstance(token, tokens.HTMLEntityNumeric): | ||||
token = self._tokens.pop() | token = self._tokens.pop() | ||||
@@ -152,6 +169,17 @@ class Builder(object): | |||||
else: | else: | ||||
self._write(self._handle_token(token)) | self._write(self._handle_token(token)) | ||||
def _handle_comment(self):
    """Build a Comment node from the tokens following a comment opener.

    Tokens are consumed and written to a fresh stack until a ``CommentEnd``
    is reached; the popped stack then becomes the comment's contents.
    """
    self._push()
    while self._tokens:
        current = self._tokens.pop()
        if not isinstance(current, tokens.CommentEnd):
            self._write(self._handle_token(current))
            continue
        return Comment(self._pop())
def _handle_attribute(self): | def _handle_attribute(self): | ||||
"""Handle a case where a tag attribute is at the head of the tokens.""" | """Handle a case where a tag attribute is at the head of the tokens.""" | ||||
name, quoted = None, False | name, quoted = None, False | ||||
@@ -205,10 +233,14 @@ class Builder(object): | |||||
return self._handle_template() | return self._handle_template() | ||||
elif isinstance(token, tokens.ArgumentOpen): | elif isinstance(token, tokens.ArgumentOpen): | ||||
return self._handle_argument() | return self._handle_argument() | ||||
elif isinstance(token, tokens.WikilinkOpen): | |||||
return self._handle_wikilink() | |||||
elif isinstance(token, tokens.HTMLEntityStart): | elif isinstance(token, tokens.HTMLEntityStart): | ||||
return self._handle_entity() | return self._handle_entity() | ||||
elif isinstance(token, tokens.HeadingStart): | elif isinstance(token, tokens.HeadingStart): | ||||
return self._handle_heading(token) | return self._handle_heading(token) | ||||
elif isinstance(token, tokens.CommentStart): | |||||
return self._handle_comment() | |||||
elif isinstance(token, tokens.TagOpenOpen): | elif isinstance(token, tokens.TagOpenOpen): | ||||
return self._handle_tag(token) | return self._handle_tag(token) | ||||
@@ -35,49 +35,62 @@ will cover ``BAR == 0b10`` and ``BAZ == 0b01``). | |||||
Local (stack-specific) contexts: | Local (stack-specific) contexts: | ||||
* :py:const:`TEMPLATE` (``0b00000000111``) | |||||
* :py:const:`TEMPLATE` | |||||
* :py:const:`TEMPLATE_NAME` (``0b00000000001``) | |||||
* :py:const:`TEMPLATE_PARAM_KEY` (``0b00000000010``) | |||||
* :py:const:`TEMPLATE_PARAM_VALUE` (``0b00000000100``) | |||||
* :py:const:`TEMPLATE_NAME` | |||||
* :py:const:`TEMPLATE_PARAM_KEY` | |||||
* :py:const:`TEMPLATE_PARAM_VALUE` | |||||
* :py:const:`ARGUMENT` (``0b00000011000``) | |||||
* :py:const:`ARGUMENT` | |||||
* :py:const:`ARGUMENT_NAME` (``0b00000001000``) | |||||
* :py:const:`ARGUMENT_DEFAULT` (``0b00000010000``) | |||||
* :py:const:`ARGUMENT_NAME` | |||||
* :py:const:`ARGUMENT_DEFAULT` | |||||
* :py:const:`HEADING` (``0b111111000``) | |||||
* :py:const:`WIKILINK` | |||||
* :py:const:`HEADING_LEVEL_1` (``0b00000100000``) | |||||
* :py:const:`HEADING_LEVEL_2` (``0b00001000000``) | |||||
* :py:const:`HEADING_LEVEL_3` (``0b00010000000``) | |||||
* :py:const:`HEADING_LEVEL_4` (``0b00100000000``) | |||||
* :py:const:`HEADING_LEVEL_5` (``0b01000000000``) | |||||
* :py:const:`HEADING_LEVEL_6` (``0b10000000000``) | |||||
* :py:const:`WIKILINK_TITLE` | |||||
* :py:const:`WIKILINK_TEXT` | |||||
* :py:const:`HEADING` | |||||
* :py:const:`HEADING_LEVEL_1` | |||||
* :py:const:`HEADING_LEVEL_2` | |||||
* :py:const:`HEADING_LEVEL_3` | |||||
* :py:const:`HEADING_LEVEL_4` | |||||
* :py:const:`HEADING_LEVEL_5` | |||||
* :py:const:`HEADING_LEVEL_6` | |||||
* :py:const:`COMMENT` | |||||
Global contexts: | Global contexts: | ||||
* :py:const:`GL_HEADING` (``0b1``) | |||||
* :py:const:`GL_HEADING` | |||||
""" | """ | ||||
# Local contexts: | # Local contexts: | ||||
TEMPLATE = 0b00000000111 | |||||
TEMPLATE_NAME = 0b00000000001 | |||||
TEMPLATE_PARAM_KEY = 0b00000000010 | |||||
TEMPLATE_PARAM_VALUE = 0b00000000100 | |||||
ARGUMENT = 0b00000011000 | |||||
ARGUMENT_NAME = 0b00000001000 | |||||
ARGUMENT_DEFAULT = 0b00000010000 | |||||
HEADING = 0b11111100000 | |||||
HEADING_LEVEL_1 = 0b00000100000 | |||||
HEADING_LEVEL_2 = 0b00001000000 | |||||
HEADING_LEVEL_3 = 0b00010000000 | |||||
HEADING_LEVEL_4 = 0b00100000000 | |||||
HEADING_LEVEL_5 = 0b01000000000 | |||||
HEADING_LEVEL_6 = 0b10000000000 | |||||
TEMPLATE = 0b00000000000111 | |||||
TEMPLATE_NAME = 0b00000000000001 | |||||
TEMPLATE_PARAM_KEY = 0b00000000000010 | |||||
TEMPLATE_PARAM_VALUE = 0b00000000000100 | |||||
ARGUMENT = 0b00000000011000 | |||||
ARGUMENT_NAME = 0b00000000001000 | |||||
ARGUMENT_DEFAULT = 0b00000000010000 | |||||
WIKILINK = 0b00000001100000 | |||||
WIKILINK_TITLE = 0b00000000100000 | |||||
WIKILINK_TEXT = 0b00000001000000 | |||||
HEADING = 0b01111110000000 | |||||
HEADING_LEVEL_1 = 0b00000010000000 | |||||
HEADING_LEVEL_2 = 0b00000100000000 | |||||
HEADING_LEVEL_3 = 0b00001000000000 | |||||
HEADING_LEVEL_4 = 0b00010000000000 | |||||
HEADING_LEVEL_5 = 0b00100000000000 | |||||
HEADING_LEVEL_6 = 0b01000000000000 | |||||
COMMENT = 0b10000000000000 | |||||
# Global contexts: | # Global contexts: | ||||
@@ -41,8 +41,8 @@ class Tokenizer(object): | |||||
START = object() | START = object() | ||||
END = object() | END = object() | ||||
MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", | MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", | ||||
"/", "-", "\n", END] | |||||
regex = re.compile(r"([{}\[\]<>|=&#*;:/\-\n])", flags=re.IGNORECASE) | |||||
"/", "-", "!", "\n", END] | |||||
regex = re.compile(r"([{}\[\]<>|=&#*;:/\-!\n])", flags=re.IGNORECASE) | |||||
def __init__(self): | def __init__(self): | ||||
self._text = None | self._text = None | ||||
@@ -83,9 +83,18 @@ class Tokenizer(object): | |||||
self._stack.append(tokens.Text(text="".join(self._textbuffer))) | self._stack.append(tokens.Text(text="".join(self._textbuffer))) | ||||
self._textbuffer = [] | self._textbuffer = [] | ||||
def _pop(self): | |||||
"""Pop the current stack/context/textbuffer, returing the stack.""" | |||||
def _pop(self, keep_context=False): | |||||
"""Pop the current stack/context/textbuffer, returing the stack. | |||||
If *keep_context is ``True``, then we will replace the underlying | |||||
stack's context with the current stack's. | |||||
""" | |||||
self._push_textbuffer() | self._push_textbuffer() | ||||
if keep_context: | |||||
context = self._context | |||||
stack = self._stacks.pop()[0] | |||||
self._context = context | |||||
return stack | |||||
return self._stacks.pop()[0] | return self._stacks.pop()[0] | ||||
def _fail_route(self): | def _fail_route(self): | ||||
@@ -225,14 +234,23 @@ class Tokenizer(object): | |||||
if self._context & contexts.TEMPLATE_NAME: | if self._context & contexts.TEMPLATE_NAME: | ||||
self._verify_safe(["\n", "{", "}", "[", "]"]) | self._verify_safe(["\n", "{", "}", "[", "]"]) | ||||
self._context ^= contexts.TEMPLATE_NAME | self._context ^= contexts.TEMPLATE_NAME | ||||
if self._context & contexts.TEMPLATE_PARAM_VALUE: | |||||
elif self._context & contexts.TEMPLATE_PARAM_VALUE: | |||||
self._context ^= contexts.TEMPLATE_PARAM_VALUE | self._context ^= contexts.TEMPLATE_PARAM_VALUE | ||||
elif self._context & contexts.TEMPLATE_PARAM_KEY: | |||||
self._write_all(self._pop(keep_context=True)) | |||||
self._context |= contexts.TEMPLATE_PARAM_KEY | self._context |= contexts.TEMPLATE_PARAM_KEY | ||||
self._write(tokens.TemplateParamSeparator()) | self._write(tokens.TemplateParamSeparator()) | ||||
self._push(self._context) | |||||
def _handle_template_param_value(self): | def _handle_template_param_value(self): | ||||
"""Handle a template parameter's value at the head of the string.""" | """Handle a template parameter's value at the head of the string.""" | ||||
self._verify_safe(["\n", "{{", "}}"]) | |||||
try: | |||||
self._verify_safe(["\n", "{{", "}}"]) | |||||
except BadRoute: | |||||
self._pop() | |||||
raise | |||||
else: | |||||
self._write_all(self._pop(keep_context=True)) | |||||
self._context ^= contexts.TEMPLATE_PARAM_KEY | self._context ^= contexts.TEMPLATE_PARAM_KEY | ||||
self._context |= contexts.TEMPLATE_PARAM_VALUE | self._context |= contexts.TEMPLATE_PARAM_VALUE | ||||
self._write(tokens.TemplateParamEquals()) | self._write(tokens.TemplateParamEquals()) | ||||
@@ -241,6 +259,8 @@ class Tokenizer(object): | |||||
"""Handle the end of a template at the head of the string.""" | """Handle the end of a template at the head of the string.""" | ||||
if self._context & contexts.TEMPLATE_NAME: | if self._context & contexts.TEMPLATE_NAME: | ||||
self._verify_safe(["\n", "{", "}", "[", "]"]) | self._verify_safe(["\n", "{", "}", "[", "]"]) | ||||
elif self._context & contexts.TEMPLATE_PARAM_KEY: | |||||
self._write_all(self._pop(keep_context=True)) | |||||
self._head += 1 | self._head += 1 | ||||
return self._pop() | return self._pop() | ||||
@@ -258,6 +278,34 @@ class Tokenizer(object): | |||||
self._head += 2 | self._head += 2 | ||||
return self._pop() | return self._pop() | ||||
def _parse_wikilink(self):
    """Parse an internal wikilink at the head of the wikicode string.

    The two-character opener is skipped and the rest is parsed in the
    ``WIKILINK_TITLE`` context. If that route fails, the head is rewound to
    the opener's second character and ``[[`` is written as plain text;
    otherwise the parsed tokens are written between ``WikilinkOpen`` and
    ``WikilinkClose``.
    """
    rewind_to = self._head + 1
    self._head += 2
    try:
        link_tokens = self._parse(contexts.WIKILINK_TITLE)
    except BadRoute:
        self._head = rewind_to
        self._write_text("[[")
        return
    self._write(tokens.WikilinkOpen())
    self._write_all(link_tokens)
    self._write(tokens.WikilinkClose())
def _handle_wikilink_separator(self):
    """Handle the ``|`` that separates a wikilink's title from its text.

    The title collected so far is checked with ``_verify_safe`` first. The
    context then switches from ``WIKILINK_TITLE`` to ``WIKILINK_TEXT`` and a
    ``WikilinkSeparator`` token is written.
    """
    unsafe = ["\n", "{", "}", "[", "]"]
    self._verify_safe(unsafe)
    without_title = self._context ^ contexts.WIKILINK_TITLE
    self._context = without_title | contexts.WIKILINK_TEXT
    self._write(tokens.WikilinkSeparator())
def _handle_wikilink_end(self):
    """Handle the closing ``]]`` of a wikilink and return its stack.

    When the link has no separator (the context is still
    ``WIKILINK_TITLE``), the title is checked with ``_verify_safe`` before
    the stack is popped. The head is advanced by one on the way out.
    """
    still_in_title = self._context & contexts.WIKILINK_TITLE
    if still_in_title:
        self._verify_safe(["\n", "{", "}", "[", "]"])
    self._head += 1
    return self._pop()
def _parse_heading(self): | def _parse_heading(self): | ||||
"""Parse a section heading at the head of the wikicode string.""" | """Parse a section heading at the head of the wikicode string.""" | ||||
self._global |= contexts.GL_HEADING | self._global |= contexts.GL_HEADING | ||||
@@ -307,7 +355,7 @@ class Tokenizer(object): | |||||
return self._pop(), after_level | return self._pop(), after_level | ||||
def _really_parse_entity(self): | def _really_parse_entity(self): | ||||
"""Actually parse a HTML entity and ensure that it is valid.""" | |||||
"""Actually parse an HTML entity and ensure that it is valid.""" | |||||
self._write(tokens.HTMLEntityStart()) | self._write(tokens.HTMLEntityStart()) | ||||
self._head += 1 | self._head += 1 | ||||
@@ -349,7 +397,7 @@ class Tokenizer(object): | |||||
self._write(tokens.HTMLEntityEnd()) | self._write(tokens.HTMLEntityEnd()) | ||||
def _parse_entity(self): | def _parse_entity(self): | ||||
"""Parse a HTML entity at the head of the wikicode string.""" | |||||
"""Parse an HTML entity at the head of the wikicode string.""" | |||||
reset = self._head | reset = self._head | ||||
self._push() | self._push() | ||||
try: | try: | ||||
@@ -360,6 +408,21 @@ class Tokenizer(object): | |||||
else: | else: | ||||
self._write_all(self._pop()) | self._write_all(self._pop()) | ||||
def _parse_comment(self):
    """Parse an HTML comment at the head of the wikicode string.

    The four-character ``<!--`` opener is skipped and the rest is parsed in
    the ``COMMENT`` context. If that route fails, the head is rewound to
    the opener's last character and ``<!--`` is written as plain text. On
    success the parsed tokens are written between ``CommentStart`` and
    ``CommentEnd``, and the head is moved forward by two.
    """
    rewind_to = self._head + 3
    self._head += 4
    try:
        body = self._parse(contexts.COMMENT)
    except BadRoute:
        self._head = rewind_to
        self._write_text("<!--")
        return
    self._write(tokens.CommentStart())
    self._write_all(body)
    self._write(tokens.CommentEnd())
    self._head += 2
def _parse(self, context=0): | def _parse(self, context=0): | ||||
"""Parse the wikicode string, using *context* for when to stop.""" | """Parse the wikicode string, using *context* for when to stop.""" | ||||
self._push(context) | self._push(context) | ||||
@@ -370,12 +433,18 @@ class Tokenizer(object): | |||||
self._head += 1 | self._head += 1 | ||||
continue | continue | ||||
if this is self.END: | if this is self.END: | ||||
fail = contexts.TEMPLATE | contexts.ARGUMENT | contexts.HEADING | |||||
fail = (contexts.TEMPLATE | contexts.ARGUMENT | | |||||
contexts.HEADING | contexts.COMMENT) | |||||
if self._context & fail: | if self._context & fail: | ||||
self._fail_route() | self._fail_route() | ||||
return self._pop() | return self._pop() | ||||
next = self._read(1) | next = self._read(1) | ||||
if this == next == "{": | |||||
if self._context & contexts.COMMENT: | |||||
if this == next == "-" and self._read(2) == ">": | |||||
return self._pop() | |||||
else: | |||||
self._write_text(this) | |||||
elif this == next == "{": | |||||
self._parse_template_or_argument() | self._parse_template_or_argument() | ||||
elif this == "|" and self._context & contexts.TEMPLATE: | elif this == "|" and self._context & contexts.TEMPLATE: | ||||
self._handle_template_param() | self._handle_template_param() | ||||
@@ -390,6 +459,15 @@ class Tokenizer(object): | |||||
return self._handle_argument_end() | return self._handle_argument_end() | ||||
else: | else: | ||||
self._write_text("}") | self._write_text("}") | ||||
elif this == next == "[": | |||||
if not self._context & contexts.WIKILINK_TITLE: | |||||
self._parse_wikilink() | |||||
else: | |||||
self._write_text("[") | |||||
elif this == "|" and self._context & contexts.WIKILINK_TITLE: | |||||
self._handle_wikilink_separator() | |||||
elif this == next == "]" and self._context & contexts.WIKILINK: | |||||
return self._handle_wikilink_end() | |||||
elif this == "=" and not self._global & contexts.GL_HEADING: | elif this == "=" and not self._global & contexts.GL_HEADING: | ||||
if self._read(-1) in ("\n", self.START): | if self._read(-1) in ("\n", self.START): | ||||
self._parse_heading() | self._parse_heading() | ||||
@@ -401,6 +479,11 @@ class Tokenizer(object): | |||||
self._fail_route() | self._fail_route() | ||||
elif this == "&": | elif this == "&": | ||||
self._parse_entity() | self._parse_entity() | ||||
elif this == "<" and next == "!": | |||||
if self._read(2) == self._read(3) == "-": | |||||
self._parse_comment() | |||||
else: | |||||
self._write_text(this) | |||||
else: | else: | ||||
self._write_text(this) | self._write_text(this) | ||||
self._head += 1 | self._head += 1 | ||||
@@ -63,6 +63,7 @@ class Token(object): | |||||
def __delattr__(self, key): | def __delattr__(self, key): | ||||
del self._kwargs[key] | del self._kwargs[key] | ||||
def make(name): | def make(name): | ||||
"""Create a new Token class using ``type()`` and add it to ``__all__``.""" | """Create a new Token class using ``type()`` and add it to ``__all__``.""" | ||||
__all__.append(name) | __all__.append(name) | ||||
@@ -79,6 +80,10 @@ ArgumentOpen = make("ArgumentOpen") # {{{ | |||||
ArgumentSeparator = make("ArgumentSeparator") # | | ArgumentSeparator = make("ArgumentSeparator") # | | ||||
ArgumentClose = make("ArgumentClose") # }}} | ArgumentClose = make("ArgumentClose") # }}} | ||||
WikilinkOpen = make("WikilinkOpen") # [[ | |||||
WikilinkSeparator = make("WikilinkSeparator") # | | |||||
WikilinkClose = make("WikilinkClose") # ]] | |||||
HTMLEntityStart = make("HTMLEntityStart") # & | HTMLEntityStart = make("HTMLEntityStart") # & | ||||
HTMLEntityNumeric = make("HTMLEntityNumeric") # # | HTMLEntityNumeric = make("HTMLEntityNumeric") # # | ||||
HTMLEntityHex = make("HTMLEntityHex") # x | HTMLEntityHex = make("HTMLEntityHex") # x | ||||
@@ -87,6 +92,9 @@ HTMLEntityEnd = make("HTMLEntityEnd") # ; | |||||
HeadingStart = make("HeadingStart") # =... | HeadingStart = make("HeadingStart") # =... | ||||
HeadingEnd = make("HeadingEnd") # =... | HeadingEnd = make("HeadingEnd") # =... | ||||
CommentStart = make("CommentStart") # <!-- | |||||
CommentEnd = make("CommentEnd") # --> | |||||
TagOpenOpen = make("TagOpenOpen") # < | TagOpenOpen = make("TagOpenOpen") # < | ||||
TagAttrStart = make("TagAttrStart") | TagAttrStart = make("TagAttrStart") | ||||
TagAttrEquals = make("TagAttrEquals") # = | TagAttrEquals = make("TagAttrEquals") # = | ||||
@@ -24,7 +24,7 @@ from __future__ import unicode_literals | |||||
import re | import re | ||||
from .compat import maxsize, str | from .compat import maxsize, str | ||||
from .nodes import Heading, Node, Tag, Template, Text | |||||
from .nodes import Heading, Node, Tag, Template, Text, Wikilink | |||||
from .string_mixin import StringMixIn | from .string_mixin import StringMixIn | ||||
from .utils import parse_anything | from .utils import parse_anything | ||||
@@ -303,6 +303,14 @@ class Wikicode(StringMixIn): | |||||
if not matches or re.search(matches, str(node), flags): | if not matches or re.search(matches, str(node), flags): | ||||
yield node | yield node | ||||
def ifilter_links(self, recursive=False, matches=None, flags=FLAGS):
    """Iterate over wikilink nodes.

    A thin wrapper around :py:meth:`ifilter`: *recursive*, *matches* and
    *flags* are forwarded unchanged, and *forcetype* is fixed to
    :py:class:`~.Wikilink`.
    """
    links = self.ifilter(recursive, matches, flags, forcetype=Wikilink)
    return links
def ifilter_templates(self, recursive=False, matches=None, flags=FLAGS): | def ifilter_templates(self, recursive=False, matches=None, flags=FLAGS): | ||||
"""Iterate over template nodes. | """Iterate over template nodes. | ||||
@@ -335,6 +343,14 @@ class Wikicode(StringMixIn): | |||||
""" | """ | ||||
return list(self.ifilter(recursive, matches, flags, forcetype)) | return list(self.ifilter(recursive, matches, flags, forcetype)) | ||||
def filter_links(self, recursive=False, matches=None, flags=FLAGS):
    """Return a list of wikilink nodes.

    Collects everything produced by :py:meth:`ifilter_links`, called with
    the same *recursive*, *matches* and *flags* arguments, into a list.
    """
    links = self.ifilter_links(recursive, matches, flags)
    return list(links)
def filter_templates(self, recursive=False, matches=None, flags=FLAGS): | def filter_templates(self, recursive=False, matches=None, flags=FLAGS): | ||||
"""Return a list of template nodes. | """Return a list of template nodes. | ||||