@@ -17,6 +17,14 @@ nodes Package | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`comment` Module | |||
--------------------- | |||
.. automodule:: mwparserfromhell.nodes.comment | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`heading` Module | |||
--------------------- | |||
@@ -56,6 +64,14 @@ nodes Package | |||
:undoc-members: | |||
:show-inheritance: | |||
:mod:`wikilink` Module | |||
---------------------- | |||
.. automodule:: mwparserfromhell.nodes.wikilink | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
Subpackages | |||
----------- | |||
@@ -73,3 +73,4 @@ from .heading import Heading | |||
from .html_entity import HTMLEntity | |||
from .tag import Tag | |||
from .template import Template | |||
from .wikilink import Wikilink |
@@ -45,7 +45,7 @@ class Heading(Node): | |||
yield self.title, child | |||
def __strip__(self, normalize, collapse): | |||
return self.title | |||
return self.title.strip_code(normalize, collapse) | |||
def __showtree__(self, write, get, mark): | |||
write("=" * self.level) | |||
@@ -0,0 +1,81 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from . import Node | |||
from ..compat import str | |||
from ..utils import parse_anything | |||
__all__ = ["Wikilink"] | |||
class Wikilink(Node):
    """Represents an internal wikilink, like ``[[Foo|Bar]]``.

    A link always has a *title* (the target page) and may have *text* (the
    label shown in place of the title). A *text* of ``None`` means the link
    has no label at all, i.e. ``[[Foo]]``.
    """

    def __init__(self, title, text=None):
        """Create a link to *title*, optionally displayed as *text*."""
        super(Wikilink, self).__init__()
        self._title = title
        self._text = text

    def __unicode__(self):
        """Return the wikicode form: ``[[title|text]]`` or ``[[title]]``."""
        if self.text is not None:
            return "[[" + str(self.title) + "|" + str(self.text) + "]]"
        return "[[" + str(self.title) + "]]"

    def __iternodes__(self, getter):
        """Yield ``(parent, node)`` pairs for this node and its children.

        The link itself is yielded first with a parent of ``None``; every
        item produced by ``getter(self.title)`` follows, paired with the
        title, and then (only when text is present) every item produced by
        ``getter(self.text)``, paired with the text.
        """
        yield None, self
        for child in getter(self.title):
            yield self.title, child
        if self.text is not None:
            for child in getter(self.text):
                yield self.text, child

    def __strip__(self, normalize, collapse):
        """Return the stripped display form of the link.

        The text is used when present; otherwise the title is used. The
        *normalize* and *collapse* arguments are passed straight through to
        ``strip_code()``.
        """
        if self.text is not None:
            return self.text.strip_code(normalize, collapse)
        return self.title.strip_code(normalize, collapse)

    def __showtree__(self, write, get, mark):
        """Emit a tree view of the link using the given callbacks.

        *write* receives the literal delimiters, *get* receives the title
        and text, and *mark* is invoked once between the ``" | "`` separator
        and the text.
        """
        write("[[")
        get(self.title)
        if self.text is not None:
            write(" | ")
            mark()
            get(self.text)
        write("]]")

    @property
    def title(self):
        """The title of the linked page, as a :py:class:`~.Wikicode` object."""
        return self._title

    @property
    def text(self):
        """The text to display (if any), as a :py:class:`~.Wikicode` object."""
        return self._text

    @title.setter
    def title(self, value):
        self._title = parse_anything(value)

    @text.setter
    def text(self, value):
        # ``None`` is the sentinel every other method checks for "no display
        # text", so it must be stored as-is rather than handed to the parser;
        # otherwise a link's text could never be removed once set.
        if value is None:
            self._text = None
        else:
            self._text = parse_anything(value)
@@ -24,7 +24,8 @@ from __future__ import unicode_literals | |||
from . import tokens | |||
from ..compat import str | |||
from ..nodes import Argument, Comment, Heading, HTMLEntity, Tag, Template, Text | |||
from ..nodes import (Argument, Comment, Heading, HTMLEntity, Tag, Template, | |||
Text, Wikilink) | |||
from ..nodes.extras import Attribute, Parameter | |||
from ..smart_list import SmartList | |||
from ..wikicode import Wikicode | |||
@@ -125,6 +126,22 @@ class Builder(object): | |||
else: | |||
self._write(self._handle_token(token)) | |||
def _handle_wikilink(self):
    """Handle a case where a wikilink is at the head of the tokens.

    Tokens are consumed until a ``WikilinkClose`` is found, at which point
    a :py:class:`~.Wikilink` is returned. If a ``WikilinkSeparator`` was
    seen first, the nodes collected before it become the title and the
    nodes after it become the text; otherwise everything is the title.
    Falls through (returning ``None``) if the tokens run out first.
    """
    link_title = None
    self._push()
    while self._tokens:
        current = self._tokens.pop()
        if isinstance(current, tokens.WikilinkSeparator):
            # Everything gathered so far is the title; start a fresh
            # stack for the display text.
            link_title = self._pop()
            self._push()
            continue
        if not isinstance(current, tokens.WikilinkClose):
            self._write(self._handle_token(current))
            continue
        if link_title is None:
            return Wikilink(self._pop())
        return Wikilink(link_title, self._pop())
def _handle_entity(self): | |||
"""Handle a case where an HTML entity is at the head of the tokens.""" | |||
token = self._tokens.pop() | |||
@@ -216,6 +233,8 @@ class Builder(object): | |||
return self._handle_template() | |||
elif isinstance(token, tokens.ArgumentOpen): | |||
return self._handle_argument() | |||
elif isinstance(token, tokens.WikilinkOpen): | |||
return self._handle_wikilink() | |||
elif isinstance(token, tokens.HTMLEntityStart): | |||
return self._handle_entity() | |||
elif isinstance(token, tokens.HeadingStart): | |||
@@ -46,6 +46,11 @@ Local (stack-specific) contexts: | |||
* :py:const:`ARGUMENT_NAME` | |||
* :py:const:`ARGUMENT_DEFAULT` | |||
* :py:const:`WIKILINK` | |||
* :py:const:`WIKILINK_TITLE` | |||
* :py:const:`WIKILINK_TEXT` | |||
* :py:const:`HEADING` | |||
* :py:const:`HEADING_LEVEL_1` | |||
@@ -64,24 +69,28 @@ Global contexts: | |||
# Local contexts: | |||
TEMPLATE = 0b000000000111 | |||
TEMPLATE_NAME = 0b000000000001 | |||
TEMPLATE_PARAM_KEY = 0b000000000010 | |||
TEMPLATE_PARAM_VALUE = 0b000000000100 | |||
TEMPLATE = 0b00000000000111 | |||
TEMPLATE_NAME = 0b00000000000001 | |||
TEMPLATE_PARAM_KEY = 0b00000000000010 | |||
TEMPLATE_PARAM_VALUE = 0b00000000000100 | |||
ARGUMENT = 0b00000000011000 | |||
ARGUMENT_NAME = 0b00000000001000 | |||
ARGUMENT_DEFAULT = 0b00000000010000 | |||
ARGUMENT = 0b000000011000 | |||
ARGUMENT_NAME = 0b000000001000 | |||
ARGUMENT_DEFAULT = 0b000000010000 | |||
WIKILINK = 0b00000001100000 | |||
WIKILINK_TITLE = 0b00000000100000 | |||
WIKILINK_TEXT = 0b00000001000000 | |||
HEADING = 0b011111100000 | |||
HEADING_LEVEL_1 = 0b000000100000 | |||
HEADING_LEVEL_2 = 0b000001000000 | |||
HEADING_LEVEL_3 = 0b000010000000 | |||
HEADING_LEVEL_4 = 0b000100000000 | |||
HEADING_LEVEL_5 = 0b001000000000 | |||
HEADING_LEVEL_6 = 0b010000000000 | |||
HEADING = 0b01111110000000 | |||
HEADING_LEVEL_1 = 0b00000010000000 | |||
HEADING_LEVEL_2 = 0b00000100000000 | |||
HEADING_LEVEL_3 = 0b00001000000000 | |||
HEADING_LEVEL_4 = 0b00010000000000 | |||
HEADING_LEVEL_5 = 0b00100000000000 | |||
HEADING_LEVEL_6 = 0b01000000000000 | |||
COMMENT = 0b100000000000 | |||
COMMENT = 0b10000000000000 | |||
# Global contexts: | |||
@@ -278,6 +278,34 @@ class Tokenizer(object): | |||
self._head += 2 | |||
return self._pop() | |||
def _parse_wikilink(self):
    """Parse an internal wikilink at the head of the wikicode string.

    On success the link's tokens are written out wrapped in
    ``WikilinkOpen``/``WikilinkClose``. If parsing raises ``BadRoute``, the
    head is rewound and the opening brackets are written as plain text.
    """
    self._head += 2
    fallback = self._head - 1
    try:
        link_tokens = self._parse(contexts.WIKILINK_TITLE)
    except BadRoute:
        # Not a valid link after all: rewind and keep "[[" literally.
        self._head = fallback
        self._write_text("[[")
        return
    self._write(tokens.WikilinkOpen())
    self._write_all(link_tokens)
    self._write(tokens.WikilinkClose())
def _handle_wikilink_separator(self):
    """Handle the separator between a wikilink's title and its text.

    The title collected so far is checked against a list of unsafe
    characters, the context is switched from ``WIKILINK_TITLE`` to
    ``WIKILINK_TEXT``, and a ``WikilinkSeparator`` token is written.
    """
    unsafe_in_title = ["\n", "{", "}", "[", "]"]
    self._verify_safe(unsafe_in_title)
    # Toggle the title bit, then set the text bit.
    self._context = ((self._context ^ contexts.WIKILINK_TITLE)
                     | contexts.WIKILINK_TEXT)
    self._write(tokens.WikilinkSeparator())
def _handle_wikilink_end(self):
    """Handle the end of a wikilink at the head of the string.

    When still in the ``WIKILINK_TITLE`` context, the collected title is
    first checked against a list of unsafe characters. The head is then
    advanced by one and the popped stack is returned.
    """
    still_in_title = self._context & contexts.WIKILINK_TITLE
    if still_in_title:
        unsafe_in_title = ["\n", "{", "}", "[", "]"]
        self._verify_safe(unsafe_in_title)
    self._head = self._head + 1
    finished = self._pop()
    return finished
def _parse_heading(self): | |||
"""Parse a section heading at the head of the wikicode string.""" | |||
self._global |= contexts.GL_HEADING | |||
@@ -431,6 +459,15 @@ class Tokenizer(object): | |||
return self._handle_argument_end() | |||
else: | |||
self._write_text("}") | |||
elif this == next == "[": | |||
if not self._context & contexts.WIKILINK_TITLE: | |||
self._parse_wikilink() | |||
else: | |||
self._write_text("[") | |||
elif this == "|" and self._context & contexts.WIKILINK_TITLE: | |||
self._handle_wikilink_separator() | |||
elif this == next == "]" and self._context & contexts.WIKILINK: | |||
return self._handle_wikilink_end() | |||
elif this == "=" and not self._global & contexts.GL_HEADING: | |||
if self._read(-1) in ("\n", self.START): | |||
self._parse_heading() | |||
@@ -65,38 +65,43 @@ class Token(object): | |||
def make(name): | |||
"""Create a new Token class using ``type()`` and add it to ``__all__``.""" | |||
token = type(name if py3k else name.encode("utf8"), (Token,), {}) | |||
globals()[name] = token | |||
__all__.append(name) | |||
return type(name if py3k else name.encode("utf8"), (Token,), {}) | |||
Text = make("Text") | |||
TemplateOpen = make("TemplateOpen") # {{ | |||
TemplateParamSeparator = make("TemplateParamSeparator") # | | |||
TemplateParamEquals = make("TemplateParamEquals") # = | |||
TemplateClose = make("TemplateClose") # }} | |||
ArgumentOpen = make("ArgumentOpen") # {{{ | |||
ArgumentSeparator = make("ArgumentSeparator") # | | |||
ArgumentClose = make("ArgumentClose") # }}} | |||
HTMLEntityStart = make("HTMLEntityStart") # & | |||
HTMLEntityNumeric = make("HTMLEntityNumeric") # # | |||
HTMLEntityHex = make("HTMLEntityHex") # x | |||
HTMLEntityEnd = make("HTMLEntityEnd") # ; | |||
HeadingStart = make("HeadingStart") # =... | |||
HeadingEnd = make("HeadingEnd") # =... | |||
CommentStart = make("CommentStart") # <!-- | |||
CommentEnd = make("CommentEnd") # --> | |||
TagOpenOpen = make("TagOpenOpen") # < | |||
TagAttrStart = make("TagAttrStart") | |||
TagAttrEquals = make("TagAttrEquals") # = | |||
TagAttrQuote = make("TagAttrQuote") # " | |||
TagCloseOpen = make("TagCloseOpen") # > | |||
TagCloseSelfclose = make("TagCloseSelfclose") # /> | |||
TagOpenClose = make("TagOpenClose") # </ | |||
TagCloseClose = make("TagCloseClose") # > | |||
make("Text") | |||
make("TemplateOpen") # {{ | |||
make("TemplateParamSeparator") # | | |||
make("TemplateParamEquals") # = | |||
make("TemplateClose") # }} | |||
make("ArgumentOpen") # {{{ | |||
make("ArgumentSeparator") # | | |||
make("ArgumentClose") # }}} | |||
make("WikilinkOpen") # [[ | |||
make("WikilinkSeparator") # | | |||
make("WikilinkClose") # ]] | |||
make("HTMLEntityStart") # & | |||
make("HTMLEntityNumeric") # # | |||
make("HTMLEntityHex") # x | |||
make("HTMLEntityEnd") # ; | |||
make("HeadingStart") # =... | |||
make("HeadingEnd") # =... | |||
make("CommentStart") # <!-- | |||
make("CommentEnd") # --> | |||
make("TagOpenOpen") # < | |||
make("TagAttrStart") | |||
make("TagAttrEquals") # = | |||
make("TagAttrQuote") # " | |||
make("TagCloseOpen") # > | |||
make("TagCloseSelfclose") # /> | |||
make("TagOpenClose") # </ | |||
make("TagCloseClose") # > | |||
del make |
@@ -24,7 +24,7 @@ from __future__ import unicode_literals | |||
import re | |||
from .compat import maxsize, str | |||
from .nodes import Heading, Node, Tag, Template, Text | |||
from .nodes import Heading, Node, Tag, Template, Text, Wikilink | |||
from .string_mixin import StringMixIn | |||
from .utils import parse_anything | |||
@@ -303,6 +303,14 @@ class Wikicode(StringMixIn): | |||
if not matches or re.search(matches, str(node), flags): | |||
yield node | |||
def ifilter_links(self, recursive=False, matches=None, flags=FLAGS):
    """Iterate over wikilink nodes.

    Shorthand for calling :py:meth:`ifilter` with the same arguments and
    *forcetype* fixed to :py:class:`~.Wikilink`.
    """
    link_nodes = self.ifilter(recursive, matches, flags,
                              forcetype=Wikilink)
    return link_nodes
def ifilter_templates(self, recursive=False, matches=None, flags=FLAGS): | |||
"""Iterate over template nodes. | |||
@@ -335,6 +343,14 @@ class Wikicode(StringMixIn): | |||
""" | |||
return list(self.ifilter(recursive, matches, flags, forcetype)) | |||
def filter_links(self, recursive=False, matches=None, flags=FLAGS):
    """Return a list of wikilink nodes.

    Shorthand for passing the result of :py:meth:`ifilter_links` (called
    with the same arguments) to :py:func:`list`.
    """
    link_nodes = self.ifilter_links(recursive, matches, flags)
    return list(link_nodes)
def filter_templates(self, recursive=False, matches=None, flags=FLAGS): | |||
"""Return a list of template nodes. | |||