@@ -17,6 +17,14 @@ nodes Package | |||||
:undoc-members: | :undoc-members: | ||||
:show-inheritance: | :show-inheritance: | ||||
:mod:`comment` Module | |||||
--------------------- | |||||
.. automodule:: mwparserfromhell.nodes.comment | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
:mod:`heading` Module | :mod:`heading` Module | ||||
--------------------- | --------------------- | ||||
@@ -56,6 +64,14 @@ nodes Package | |||||
:undoc-members: | :undoc-members: | ||||
:show-inheritance: | :show-inheritance: | ||||
:mod:`wikilink` Module | |||||
---------------------- | |||||
.. automodule:: mwparserfromhell.nodes.wikilink | |||||
:members: | |||||
:undoc-members: | |||||
:show-inheritance: | |||||
Subpackages | Subpackages | ||||
----------- | ----------- | ||||
@@ -73,3 +73,4 @@ from .heading import Heading | |||||
from .html_entity import HTMLEntity | from .html_entity import HTMLEntity | ||||
from .tag import Tag | from .tag import Tag | ||||
from .template import Template | from .template import Template | ||||
from .wikilink import Wikilink |
@@ -45,7 +45,7 @@ class Heading(Node): | |||||
yield self.title, child | yield self.title, child | ||||
def __strip__(self, normalize, collapse): | def __strip__(self, normalize, collapse): | ||||
return self.title | |||||
return self.title.strip_code(normalize, collapse) | |||||
def __showtree__(self, write, get, mark): | def __showtree__(self, write, get, mark): | ||||
write("=" * self.level) | write("=" * self.level) | ||||
@@ -0,0 +1,81 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from __future__ import unicode_literals | |||||
from . import Node | |||||
from ..compat import str | |||||
from ..utils import parse_anything | |||||
__all__ = ["Wikilink"] | |||||
class Wikilink(Node):
    """Represents an internal wikilink, like ``[[Foo|Bar]]``.

    *title* is the link target and *text* is the optional display text.
    A *text* of ``None`` means the link has no ``|text`` part at all, which
    is how every method of this class distinguishes ``[[Foo]]`` from
    ``[[Foo|Bar]]``.
    """

    def __init__(self, title, text=None):
        super(Wikilink, self).__init__()
        # Stored exactly as given; unlike the property setters below, the
        # constructor does not run the values through parse_anything().
        self._title = title
        self._text = text

    def __unicode__(self):
        # Only emit the "|" separator when display text is actually present.
        if self.text is not None:
            return "[[" + str(self.title) + "|" + str(self.text) + "]]"
        return "[[" + str(self.title) + "]]"

    def __iternodes__(self, getter):
        # This node itself comes first, paired with None; then every child
        # produced by *getter* is paired with the attribute that holds it.
        yield None, self
        for child in getter(self.title):
            yield self.title, child
        if self.text is not None:
            for child in getter(self.text):
                yield self.text, child

    def __strip__(self, normalize, collapse):
        # The display text wins over the title when both are available.
        if self.text is not None:
            return self.text.strip_code(normalize, collapse)
        return self.title.strip_code(normalize, collapse)

    def __showtree__(self, write, get, mark):
        write("[[")
        get(self.title)
        if self.text is not None:
            write(" | ")
            mark()
            get(self.text)
        write("]]")

    @property
    def title(self):
        """The title of the linked page, as a :py:class:`~.Wikicode` object."""
        return self._title

    @property
    def text(self):
        """The text to display (if any), as a :py:class:`~.Wikicode` object."""
        return self._text

    @title.setter
    def title(self, value):
        self._title = parse_anything(value)

    @text.setter
    def text(self, value):
        # ``None`` is the sentinel for "no display text" used throughout this
        # class, so it must be stored as-is rather than parsed; otherwise a
        # link could never be turned back into the plain ``[[Foo]]`` form.
        if value is None:
            self._text = None
        else:
            self._text = parse_anything(value)
@@ -24,7 +24,8 @@ from __future__ import unicode_literals | |||||
from . import tokens | from . import tokens | ||||
from ..compat import str | from ..compat import str | ||||
from ..nodes import Argument, Comment, Heading, HTMLEntity, Tag, Template, Text | |||||
from ..nodes import (Argument, Comment, Heading, HTMLEntity, Tag, Template, | |||||
Text, Wikilink) | |||||
from ..nodes.extras import Attribute, Parameter | from ..nodes.extras import Attribute, Parameter | ||||
from ..smart_list import SmartList | from ..smart_list import SmartList | ||||
from ..wikicode import Wikicode | from ..wikicode import Wikicode | ||||
@@ -125,6 +126,22 @@ class Builder(object): | |||||
else: | else: | ||||
self._write(self._handle_token(token)) | self._write(self._handle_token(token)) | ||||
def _handle_wikilink(self):
    """Build a :py:class:`~.Wikilink` from the tokens following its opener.

    Tokens are consumed until a ``WikilinkClose`` is found. Anything seen
    before a ``WikilinkSeparator`` becomes the title; anything after it
    becomes the display text. If the tokens run out before a close token
    is seen, the method falls off the loop and returns ``None``.
    """
    title = None
    self._push()
    while self._tokens:
        current = self._tokens.pop()
        if isinstance(current, tokens.WikilinkSeparator):
            # Everything gathered so far is the title; start collecting
            # the display text on a fresh stack.
            title = self._pop()
            self._push()
            continue
        if isinstance(current, tokens.WikilinkClose):
            if title is None:
                # No separator was seen, so the popped stack is the title.
                return Wikilink(self._pop())
            return Wikilink(title, self._pop())
        self._write(self._handle_token(current))
def _handle_entity(self): | def _handle_entity(self): | ||||
"""Handle a case where an HTML entity is at the head of the tokens.""" | """Handle a case where an HTML entity is at the head of the tokens.""" | ||||
token = self._tokens.pop() | token = self._tokens.pop() | ||||
@@ -216,6 +233,8 @@ class Builder(object): | |||||
return self._handle_template() | return self._handle_template() | ||||
elif isinstance(token, tokens.ArgumentOpen): | elif isinstance(token, tokens.ArgumentOpen): | ||||
return self._handle_argument() | return self._handle_argument() | ||||
elif isinstance(token, tokens.WikilinkOpen): | |||||
return self._handle_wikilink() | |||||
elif isinstance(token, tokens.HTMLEntityStart): | elif isinstance(token, tokens.HTMLEntityStart): | ||||
return self._handle_entity() | return self._handle_entity() | ||||
elif isinstance(token, tokens.HeadingStart): | elif isinstance(token, tokens.HeadingStart): | ||||
@@ -46,6 +46,11 @@ Local (stack-specific) contexts: | |||||
* :py:const:`ARGUMENT_NAME` | * :py:const:`ARGUMENT_NAME` | ||||
* :py:const:`ARGUMENT_DEFAULT` | * :py:const:`ARGUMENT_DEFAULT` | ||||
* :py:const:`WIKILINK` | |||||
* :py:const:`WIKILINK_TITLE` | |||||
* :py:const:`WIKILINK_TEXT` | |||||
* :py:const:`HEADING` | * :py:const:`HEADING` | ||||
* :py:const:`HEADING_LEVEL_1` | * :py:const:`HEADING_LEVEL_1` | ||||
@@ -64,24 +69,28 @@ Global contexts: | |||||
# Local contexts: | # Local contexts: | ||||
TEMPLATE = 0b000000000111 | |||||
TEMPLATE_NAME = 0b000000000001 | |||||
TEMPLATE_PARAM_KEY = 0b000000000010 | |||||
TEMPLATE_PARAM_VALUE = 0b000000000100 | |||||
TEMPLATE = 0b00000000000111 | |||||
TEMPLATE_NAME = 0b00000000000001 | |||||
TEMPLATE_PARAM_KEY = 0b00000000000010 | |||||
TEMPLATE_PARAM_VALUE = 0b00000000000100 | |||||
ARGUMENT = 0b00000000011000 | |||||
ARGUMENT_NAME = 0b00000000001000 | |||||
ARGUMENT_DEFAULT = 0b00000000010000 | |||||
ARGUMENT = 0b000000011000 | |||||
ARGUMENT_NAME = 0b000000001000 | |||||
ARGUMENT_DEFAULT = 0b000000010000 | |||||
WIKILINK = 0b00000001100000 | |||||
WIKILINK_TITLE = 0b00000000100000 | |||||
WIKILINK_TEXT = 0b00000001000000 | |||||
HEADING = 0b011111100000 | |||||
HEADING_LEVEL_1 = 0b000000100000 | |||||
HEADING_LEVEL_2 = 0b000001000000 | |||||
HEADING_LEVEL_3 = 0b000010000000 | |||||
HEADING_LEVEL_4 = 0b000100000000 | |||||
HEADING_LEVEL_5 = 0b001000000000 | |||||
HEADING_LEVEL_6 = 0b010000000000 | |||||
HEADING = 0b01111110000000 | |||||
HEADING_LEVEL_1 = 0b00000010000000 | |||||
HEADING_LEVEL_2 = 0b00000100000000 | |||||
HEADING_LEVEL_3 = 0b00001000000000 | |||||
HEADING_LEVEL_4 = 0b00010000000000 | |||||
HEADING_LEVEL_5 = 0b00100000000000 | |||||
HEADING_LEVEL_6 = 0b01000000000000 | |||||
COMMENT = 0b100000000000 | |||||
COMMENT = 0b10000000000000 | |||||
# Global contexts: | # Global contexts: | ||||
@@ -278,6 +278,34 @@ class Tokenizer(object): | |||||
self._head += 2 | self._head += 2 | ||||
return self._pop() | return self._pop() | ||||
def _parse_wikilink(self):
    """Try to tokenize an internal wikilink starting at the current head.

    On success the link's tokens are written between a ``WikilinkOpen``
    and a ``WikilinkClose``. If parsing raises ``BadRoute``, the head is
    rewound and the opening ``[[`` is written as plain text instead.
    """
    self._head += 2
    # Rewind target: one position before the head after skipping "[[".
    # NOTE(review): presumably the caller advances the head afterwards.
    fallback = self._head - 1
    try:
        link_tokens = self._parse(contexts.WIKILINK_TITLE)
    except BadRoute:
        self._head = fallback
        self._write_text("[[")
        return
    self._write(tokens.WikilinkOpen())
    self._write_all(link_tokens)
    self._write(tokens.WikilinkClose())
def _handle_wikilink_separator(self):
    """Switch from a wikilink's title to its text at a ``|`` separator.

    The title collected so far is checked with ``_verify_safe`` first, then
    the context moves from ``WIKILINK_TITLE`` to ``WIKILINK_TEXT`` and a
    ``WikilinkSeparator`` token is written.
    """
    unsafe_in_title = ["\n", "{", "}", "[", "]"]
    self._verify_safe(unsafe_in_title)
    # Toggle the title bit, then set the text bit.
    self._context ^= contexts.WIKILINK_TITLE
    self._context |= contexts.WIKILINK_TEXT
    separator = tokens.WikilinkSeparator()
    self._write(separator)
def _handle_wikilink_end(self):
    """Finish the wikilink being parsed and return the result of ``_pop``.

    The ``_verify_safe`` check runs only while still in the title context,
    i.e. when no separator has been handled for this link.
    """
    still_in_title = self._context & contexts.WIKILINK_TITLE
    if still_in_title:
        self._verify_safe(["\n", "{", "}", "[", "]"])
    # Advance the head by one before handing the stack back.
    self._head += 1
    return self._pop()
def _parse_heading(self): | def _parse_heading(self): | ||||
"""Parse a section heading at the head of the wikicode string.""" | """Parse a section heading at the head of the wikicode string.""" | ||||
self._global |= contexts.GL_HEADING | self._global |= contexts.GL_HEADING | ||||
@@ -431,6 +459,15 @@ class Tokenizer(object): | |||||
return self._handle_argument_end() | return self._handle_argument_end() | ||||
else: | else: | ||||
self._write_text("}") | self._write_text("}") | ||||
elif this == next == "[": | |||||
if not self._context & contexts.WIKILINK_TITLE: | |||||
self._parse_wikilink() | |||||
else: | |||||
self._write_text("[") | |||||
elif this == "|" and self._context & contexts.WIKILINK_TITLE: | |||||
self._handle_wikilink_separator() | |||||
elif this == next == "]" and self._context & contexts.WIKILINK: | |||||
return self._handle_wikilink_end() | |||||
elif this == "=" and not self._global & contexts.GL_HEADING: | elif this == "=" and not self._global & contexts.GL_HEADING: | ||||
if self._read(-1) in ("\n", self.START): | if self._read(-1) in ("\n", self.START): | ||||
self._parse_heading() | self._parse_heading() | ||||
@@ -65,38 +65,43 @@ class Token(object): | |||||
def make(name): | def make(name): | ||||
"""Create a new Token class using ``type()`` and add it to ``__all__``.""" | """Create a new Token class using ``type()`` and add it to ``__all__``.""" | ||||
token = type(name if py3k else name.encode("utf8"), (Token,), {}) | |||||
globals()[name] = token | |||||
__all__.append(name) | __all__.append(name) | ||||
return type(name if py3k else name.encode("utf8"), (Token,), {}) | |||||
Text = make("Text") | |||||
TemplateOpen = make("TemplateOpen") # {{ | |||||
TemplateParamSeparator = make("TemplateParamSeparator") # | | |||||
TemplateParamEquals = make("TemplateParamEquals") # = | |||||
TemplateClose = make("TemplateClose") # }} | |||||
ArgumentOpen = make("ArgumentOpen") # {{{ | |||||
ArgumentSeparator = make("ArgumentSeparator") # | | |||||
ArgumentClose = make("ArgumentClose") # }}} | |||||
HTMLEntityStart = make("HTMLEntityStart") # & | |||||
HTMLEntityNumeric = make("HTMLEntityNumeric") # # | |||||
HTMLEntityHex = make("HTMLEntityHex") # x | |||||
HTMLEntityEnd = make("HTMLEntityEnd") # ; | |||||
HeadingStart = make("HeadingStart") # =... | |||||
HeadingEnd = make("HeadingEnd") # =... | |||||
CommentStart = make("CommentStart") # <!-- | |||||
CommentEnd = make("CommentEnd") # --> | |||||
TagOpenOpen = make("TagOpenOpen") # < | |||||
TagAttrStart = make("TagAttrStart") | |||||
TagAttrEquals = make("TagAttrEquals") # = | |||||
TagAttrQuote = make("TagAttrQuote") # " | |||||
TagCloseOpen = make("TagCloseOpen") # > | |||||
TagCloseSelfclose = make("TagCloseSelfclose") # /> | |||||
TagOpenClose = make("TagOpenClose") # </ | |||||
TagCloseClose = make("TagCloseClose") # > | |||||
make("Text") | |||||
make("TemplateOpen") # {{ | |||||
make("TemplateParamSeparator") # | | |||||
make("TemplateParamEquals") # = | |||||
make("TemplateClose") # }} | |||||
make("ArgumentOpen") # {{{ | |||||
make("ArgumentSeparator") # | | |||||
make("ArgumentClose") # }}} | |||||
make("WikilinkOpen") # [[ | |||||
make("WikilinkSeparator") # | | |||||
make("WikilinkClose") # ]] | |||||
make("HTMLEntityStart") # & | |||||
make("HTMLEntityNumeric") # # | |||||
make("HTMLEntityHex") # x | |||||
make("HTMLEntityEnd") # ; | |||||
make("HeadingStart") # =... | |||||
make("HeadingEnd") # =... | |||||
make("CommentStart") # <!-- | |||||
make("CommentEnd") # --> | |||||
make("TagOpenOpen") # < | |||||
make("TagAttrStart") | |||||
make("TagAttrEquals") # = | |||||
make("TagAttrQuote") # " | |||||
make("TagCloseOpen") # > | |||||
make("TagCloseSelfclose") # /> | |||||
make("TagOpenClose") # </ | |||||
make("TagCloseClose") # > | |||||
del make | del make |
@@ -24,7 +24,7 @@ from __future__ import unicode_literals | |||||
import re | import re | ||||
from .compat import maxsize, str | from .compat import maxsize, str | ||||
from .nodes import Heading, Node, Tag, Template, Text | |||||
from .nodes import Heading, Node, Tag, Template, Text, Wikilink | |||||
from .string_mixin import StringMixIn | from .string_mixin import StringMixIn | ||||
from .utils import parse_anything | from .utils import parse_anything | ||||
@@ -303,6 +303,14 @@ class Wikicode(StringMixIn): | |||||
if not matches or re.search(matches, str(node), flags): | if not matches or re.search(matches, str(node), flags): | ||||
yield node | yield node | ||||
def ifilter_links(self, recursive=False, matches=None, flags=FLAGS):
    """Iterate over wikilink nodes.

    A thin wrapper around :py:meth:`ifilter` that fixes *forcetype* to
    :py:class:`~.Wikilink`; the other arguments are passed through as-is.
    """
    links = self.ifilter(recursive, matches, flags, Wikilink)
    return links
def ifilter_templates(self, recursive=False, matches=None, flags=FLAGS): | def ifilter_templates(self, recursive=False, matches=None, flags=FLAGS): | ||||
"""Iterate over template nodes. | """Iterate over template nodes. | ||||
@@ -335,6 +343,14 @@ class Wikicode(StringMixIn): | |||||
""" | """ | ||||
return list(self.ifilter(recursive, matches, flags, forcetype)) | return list(self.ifilter(recursive, matches, flags, forcetype)) | ||||
def filter_links(self, recursive=False, matches=None, flags=FLAGS):
    """Return a list of wikilink nodes.

    Collects everything produced by :py:meth:`ifilter_links` (called with
    the same arguments) into a :py:func:`list`.
    """
    link_iter = self.ifilter_links(recursive, matches, flags)
    return list(link_iter)
def filter_templates(self, recursive=False, matches=None, flags=FLAGS): | def filter_templates(self, recursive=False, matches=None, flags=FLAGS): | ||||
"""Return a list of template nodes. | """Return a list of template nodes. | ||||