diff --git a/docs/api/mwparserfromhell.nodes.rst b/docs/api/mwparserfromhell.nodes.rst index 9db797d..d1016f9 100644 --- a/docs/api/mwparserfromhell.nodes.rst +++ b/docs/api/mwparserfromhell.nodes.rst @@ -17,6 +17,14 @@ nodes Package :undoc-members: :show-inheritance: +:mod:`comment` Module +--------------------- + +.. automodule:: mwparserfromhell.nodes.comment + :members: + :undoc-members: + :show-inheritance: + :mod:`heading` Module --------------------- @@ -56,6 +64,14 @@ nodes Package :undoc-members: :show-inheritance: +:mod:`wikilink` Module +---------------------- + +.. automodule:: mwparserfromhell.nodes.wikilink + :members: + :undoc-members: + :show-inheritance: + Subpackages ----------- diff --git a/mwparserfromhell/nodes/__init__.py b/mwparserfromhell/nodes/__init__.py index a56e916..86a8746 100644 --- a/mwparserfromhell/nodes/__init__.py +++ b/mwparserfromhell/nodes/__init__.py @@ -73,3 +73,4 @@ from .heading import Heading from .html_entity import HTMLEntity from .tag import Tag from .template import Template +from .wikilink import Wikilink diff --git a/mwparserfromhell/nodes/heading.py b/mwparserfromhell/nodes/heading.py index 97878b2..8f389d3 100644 --- a/mwparserfromhell/nodes/heading.py +++ b/mwparserfromhell/nodes/heading.py @@ -45,7 +45,7 @@ class Heading(Node): yield self.title, child def __strip__(self, normalize, collapse): - return self.title + return self.title.strip_code(normalize, collapse) def __showtree__(self, write, get, mark): write("=" * self.level) diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py new file mode 100644 index 0000000..73f2a8d --- /dev/null +++ b/mwparserfromhell/nodes/wikilink.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals + +from . import Node +from ..compat import str +from ..utils import parse_anything + +__all__ = ["Wikilink"] + +class Wikilink(Node): + """Represents an internal wikilink, like ``[[Foo|Bar]]``.""" + def __init__(self, title, text=None): + super(Wikilink, self).__init__() + self._title = title + self._text = text + + def __unicode__(self): + if self.text is not None: + return "[[" + str(self.title) + "|" + str(self.text) + "]]" + return "[[" + str(self.title) + "]]" + + def __iternodes__(self, getter): + yield None, self + for child in getter(self.title): + yield self.title, child + if self.text is not None: + for child in getter(self.text): + yield self.text, child + + def __strip__(self, normalize, collapse): + if self.text is not None: + return self.text.strip_code(normalize, collapse) + return self.title.strip_code(normalize, collapse) + + def __showtree__(self, write, get, mark): + write("[[") + get(self.title) + if self.text is not None: + write(" | ") + mark() + get(self.text) + write("]]") + + @property + def title(self): + """The title of the linked page, as a :py:class:`~.Wikicode` object.""" + return self._title + + @property + def text(self): + """The text to display (if any), as a :py:class:`~.Wikicode` object.""" + return self._text + + @title.setter + def title(self, value): + self._title = parse_anything(value) + + @text.setter + def text(self, value): + self._text = parse_anything(value) diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index e03d94f..61a8209 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -24,7 +24,8 @@ from __future__ import unicode_literals from . import tokens from ..compat import str -from ..nodes import Argument, Comment, Heading, HTMLEntity, Tag, Template, Text +from ..nodes import (Argument, Comment, Heading, HTMLEntity, Tag, Template, + Text, Wikilink) from ..nodes.extras import Attribute, Parameter from ..smart_list import SmartList from ..wikicode import Wikicode @@ -125,6 +126,22 @@ class Builder(object): else: self._write(self._handle_token(token)) + def _handle_wikilink(self): + """Handle a case where a wikilink is at the head of the tokens.""" + title = None + self._push() + while self._tokens: + token = self._tokens.pop() + if isinstance(token, tokens.WikilinkSeparator): + title = self._pop() + self._push() + elif isinstance(token, tokens.WikilinkClose): + if title is not None: + return Wikilink(title, self._pop()) + return Wikilink(self._pop()) + else: + self._write(self._handle_token(token)) + def _handle_entity(self): """Handle a case where an HTML entity is at the head of the tokens.""" token = self._tokens.pop() @@ -216,6 +233,8 @@ class Builder(object): return self._handle_template() elif isinstance(token, tokens.ArgumentOpen): return self._handle_argument() + elif isinstance(token, tokens.WikilinkOpen): + return self._handle_wikilink() elif isinstance(token, tokens.HTMLEntityStart): return self._handle_entity() elif isinstance(token, tokens.HeadingStart): diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index e1e96e1..9d41870 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -46,6 +46,11 @@ Local (stack-specific) contexts: * :py:const:`ARGUMENT_NAME` * :py:const:`ARGUMENT_DEFAULT` +* :py:const:`WIKILINK` + + * :py:const:`WIKILINK_TITLE` + * :py:const:`WIKILINK_TEXT` + * :py:const:`HEADING` * :py:const:`HEADING_LEVEL_1` @@ -64,24 +69,28 @@ Global contexts: # Local contexts: -TEMPLATE = 0b000000000111 -TEMPLATE_NAME = 0b000000000001 -TEMPLATE_PARAM_KEY = 0b000000000010 -TEMPLATE_PARAM_VALUE = 0b000000000100 +TEMPLATE = 0b00000000000111 +TEMPLATE_NAME = 0b00000000000001 +TEMPLATE_PARAM_KEY = 0b00000000000010 +TEMPLATE_PARAM_VALUE = 0b00000000000100 + +ARGUMENT = 0b00000000011000 +ARGUMENT_NAME = 0b00000000001000 +ARGUMENT_DEFAULT = 0b00000000010000 -ARGUMENT = 0b000000011000 -ARGUMENT_NAME = 0b000000001000 -ARGUMENT_DEFAULT = 0b000000010000 +WIKILINK = 0b00000001100000 +WIKILINK_TITLE = 0b00000000100000 +WIKILINK_TEXT = 0b00000001000000 -HEADING = 0b011111100000 -HEADING_LEVEL_1 = 0b000000100000 -HEADING_LEVEL_2 = 0b000001000000 -HEADING_LEVEL_3 = 0b000010000000 -HEADING_LEVEL_4 = 0b000100000000 -HEADING_LEVEL_5 = 0b001000000000 -HEADING_LEVEL_6 = 0b010000000000 +HEADING = 0b01111110000000 +HEADING_LEVEL_1 = 0b00000010000000 +HEADING_LEVEL_2 = 0b00000100000000 +HEADING_LEVEL_3 = 0b00001000000000 +HEADING_LEVEL_4 = 0b00010000000000 +HEADING_LEVEL_5 = 0b00100000000000 +HEADING_LEVEL_6 = 0b01000000000000 -COMMENT = 0b100000000000 +COMMENT = 0b10000000000000 # Global contexts: diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index e51a081..a8ce88f 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -278,6 +278,34 @@ class Tokenizer(object): self._head += 2 return self._pop() + def _parse_wikilink(self): + """Parse an internal wikilink at the head of the wikicode string.""" + self._head += 2 + reset = self._head - 1 + try: + wikilink = self._parse(contexts.WIKILINK_TITLE) + except BadRoute: + self._head = reset + self._write_text("[[") + else: + self._write(tokens.WikilinkOpen()) + self._write_all(wikilink) + self._write(tokens.WikilinkClose()) + + def _handle_wikilink_separator(self): + """Handle the separator between a wikilink's title and its text.""" + self._verify_safe(["\n", "{", "}", "[", "]"]) + self._context ^= contexts.WIKILINK_TITLE + self._context |= contexts.WIKILINK_TEXT + self._write(tokens.WikilinkSeparator()) + + def _handle_wikilink_end(self): + """Handle the end of a wikilink at the head of the string.""" + if self._context & contexts.WIKILINK_TITLE: + self._verify_safe(["\n", "{", "}", "[", "]"]) + self._head += 1 + return self._pop() + def _parse_heading(self): """Parse a section heading at the head of the wikicode string.""" self._global |= contexts.GL_HEADING @@ -431,6 +459,15 @@ class Tokenizer(object): return self._handle_argument_end() else: self._write_text("}") + elif this == next == "[": + if not self._context & contexts.WIKILINK_TITLE: + self._parse_wikilink() + else: + self._write_text("[") + elif this == "|" and self._context & contexts.WIKILINK_TITLE: + self._handle_wikilink_separator() + elif this == next == "]" and self._context & contexts.WIKILINK: + return self._handle_wikilink_end() elif this == "=" and not self._global & contexts.GL_HEADING: if self._read(-1) in ("\n", self.START): self._parse_heading() diff --git a/mwparserfromhell/parser/tokens.py b/mwparserfromhell/parser/tokens.py index ab6f356..4410df5 100644 --- a/mwparserfromhell/parser/tokens.py +++ b/mwparserfromhell/parser/tokens.py @@ -65,38 +65,43 @@ class Token(object): def make(name): """Create a new Token class using ``type()`` and add it to ``__all__``.""" + token = type(name if py3k else name.encode("utf8"), (Token,), {}) + globals()[name] = token __all__.append(name) - return type(name if py3k else name.encode("utf8"), (Token,), {}) - -Text = make("Text") - -TemplateOpen = make("TemplateOpen") # {{ -TemplateParamSeparator = make("TemplateParamSeparator") # | -TemplateParamEquals = make("TemplateParamEquals") # = -TemplateClose = make("TemplateClose") # }} - -ArgumentOpen = make("ArgumentOpen") # {{{ -ArgumentSeparator = make("ArgumentSeparator") # | -ArgumentClose = make("ArgumentClose") # }}} - -HTMLEntityStart = make("HTMLEntityStart") # & -HTMLEntityNumeric = make("HTMLEntityNumeric") # # -HTMLEntityHex = make("HTMLEntityHex") # x -HTMLEntityEnd = make("HTMLEntityEnd") # ; - -HeadingStart = make("HeadingStart") # =... -HeadingEnd = make("HeadingEnd") # =... - -CommentStart = make("CommentStart") # - -TagOpenOpen = make("TagOpenOpen") # < -TagAttrStart = make("TagAttrStart") -TagAttrEquals = make("TagAttrEquals") # = -TagAttrQuote = make("TagAttrQuote") # " -TagCloseOpen = make("TagCloseOpen") # > -TagCloseSelfclose = make("TagCloseSelfclose") # /> -TagOpenClose = make("TagOpenClose") # + +make("Text") + +make("TemplateOpen") # {{ +make("TemplateParamSeparator") # | +make("TemplateParamEquals") # = +make("TemplateClose") # }} + +make("ArgumentOpen") # {{{ +make("ArgumentSeparator") # | +make("ArgumentClose") # }}} + +make("WikilinkOpen") # [[ +make("WikilinkSeparator") # | +make("WikilinkClose") # ]] + +make("HTMLEntityStart") # & +make("HTMLEntityNumeric") # # +make("HTMLEntityHex") # x +make("HTMLEntityEnd") # ; + +make("HeadingStart") # =... +make("HeadingEnd") # =... + +make("CommentStart") # + +make("TagOpenOpen") # < +make("TagAttrStart") +make("TagAttrEquals") # = +make("TagAttrQuote") # " +make("TagCloseOpen") # > +make("TagCloseSelfclose") # /> +make("TagOpenClose") # del make diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index cebc61b..e0f5acd 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -24,7 +24,7 @@ from __future__ import unicode_literals import re from .compat import maxsize, str -from .nodes import Heading, Node, Tag, Template, Text +from .nodes import Heading, Node, Tag, Template, Text, Wikilink from .string_mixin import StringMixIn from .utils import parse_anything @@ -303,6 +303,14 @@ class Wikicode(StringMixIn): if not matches or re.search(matches, str(node), flags): yield node + def ifilter_links(self, recursive=False, matches=None, flags=FLAGS): + """Iterate over wikilink nodes. + + This is equivalent to :py:meth:`ifilter` with *forcetype* set to + :py:class:`~.Wikilink`. + """ + return self.ifilter(recursive, matches, flags, forcetype=Wikilink) + def ifilter_templates(self, recursive=False, matches=None, flags=FLAGS): """Iterate over template nodes. @@ -335,6 +343,14 @@ class Wikicode(StringMixIn): """ return list(self.ifilter(recursive, matches, flags, forcetype)) + def filter_links(self, recursive=False, matches=None, flags=FLAGS): + """Return a list of wikilink nodes. + + This is equivalent to calling :py:func:`list` on + :py:meth:`ifilter_links`. + """ + return list(self.ifilter_links(recursive, matches, flags)) + def filter_templates(self, recursive=False, matches=None, flags=FLAGS): """Return a list of template nodes.