From 21d33e1802d2d179cfc161430f2224d9d29225a2 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 16 Sep 2012 23:42:08 -0400 Subject: [PATCH 1/4] Adding Wikilink class. --- mwparserfromhell/nodes/wikilink.py | 81 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 mwparserfromhell/nodes/wikilink.py diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py new file mode 100644 index 0000000..7619590 --- /dev/null +++ b/mwparserfromhell/nodes/wikilink.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals + +from . 
import Node +from ..compat import str +from ..utils import parse_anything + +__all__ = ["Wikilink"] + +class Wikilink(Node): + """Represents an internal wikilink, like ``[[Foo|Bar]]``.""" + def __init__(self, title, text=None): + super(Wikilink, self).__init__() + self._title = title + self._text = text + + def __unicode__(self): + if self.text is not None: + return "[[" + str(self.title) + "|" + str(self.text) + "]]" + return "[[" + str(self.title) + "]]" + + def __iternodes__(self, getter): + yield None, self + for child in getter(self.title): + yield self.title, child + if self.text is not None: + for child in getter(self.text): + yield self.text, child + + def __strip__(self, normalize, collapse): + if self.text is not None: + return self.text + return self.title + + def __showtree__(self, write, get, mark): + write("[[") + get(self.title) + if self.text is not None: + write(" | ") + mark() + get(self.text) + write("]]") + + @property + def title(self): + """The title of the linked page, as a :py:class:`~.Wikicode` object.""" + return self._title + + @property + def text(self): + """The text to display (if any), as a :py:class:`~.Wikicode` object.""" + return self._text + + @title.setter + def title(self, value): + self._title = parse_anything(value) + + @text.setter + def text(self, value): + self._text = parse_anything(value) From aa29a8bd2809f898363a8a733447672158b03145 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 17 Sep 2012 00:14:48 -0400 Subject: [PATCH 2/4] Wikilinks in builder; some additions; cleanup. 
--- docs/api/mwparserfromhell.nodes.rst | 16 +++++++++ mwparserfromhell/nodes/__init__.py | 1 + mwparserfromhell/parser/builder.py | 21 ++++++++++- mwparserfromhell/parser/tokens.py | 69 ++++++++++++++++++++----------------- mwparserfromhell/wikicode.py | 18 +++++++++- 5 files changed, 91 insertions(+), 34 deletions(-) diff --git a/docs/api/mwparserfromhell.nodes.rst b/docs/api/mwparserfromhell.nodes.rst index 9db797d..d1016f9 100644 --- a/docs/api/mwparserfromhell.nodes.rst +++ b/docs/api/mwparserfromhell.nodes.rst @@ -17,6 +17,14 @@ nodes Package :undoc-members: :show-inheritance: +:mod:`comment` Module +--------------------- + +.. automodule:: mwparserfromhell.nodes.comment + :members: + :undoc-members: + :show-inheritance: + :mod:`heading` Module --------------------- @@ -56,6 +64,14 @@ nodes Package :undoc-members: :show-inheritance: +:mod:`wikilink` Module +---------------------- + +.. automodule:: mwparserfromhell.nodes.wikilink + :members: + :undoc-members: + :show-inheritance: + Subpackages ----------- diff --git a/mwparserfromhell/nodes/__init__.py b/mwparserfromhell/nodes/__init__.py index a56e916..86a8746 100644 --- a/mwparserfromhell/nodes/__init__.py +++ b/mwparserfromhell/nodes/__init__.py @@ -73,3 +73,4 @@ from .heading import Heading from .html_entity import HTMLEntity from .tag import Tag from .template import Template +from .wikilink import Wikilink diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index e03d94f..61a8209 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -24,7 +24,8 @@ from __future__ import unicode_literals from . 
import tokens from ..compat import str -from ..nodes import Argument, Comment, Heading, HTMLEntity, Tag, Template, Text +from ..nodes import (Argument, Comment, Heading, HTMLEntity, Tag, Template, + Text, Wikilink) from ..nodes.extras import Attribute, Parameter from ..smart_list import SmartList from ..wikicode import Wikicode @@ -125,6 +126,22 @@ class Builder(object): else: self._write(self._handle_token(token)) + def _handle_wikilink(self): + """Handle a case where a wikilink is at the head of the tokens.""" + title = None + self._push() + while self._tokens: + token = self._tokens.pop() + if isinstance(token, tokens.WikilinkSeparator): + title = self._pop() + self._push() + elif isinstance(token, tokens.WikilinkClose): + if title is not None: + return Wikilink(title, self._pop()) + return Wikilink(self._pop()) + else: + self._write(self._handle_token(token)) + def _handle_entity(self): """Handle a case where an HTML entity is at the head of the tokens.""" token = self._tokens.pop() @@ -216,6 +233,8 @@ class Builder(object): return self._handle_template() elif isinstance(token, tokens.ArgumentOpen): return self._handle_argument() + elif isinstance(token, tokens.WikilinkOpen): + return self._handle_wikilink() elif isinstance(token, tokens.HTMLEntityStart): return self._handle_entity() elif isinstance(token, tokens.HeadingStart): diff --git a/mwparserfromhell/parser/tokens.py b/mwparserfromhell/parser/tokens.py index ab6f356..4410df5 100644 --- a/mwparserfromhell/parser/tokens.py +++ b/mwparserfromhell/parser/tokens.py @@ -65,38 +65,43 @@ class Token(object): def make(name): """Create a new Token class using ``type()`` and add it to ``__all__``.""" + token = type(name if py3k else name.encode("utf8"), (Token,), {}) + globals()[name] = token __all__.append(name) - return type(name if py3k else name.encode("utf8"), (Token,), {}) - -Text = make("Text") - -TemplateOpen = make("TemplateOpen") # {{ -TemplateParamSeparator = make("TemplateParamSeparator") # | 
-TemplateParamEquals = make("TemplateParamEquals") # = -TemplateClose = make("TemplateClose") # }} - -ArgumentOpen = make("ArgumentOpen") # {{{ -ArgumentSeparator = make("ArgumentSeparator") # | -ArgumentClose = make("ArgumentClose") # }}} - -HTMLEntityStart = make("HTMLEntityStart") # & -HTMLEntityNumeric = make("HTMLEntityNumeric") # # -HTMLEntityHex = make("HTMLEntityHex") # x -HTMLEntityEnd = make("HTMLEntityEnd") # ; - -HeadingStart = make("HeadingStart") # =... -HeadingEnd = make("HeadingEnd") # =... - -CommentStart = make("CommentStart") # <!-- -CommentEnd = make("CommentEnd") # --> - -TagOpenOpen = make("TagOpenOpen") # < -TagAttrStart = make("TagAttrStart") -TagAttrEquals = make("TagAttrEquals") # = -TagAttrQuote = make("TagAttrQuote") # " -TagCloseOpen = make("TagCloseOpen") # > -TagCloseSelfclose = make("TagCloseSelfclose") # /> -TagOpenClose = make("TagOpenClose") # </ -TagCloseClose = make("TagCloseClose") # > + +make("Text") + +make("TemplateOpen") # {{ +make("TemplateParamSeparator") # | +make("TemplateParamEquals") # = +make("TemplateClose") # }} + +make("ArgumentOpen") # {{{ +make("ArgumentSeparator") # | +make("ArgumentClose") # }}} + +make("WikilinkOpen") # [[ +make("WikilinkSeparator") # | +make("WikilinkClose") # ]] + +make("HTMLEntityStart") # & +make("HTMLEntityNumeric") # # +make("HTMLEntityHex") # x +make("HTMLEntityEnd") # ; + +make("HeadingStart") # =... +make("HeadingEnd") # =... 
+ +make("CommentStart") # <!-- +make("CommentEnd") # --> + +make("TagOpenOpen") # < +make("TagAttrStart") +make("TagAttrEquals") # = +make("TagAttrQuote") # " +make("TagCloseOpen") # > +make("TagCloseSelfclose") # /> +make("TagOpenClose") # </ +make("TagCloseClose") # > del make diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index cebc61b..e0f5acd 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -24,7 +24,7 @@ from __future__ import unicode_literals import re from .compat import maxsize, str -from .nodes import Heading, Node, Tag, Template, Text +from .nodes import Heading, Node, Tag, Template, Text, Wikilink from .string_mixin import StringMixIn from .utils import parse_anything @@ -303,6 +303,14 @@ class Wikicode(StringMixIn): if not matches or re.search(matches, str(node), flags): yield node + def ifilter_links(self, recursive=False, matches=None, flags=FLAGS): + """Iterate over wikilink nodes. + + This is equivalent to :py:meth:`ifilter` with *forcetype* set to + :py:class:`~.Wikilink`. + """ + return self.ifilter(recursive, matches, flags, forcetype=Wikilink) + def ifilter_templates(self, recursive=False, matches=None, flags=FLAGS): """Iterate over template nodes. @@ -335,6 +343,14 @@ class Wikicode(StringMixIn): """ return list(self.ifilter(recursive, matches, flags, forcetype)) + def filter_links(self, recursive=False, matches=None, flags=FLAGS): + """Return a list of wikilink nodes. + + This is equivalent to calling :py:func:`list` on + :py:meth:`ifilter_links`. + """ + return list(self.ifilter_links(recursive, matches, flags)) + def filter_templates(self, recursive=False, matches=None, flags=FLAGS): """Return a list of template nodes. From 99e466857bbe58739828416e5c235e6c661726fc Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 21 Sep 2012 22:33:54 -0400 Subject: [PATCH 3/4] Support wikilinks in tokenizer. 
--- mwparserfromhell/parser/contexts.py | 40 ++++++++++++++++++++---------------- mwparserfromhell/parser/tokenizer.py | 37 +++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 18 deletions(-) diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index e1e96e1..8187c6f 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -64,24 +64,28 @@ Global contexts: # Local contexts: -TEMPLATE = 0b000000000111 -TEMPLATE_NAME = 0b000000000001 -TEMPLATE_PARAM_KEY = 0b000000000010 -TEMPLATE_PARAM_VALUE = 0b000000000100 - -ARGUMENT = 0b000000011000 -ARGUMENT_NAME = 0b000000001000 -ARGUMENT_DEFAULT = 0b000000010000 - -HEADING = 0b011111100000 -HEADING_LEVEL_1 = 0b000000100000 -HEADING_LEVEL_2 = 0b000001000000 -HEADING_LEVEL_3 = 0b000010000000 -HEADING_LEVEL_4 = 0b000100000000 -HEADING_LEVEL_5 = 0b001000000000 -HEADING_LEVEL_6 = 0b010000000000 - -COMMENT = 0b100000000000 +TEMPLATE = 0b00000000000111 +TEMPLATE_NAME = 0b00000000000001 +TEMPLATE_PARAM_KEY = 0b00000000000010 +TEMPLATE_PARAM_VALUE = 0b00000000000100 + +ARGUMENT = 0b00000000011000 +ARGUMENT_NAME = 0b00000000001000 +ARGUMENT_DEFAULT = 0b00000000010000 + +WIKILINK = 0b00000001100000 +WIKILINK_TITLE = 0b00000000100000 +WIKILINK_TEXT = 0b00000001000000 + +HEADING = 0b01111110000000 +HEADING_LEVEL_1 = 0b00000010000000 +HEADING_LEVEL_2 = 0b00000100000000 +HEADING_LEVEL_3 = 0b00001000000000 +HEADING_LEVEL_4 = 0b00010000000000 +HEADING_LEVEL_5 = 0b00100000000000 +HEADING_LEVEL_6 = 0b01000000000000 + +COMMENT = 0b10000000000000 # Global contexts: diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index e51a081..a8ce88f 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -278,6 +278,34 @@ class Tokenizer(object): self._head += 2 return self._pop() + def _parse_wikilink(self): + """Parse an internal wikilink at the head of the wikicode string.""" + self._head += 2 
+ reset = self._head - 1 + try: + wikilink = self._parse(contexts.WIKILINK_TITLE) + except BadRoute: + self._head = reset + self._write_text("[[") + else: + self._write(tokens.WikilinkOpen()) + self._write_all(wikilink) + self._write(tokens.WikilinkClose()) + + def _handle_wikilink_separator(self): + """Handle the separator between a wikilink's title and its text.""" + self._verify_safe(["\n", "{", "}", "[", "]"]) + self._context ^= contexts.WIKILINK_TITLE + self._context |= contexts.WIKILINK_TEXT + self._write(tokens.WikilinkSeparator()) + + def _handle_wikilink_end(self): + """Handle the end of a wikilink at the head of the string.""" + if self._context & contexts.WIKILINK_TITLE: + self._verify_safe(["\n", "{", "}", "[", "]"]) + self._head += 1 + return self._pop() + def _parse_heading(self): """Parse a section heading at the head of the wikicode string.""" self._global |= contexts.GL_HEADING @@ -431,6 +459,15 @@ class Tokenizer(object): return self._handle_argument_end() else: self._write_text("}") + elif this == next == "[": + if not self._context & contexts.WIKILINK_TITLE: + self._parse_wikilink() + else: + self._write_text("[") + elif this == "|" and self._context & contexts.WIKILINK_TITLE: + self._handle_wikilink_separator() + elif this == next == "]" and self._context & contexts.WIKILINK: + return self._handle_wikilink_end() elif this == "=" and not self._global & contexts.GL_HEADING: if self._read(-1) in ("\n", self.START): self._parse_heading() From 3dbf0bc9bb896d53916b83b28e5b31a541219b86 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 21 Sep 2012 22:41:34 -0400 Subject: [PATCH 4/4] Fix heading/link code stripping; add WIKILINK contexts to documentation. 
--- mwparserfromhell/nodes/heading.py | 2 +- mwparserfromhell/nodes/wikilink.py | 4 ++-- mwparserfromhell/parser/contexts.py | 5 +++++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/nodes/heading.py b/mwparserfromhell/nodes/heading.py index 97878b2..8f389d3 100644 --- a/mwparserfromhell/nodes/heading.py +++ b/mwparserfromhell/nodes/heading.py @@ -45,7 +45,7 @@ class Heading(Node): yield self.title, child def __strip__(self, normalize, collapse): - return self.title + return self.title.strip_code(normalize, collapse) def __showtree__(self, write, get, mark): write("=" * self.level) diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index 7619590..73f2a8d 100644 --- a/mwparserfromhell/nodes/wikilink.py +++ b/mwparserfromhell/nodes/wikilink.py @@ -50,8 +50,8 @@ class Wikilink(Node): def __strip__(self, normalize, collapse): if self.text is not None: - return self.text - return self.title + return self.text.strip_code(normalize, collapse) + return self.title.strip_code(normalize, collapse) def __showtree__(self, write, get, mark): write("[[") diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index 8187c6f..9d41870 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -46,6 +46,11 @@ Local (stack-specific) contexts: * :py:const:`ARGUMENT_NAME` * :py:const:`ARGUMENT_DEFAULT` +* :py:const:`WIKILINK` + + * :py:const:`WIKILINK_TITLE` + * :py:const:`WIKILINK_TEXT` + * :py:const:`HEADING` * :py:const:`HEADING_LEVEL_1`