Browse Source

Merge branch 'feature/links' into develop (#10)

Push early, push often!
tags/v0.1.1
Ben Kurtovic 11 years ago
parent
commit
53950e619a
9 changed files with 234 additions and 50 deletions
  1. +16
    -0
      docs/api/mwparserfromhell.nodes.rst
  2. +1
    -0
      mwparserfromhell/nodes/__init__.py
  3. +1
    -1
      mwparserfromhell/nodes/heading.py
  4. +81
    -0
      mwparserfromhell/nodes/wikilink.py
  5. +20
    -1
      mwparserfromhell/parser/builder.py
  6. +24
    -15
      mwparserfromhell/parser/contexts.py
  7. +37
    -0
      mwparserfromhell/parser/tokenizer.py
  8. +37
    -32
      mwparserfromhell/parser/tokens.py
  9. +17
    -1
      mwparserfromhell/wikicode.py

+ 16
- 0
docs/api/mwparserfromhell.nodes.rst View File

@@ -17,6 +17,14 @@ nodes Package
:undoc-members:
:show-inheritance:

:mod:`comment` Module
---------------------

.. automodule:: mwparserfromhell.nodes.comment
:members:
:undoc-members:
:show-inheritance:

:mod:`heading` Module
---------------------

@@ -56,6 +64,14 @@ nodes Package
:undoc-members:
:show-inheritance:

:mod:`wikilink` Module
----------------------

.. automodule:: mwparserfromhell.nodes.wikilink
:members:
:undoc-members:
:show-inheritance:

Subpackages
-----------



+ 1
- 0
mwparserfromhell/nodes/__init__.py View File

@@ -73,3 +73,4 @@ from .heading import Heading
from .html_entity import HTMLEntity
from .tag import Tag
from .template import Template
from .wikilink import Wikilink

+ 1
- 1
mwparserfromhell/nodes/heading.py View File

@@ -45,7 +45,7 @@ class Heading(Node):
yield self.title, child

def __strip__(self, normalize, collapse):
return self.title
return self.title.strip_code(normalize, collapse)

def __showtree__(self, write, get, mark):
write("=" * self.level)


+ 81
- 0
mwparserfromhell/nodes/wikilink.py View File

@@ -0,0 +1,81 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import unicode_literals

from . import Node
from ..compat import str
from ..utils import parse_anything

__all__ = ["Wikilink"]

class Wikilink(Node):
    """An internal wikilink node, e.g. ``[[Foo|Bar]]`` or ``[[Foo]]``.

    ``title`` holds the link target; ``text`` holds the optional display text
    and is ``None`` when the link has no ``|`` part.
    """

    def __init__(self, title, text=None):
        super(Wikilink, self).__init__()
        # Values are stored exactly as passed in; only the property setters
        # below route new values through parse_anything().
        self._title = title
        self._text = text

    def __unicode__(self):
        """Return the link as wikicode, including ``|text`` only if set."""
        if self.text is None:
            parts = ("[[", str(self.title), "]]")
        else:
            parts = ("[[", str(self.title), "|", str(self.text), "]]")
        return "".join(parts)

    def __iternodes__(self, getter):
        """Yield ``(parent, node)`` pairs for this node and its children.

        The node itself comes first with a parent of ``None``; afterwards
        every item produced by ``getter`` for the title, then for the text
        (when present), is paired with the attribute it came from.
        """
        yield None, self
        for node in getter(self.title):
            yield self.title, node
        if self.text is None:
            return
        for node in getter(self.text):
            yield self.text, node

    def __strip__(self, normalize, collapse):
        """Return the stripped display text, or the stripped title if none."""
        visible = self.title if self.text is None else self.text
        return visible.strip_code(normalize, collapse)

    def __showtree__(self, write, get, mark):
        """Emit a tree view of the link through the supplied callbacks.

        ``write`` receives literal markup, ``get`` receives the nested title
        and text objects, and ``mark`` is invoked once between the separator
        and the text.
        """
        write("[[")
        get(self.title)
        has_text = self.text is not None
        if has_text:
            write(" | ")
            mark()
            get(self.text)
        write("]]")

    @property
    def title(self):
        """The title of the linked page, as a :py:class:`~.Wikicode` object."""
        return self._title

    @title.setter
    def title(self, value):
        self._title = parse_anything(value)

    @property
    def text(self):
        """The text to display (if any), as a :py:class:`~.Wikicode` object."""
        return self._text

    @text.setter
    def text(self, value):
        self._text = parse_anything(value)

+ 20
- 1
mwparserfromhell/parser/builder.py View File

@@ -24,7 +24,8 @@ from __future__ import unicode_literals

from . import tokens
from ..compat import str
from ..nodes import Argument, Comment, Heading, HTMLEntity, Tag, Template, Text
from ..nodes import (Argument, Comment, Heading, HTMLEntity, Tag, Template,
Text, Wikilink)
from ..nodes.extras import Attribute, Parameter
from ..smart_list import SmartList
from ..wikicode import Wikicode
@@ -125,6 +126,22 @@ class Builder(object):
else:
self._write(self._handle_token(token))

def _handle_wikilink(self):
    """Build a Wikilink node from the tokens following a WikilinkOpen.

    Tokens are consumed until a WikilinkClose is found.  Whatever was
    collected before a WikilinkSeparator becomes the title and whatever
    follows becomes the text; without a separator the collected content is
    the title and the link has no text.  If the tokens run out before a
    WikilinkClose is seen, the loop ends and ``None`` is returned
    implicitly.
    """
    link_title = None
    self._push()
    while self._tokens:
        current = self._tokens.pop()
        if isinstance(current, tokens.WikilinkClose):
            contents = self._pop()
            if link_title is None:
                # No separator was seen: everything collected is the title.
                return Wikilink(contents)
            return Wikilink(link_title, contents)
        if isinstance(current, tokens.WikilinkSeparator):
            # Finish the title and start collecting the display text.
            link_title = self._pop()
            self._push()
            continue
        self._write(self._handle_token(current))

def _handle_entity(self):
"""Handle a case where an HTML entity is at the head of the tokens."""
token = self._tokens.pop()
@@ -216,6 +233,8 @@ class Builder(object):
return self._handle_template()
elif isinstance(token, tokens.ArgumentOpen):
return self._handle_argument()
elif isinstance(token, tokens.WikilinkOpen):
return self._handle_wikilink()
elif isinstance(token, tokens.HTMLEntityStart):
return self._handle_entity()
elif isinstance(token, tokens.HeadingStart):


+ 24
- 15
mwparserfromhell/parser/contexts.py View File

@@ -46,6 +46,11 @@ Local (stack-specific) contexts:
* :py:const:`ARGUMENT_NAME`
* :py:const:`ARGUMENT_DEFAULT`

* :py:const:`WIKILINK`

* :py:const:`WIKILINK_TITLE`
* :py:const:`WIKILINK_TEXT`

* :py:const:`HEADING`

* :py:const:`HEADING_LEVEL_1`
@@ -64,24 +69,28 @@ Global contexts:

# Local contexts:

TEMPLATE = 0b000000000111
TEMPLATE_NAME = 0b000000000001
TEMPLATE_PARAM_KEY = 0b000000000010
TEMPLATE_PARAM_VALUE = 0b000000000100
TEMPLATE = 0b00000000000111
TEMPLATE_NAME = 0b00000000000001
TEMPLATE_PARAM_KEY = 0b00000000000010
TEMPLATE_PARAM_VALUE = 0b00000000000100

ARGUMENT = 0b00000000011000
ARGUMENT_NAME = 0b00000000001000
ARGUMENT_DEFAULT = 0b00000000010000

ARGUMENT = 0b000000011000
ARGUMENT_NAME = 0b000000001000
ARGUMENT_DEFAULT = 0b000000010000
WIKILINK = 0b00000001100000
WIKILINK_TITLE = 0b00000000100000
WIKILINK_TEXT = 0b00000001000000

HEADING = 0b011111100000
HEADING_LEVEL_1 = 0b000000100000
HEADING_LEVEL_2 = 0b000001000000
HEADING_LEVEL_3 = 0b000010000000
HEADING_LEVEL_4 = 0b000100000000
HEADING_LEVEL_5 = 0b001000000000
HEADING_LEVEL_6 = 0b010000000000
HEADING = 0b01111110000000
HEADING_LEVEL_1 = 0b00000010000000
HEADING_LEVEL_2 = 0b00000100000000
HEADING_LEVEL_3 = 0b00001000000000
HEADING_LEVEL_4 = 0b00010000000000
HEADING_LEVEL_5 = 0b00100000000000
HEADING_LEVEL_6 = 0b01000000000000

COMMENT = 0b100000000000
COMMENT = 0b10000000000000


# Global contexts:


+ 37
- 0
mwparserfromhell/parser/tokenizer.py View File

@@ -278,6 +278,34 @@ class Tokenizer(object):
self._head += 2
return self._pop()

def _parse_wikilink(self):
    """Try to parse an internal wikilink starting at the current ``[[``.

    On success the parsed tokens are written out wrapped in WikilinkOpen /
    WikilinkClose.  If parsing raises BadRoute, the head is moved back and
    the two brackets are written as plain text instead.
    """
    self._head += 2
    # One position before the first character after "[[".
    # NOTE(review): presumably the caller advances the head once more after
    # this method returns - confirm against the main parse loop.
    fallback = self._head - 1
    try:
        link_tokens = self._parse(contexts.WIKILINK_TITLE)
    except BadRoute:
        self._head = fallback
        self._write_text("[[")
        return
    self._write(tokens.WikilinkOpen())
    self._write_all(link_tokens)
    self._write(tokens.WikilinkClose())

def _handle_wikilink_separator(self):
    """Switch from a wikilink's title to its text at a ``|`` separator.

    The collected title is checked with ``_verify_safe`` first, then the
    context flags are updated and a WikilinkSeparator token is written.
    """
    # NOTE(review): presumably _verify_safe rejects the route when any of
    # these characters occur in the title - confirm against its definition.
    unsafe_chars = ["\n", "{", "}", "[", "]"]
    self._verify_safe(unsafe_chars)
    # Toggle the TITLE bit (set whenever the dispatcher routes here), then
    # turn on the TEXT bit.
    self._context ^= contexts.WIKILINK_TITLE
    self._context |= contexts.WIKILINK_TEXT
    separator = tokens.WikilinkSeparator()
    self._write(separator)

def _handle_wikilink_end(self):
    """Finish the wikilink being parsed when ``]]`` is reached.

    The ``_verify_safe`` check is only applied while still in the title
    context, i.e. when no separator was encountered.  The head is advanced
    by one and the result of ``_pop()`` is returned.
    """
    still_in_title = self._context & contexts.WIKILINK_TITLE
    if still_in_title:
        unsafe_chars = ["\n", "{", "}", "[", "]"]
        self._verify_safe(unsafe_chars)
    self._head += 1
    return self._pop()

def _parse_heading(self):
"""Parse a section heading at the head of the wikicode string."""
self._global |= contexts.GL_HEADING
@@ -431,6 +459,15 @@ class Tokenizer(object):
return self._handle_argument_end()
else:
self._write_text("}")
elif this == next == "[":
if not self._context & contexts.WIKILINK_TITLE:
self._parse_wikilink()
else:
self._write_text("[")
elif this == "|" and self._context & contexts.WIKILINK_TITLE:
self._handle_wikilink_separator()
elif this == next == "]" and self._context & contexts.WIKILINK:
return self._handle_wikilink_end()
elif this == "=" and not self._global & contexts.GL_HEADING:
if self._read(-1) in ("\n", self.START):
self._parse_heading()


+ 37
- 32
mwparserfromhell/parser/tokens.py View File

@@ -65,38 +65,43 @@ class Token(object):

def make(name):
"""Create a new Token class using ``type()`` and add it to ``__all__``."""
token = type(name if py3k else name.encode("utf8"), (Token,), {})
globals()[name] = token
__all__.append(name)
return type(name if py3k else name.encode("utf8"), (Token,), {})

Text = make("Text")

TemplateOpen = make("TemplateOpen") # {{
TemplateParamSeparator = make("TemplateParamSeparator") # |
TemplateParamEquals = make("TemplateParamEquals") # =
TemplateClose = make("TemplateClose") # }}

ArgumentOpen = make("ArgumentOpen") # {{{
ArgumentSeparator = make("ArgumentSeparator") # |
ArgumentClose = make("ArgumentClose") # }}}

HTMLEntityStart = make("HTMLEntityStart") # &
HTMLEntityNumeric = make("HTMLEntityNumeric") # #
HTMLEntityHex = make("HTMLEntityHex") # x
HTMLEntityEnd = make("HTMLEntityEnd") # ;

HeadingStart = make("HeadingStart") # =...
HeadingEnd = make("HeadingEnd") # =...

CommentStart = make("CommentStart") # <!--
CommentEnd = make("CommentEnd") # -->

TagOpenOpen = make("TagOpenOpen") # <
TagAttrStart = make("TagAttrStart")
TagAttrEquals = make("TagAttrEquals") # =
TagAttrQuote = make("TagAttrQuote") # "
TagCloseOpen = make("TagCloseOpen") # >
TagCloseSelfclose = make("TagCloseSelfclose") # />
TagOpenClose = make("TagOpenClose") # </
TagCloseClose = make("TagCloseClose") # >

make("Text")

make("TemplateOpen") # {{
make("TemplateParamSeparator") # |
make("TemplateParamEquals") # =
make("TemplateClose") # }}

make("ArgumentOpen") # {{{
make("ArgumentSeparator") # |
make("ArgumentClose") # }}}

make("WikilinkOpen") # [[
make("WikilinkSeparator") # |
make("WikilinkClose") # ]]

make("HTMLEntityStart") # &
make("HTMLEntityNumeric") # #
make("HTMLEntityHex") # x
make("HTMLEntityEnd") # ;

make("HeadingStart") # =...
make("HeadingEnd") # =...

make("CommentStart") # <!--
make("CommentEnd") # -->

make("TagOpenOpen") # <
make("TagAttrStart")
make("TagAttrEquals") # =
make("TagAttrQuote") # "
make("TagCloseOpen") # >
make("TagCloseSelfclose") # />
make("TagOpenClose") # </
make("TagCloseClose") # >

del make

+ 17
- 1
mwparserfromhell/wikicode.py View File

@@ -24,7 +24,7 @@ from __future__ import unicode_literals
import re

from .compat import maxsize, str
from .nodes import Heading, Node, Tag, Template, Text
from .nodes import Heading, Node, Tag, Template, Text, Wikilink
from .string_mixin import StringMixIn
from .utils import parse_anything

@@ -303,6 +303,14 @@ class Wikicode(StringMixIn):
if not matches or re.search(matches, str(node), flags):
yield node

def ifilter_links(self, recursive=False, matches=None, flags=FLAGS):
    """Iterate over the wikilink nodes in this object.

    A thin wrapper around :py:meth:`ifilter` that passes the given
    arguments through and fixes *forcetype* to :py:class:`~.Wikilink`.
    """
    links = self.ifilter(recursive, matches, flags, forcetype=Wikilink)
    return links

def ifilter_templates(self, recursive=False, matches=None, flags=FLAGS):
"""Iterate over template nodes.

@@ -335,6 +343,14 @@ class Wikicode(StringMixIn):
"""
return list(self.ifilter(recursive, matches, flags, forcetype))

def filter_links(self, recursive=False, matches=None, flags=FLAGS):
    """Return the wikilink nodes in this object as a list.

    The result is :py:meth:`ifilter_links`, called with the same
    arguments, collected with :py:func:`list`.
    """
    link_iter = self.ifilter_links(recursive, matches, flags)
    return list(link_iter)

def filter_templates(self, recursive=False, matches=None, flags=FLAGS):
"""Return a list of template nodes.



Loading…
Cancel
Save