瀏覽代碼

Merge branch 'develop'

tags/v0.3
Ben Kurtovic 12 年之前
父節點
當前提交
828e795777
共有 16 個檔案被更改,包括 352 行新增和 54 行删除
  1. +1
    -0
      .gitignore
  2. +5
    -5
      README.rst
  3. +16
    -0
      docs/api/mwparserfromhell.nodes.rst
  4. +1
    -1
      docs/conf.py
  5. +1
    -1
      docs/index.rst
  6. +1
    -1
      docs/usage.rst
  7. +1
    -1
      mwparserfromhell/__init__.py
  8. +2
    -0
      mwparserfromhell/nodes/__init__.py
  9. +46
    -0
      mwparserfromhell/nodes/comment.py
  10. +1
    -1
      mwparserfromhell/nodes/heading.py
  11. +81
    -0
      mwparserfromhell/nodes/wikilink.py
  12. +34
    -2
      mwparserfromhell/parser/builder.py
  13. +44
    -31
      mwparserfromhell/parser/contexts.py
  14. +93
    -10
      mwparserfromhell/parser/tokenizer.py
  15. +8
    -0
      mwparserfromhell/parser/tokens.py
  16. +17
    -1
      mwparserfromhell/wikicode.py

+ 1
- 0
.gitignore 查看文件

@@ -4,4 +4,5 @@
.DS_Store
__pycache__
build
dist
docs/_build

+ 5
- 5
README.rst 查看文件

@@ -28,9 +28,9 @@ Normal usage is rather straightforward (where ``text`` is page text)::
>>> import mwparserfromhell
>>> wikicode = mwparserfromhell.parse(text)

``wikicode`` is a ``mwparserfromhell.wikicode.Wikicode`` object, which acts
like an ordinary ``unicode`` object (or ``str`` in Python 3) with some extra
methods. For example::
``wikicode`` is a ``mwparserfromhell.Wikicode`` object, which acts like an
ordinary ``unicode`` object (or ``str`` in Python 3) with some extra methods.
For example::

>>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?"
>>> wikicode = mwparserfromhell.parse(text)
@@ -70,7 +70,7 @@ passing ``recursive=True``::
>>> mwparserfromhell.parse(text).filter_templates(recursive=True)
['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']

Templates can be easily modified to add, remove, alter or params. ``Wikicode``
Templates can be easily modified to add, remove, or alter params. ``Wikicode``
can also be treated like a list with ``append()``, ``insert()``, ``remove()``,
``replace()``, and more::

@@ -131,7 +131,7 @@ following code (via the API_)::

.. _MediaWiki: http://mediawiki.org
.. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig
.. _Σ: http://en.wikipedia.org/wiki/User:Σ
.. _Σ: http://en.wikipedia.org/wiki/User:%CE%A3
.. _Python Package Index: http://pypi.python.org
.. _get pip: http://pypi.python.org/pypi/pip
.. _EarwigBot: https://github.com/earwig/earwigbot


+ 16
- 0
docs/api/mwparserfromhell.nodes.rst 查看文件

@@ -17,6 +17,14 @@ nodes Package
:undoc-members:
:show-inheritance:

:mod:`comment` Module
---------------------

.. automodule:: mwparserfromhell.nodes.comment
:members:
:undoc-members:
:show-inheritance:

:mod:`heading` Module
---------------------

@@ -56,6 +64,14 @@ nodes Package
:undoc-members:
:show-inheritance:

:mod:`wikilink` Module
----------------------

.. automodule:: mwparserfromhell.nodes.wikilink
:members:
:undoc-members:
:show-inheritance:

Subpackages
-----------



+ 1
- 1
docs/conf.py 查看文件

@@ -50,7 +50,7 @@ copyright = u'2012 Ben Kurtovic'
# The short X.Y version.
version = '0.1'
# The full version, including alpha/beta/rc tags.
release = '0.1'
release = '0.1.1'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.


+ 1
- 1
docs/index.rst 查看文件

@@ -9,7 +9,7 @@ Developed by Earwig_ with help from `Σ`_.

.. _MediaWiki: http://mediawiki.org
.. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig
.. _Σ: http://en.wikipedia.org/wiki/User:Σ
.. _Σ: http://en.wikipedia.org/wiki/User:%CE%A3

Installation
------------


+ 1
- 1
docs/usage.rst 查看文件

@@ -48,7 +48,7 @@ by passing *recursive=True*::
>>> mwparserfromhell.parse(text).filter_templates(recursive=True)
['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']

Templates can be easily modified to add, remove alter or params.
Templates can be easily modified to add, remove, or alter params.
:py:class:`~.Wikicode` can also be treated like a list with
:py:meth:`~.Wikicode.append`, :py:meth:`~.Wikicode.insert`,
:py:meth:`~.Wikicode.remove`, :py:meth:`~.Wikicode.replace`, and more::


+ 1
- 1
mwparserfromhell/__init__.py 查看文件

@@ -31,7 +31,7 @@ from __future__ import unicode_literals
__author__ = "Ben Kurtovic"
__copyright__ = "Copyright (C) 2012 Ben Kurtovic"
__license__ = "MIT License"
__version__ = "0.1"
__version__ = "0.1.1"
__email__ = "ben.kurtovic@verizon.net"

from . import nodes, parser, smart_list, string_mixin, wikicode


+ 2
- 0
mwparserfromhell/nodes/__init__.py 查看文件

@@ -68,7 +68,9 @@ class Node(StringMixIn):
from . import extras
from .text import Text
from .argument import Argument
from .comment import Comment
from .heading import Heading
from .html_entity import HTMLEntity
from .tag import Tag
from .template import Template
from .wikilink import Wikilink

+ 46
- 0
mwparserfromhell/nodes/comment.py 查看文件

@@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import unicode_literals

from . import Node
from ..compat import str

__all__ = ["Comment"]

class Comment(Node):
    """A hidden HTML comment node, such as ``<!-- foobar -->``.

    The text found between the delimiters is exposed through
    :py:attr:`contents`.
    """

    def __init__(self, contents):
        super(Comment, self).__init__()
        # Stored exactly as given; only the property setter coerces to a
        # string.
        self._contents = contents

    def __unicode__(self):
        # Wrap the stored text back in the comment delimiters.
        return "".join(("<!--", str(self.contents), "-->"))

    @property
    def contents(self):
        """The hidden text contained between ``<!--`` and ``-->``."""
        return self._contents

    @contents.setter
    def contents(self, value):
        # Assignments through the property are always converted to a string.
        self._contents = str(value)

+ 1
- 1
mwparserfromhell/nodes/heading.py 查看文件

@@ -45,7 +45,7 @@ class Heading(Node):
yield self.title, child

def __strip__(self, normalize, collapse):
return self.title
return self.title.strip_code(normalize, collapse)

def __showtree__(self, write, get, mark):
write("=" * self.level)


+ 81
- 0
mwparserfromhell/nodes/wikilink.py 查看文件

@@ -0,0 +1,81 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import unicode_literals

from . import Node
from ..compat import str
from ..utils import parse_anything

__all__ = ["Wikilink"]

class Wikilink(Node):
    """Represents an internal wikilink, like ``[[Foo|Bar]]``.

    *title* is the link target and *text* is the optional display text. A
    link without display text (``[[Foo]]``) has :py:attr:`text` set to
    ``None``.
    """

    def __init__(self, title, text=None):
        super(Wikilink, self).__init__()
        self._title = title
        self._text = text

    def __unicode__(self):
        # The "|text" part is only emitted when display text exists.
        if self.text is not None:
            return "[[" + str(self.title) + "|" + str(self.text) + "]]"
        return "[[" + str(self.title) + "]]"

    def __iternodes__(self, getter):
        """Yield ``(parent, node)`` pairs for this link and its children.

        The link itself comes first with a parent of ``None``, followed by
        everything *getter* produces for the title and then, if present, for
        the display text.
        """
        yield None, self
        for child in getter(self.title):
            yield self.title, child
        if self.text is not None:
            for child in getter(self.text):
                yield self.text, child

    def __strip__(self, normalize, collapse):
        """Return the stripped display text, or the stripped title if none."""
        if self.text is not None:
            return self.text.strip_code(normalize, collapse)
        return self.title.strip_code(normalize, collapse)

    def __showtree__(self, write, get, mark):
        """Emit the link's tree view using the supplied callbacks."""
        write("[[")
        get(self.title)
        if self.text is not None:
            write("    | ")
            mark()
            get(self.text)
        write("]]")

    @property
    def title(self):
        """The title of the linked page, as a :py:class:`~.Wikicode` object."""
        return self._title

    @title.setter
    def title(self, value):
        self._title = parse_anything(value)

    @property
    def text(self):
        """The text to display (if any), as a :py:class:`~.Wikicode` object."""
        return self._text

    @text.setter
    def text(self, value):
        # ``None`` means "no display text", the same state ``__init__``
        # defaults to and that every method above tests for. Keep it as-is
        # instead of handing it to ``parse_anything`` so the display text can
        # be removed again after it has been set.
        if value is None:
            self._text = None
        else:
            self._text = parse_anything(value)

+ 34
- 2
mwparserfromhell/parser/builder.py 查看文件

@@ -24,7 +24,8 @@ from __future__ import unicode_literals

from . import tokens
from ..compat import str
from ..nodes import Argument, Heading, HTMLEntity, Tag, Template, Text
from ..nodes import (Argument, Comment, Heading, HTMLEntity, Tag, Template,
Text, Wikilink)
from ..nodes.extras import Attribute, Parameter
from ..smart_list import SmartList
from ..wikicode import Wikicode
@@ -125,8 +126,24 @@ class Builder(object):
else:
self._write(self._handle_token(token))

def _handle_wikilink(self):
    """Build a Wikilink node from the tokens following a WikilinkOpen.

    Tokens are consumed until a ``WikilinkClose`` is found. Anything written
    before a ``WikilinkSeparator`` becomes the title; anything after it
    becomes the display text.
    """
    link_title = None
    self._push()
    while self._tokens:
        current = self._tokens.pop()
        if isinstance(current, tokens.WikilinkSeparator):
            # Title is finished; start a fresh stack for the display text.
            link_title = self._pop()
            self._push()
            continue
        if isinstance(current, tokens.WikilinkClose):
            if link_title is None:
                return Wikilink(self._pop())
            return Wikilink(link_title, self._pop())
        self._write(self._handle_token(current))

def _handle_entity(self):
"""Handle a case where a HTML entity is at the head of the tokens."""
"""Handle a case where an HTML entity is at the head of the tokens."""
token = self._tokens.pop()
if isinstance(token, tokens.HTMLEntityNumeric):
token = self._tokens.pop()
@@ -152,6 +169,17 @@ class Builder(object):
else:
self._write(self._handle_token(token))

def _handle_comment(self):
    """Build a Comment node from the tokens up to the next CommentEnd."""
    self._push()
    while self._tokens:
        current = self._tokens.pop()
        if not isinstance(current, tokens.CommentEnd):
            # Still inside the comment: collect the handled token.
            self._write(self._handle_token(current))
            continue
        return Comment(self._pop())

def _handle_attribute(self):
"""Handle a case where a tag attribute is at the head of the tokens."""
name, quoted = None, False
@@ -205,10 +233,14 @@ class Builder(object):
return self._handle_template()
elif isinstance(token, tokens.ArgumentOpen):
return self._handle_argument()
elif isinstance(token, tokens.WikilinkOpen):
return self._handle_wikilink()
elif isinstance(token, tokens.HTMLEntityStart):
return self._handle_entity()
elif isinstance(token, tokens.HeadingStart):
return self._handle_heading(token)
elif isinstance(token, tokens.CommentStart):
return self._handle_comment()
elif isinstance(token, tokens.TagOpenOpen):
return self._handle_tag(token)



+ 44
- 31
mwparserfromhell/parser/contexts.py 查看文件

@@ -35,49 +35,62 @@ will cover ``BAR == 0b10`` and ``BAZ == 0b01``).

Local (stack-specific) contexts:

* :py:const:`TEMPLATE` (``0b00000000111``)
* :py:const:`TEMPLATE`

* :py:const:`TEMPLATE_NAME` (``0b00000000001``)
* :py:const:`TEMPLATE_PARAM_KEY` (``0b00000000010``)
* :py:const:`TEMPLATE_PARAM_VALUE` (``0b00000000100``)
* :py:const:`TEMPLATE_NAME`
* :py:const:`TEMPLATE_PARAM_KEY`
* :py:const:`TEMPLATE_PARAM_VALUE`

* :py:const:`ARGUMENT` (``0b00000011000``)
* :py:const:`ARGUMENT`

* :py:const:`ARGUMENT_NAME` (``0b00000001000``)
* :py:const:`ARGUMENT_DEFAULT` (``0b00000010000``)
* :py:const:`ARGUMENT_NAME`
* :py:const:`ARGUMENT_DEFAULT`

* :py:const:`HEADING` (``0b111111000``)
* :py:const:`WIKILINK`

* :py:const:`HEADING_LEVEL_1` (``0b00000100000``)
* :py:const:`HEADING_LEVEL_2` (``0b00001000000``)
* :py:const:`HEADING_LEVEL_3` (``0b00010000000``)
* :py:const:`HEADING_LEVEL_4` (``0b00100000000``)
* :py:const:`HEADING_LEVEL_5` (``0b01000000000``)
* :py:const:`HEADING_LEVEL_6` (``0b10000000000``)
* :py:const:`WIKILINK_TITLE`
* :py:const:`WIKILINK_TEXT`

* :py:const:`HEADING`

* :py:const:`HEADING_LEVEL_1`
* :py:const:`HEADING_LEVEL_2`
* :py:const:`HEADING_LEVEL_3`
* :py:const:`HEADING_LEVEL_4`
* :py:const:`HEADING_LEVEL_5`
* :py:const:`HEADING_LEVEL_6`

* :py:const:`COMMENT`

Global contexts:

* :py:const:`GL_HEADING` (``0b1``)
* :py:const:`GL_HEADING`
"""

# Local contexts:

TEMPLATE = 0b00000000111
TEMPLATE_NAME = 0b00000000001
TEMPLATE_PARAM_KEY = 0b00000000010
TEMPLATE_PARAM_VALUE = 0b00000000100

ARGUMENT = 0b00000011000
ARGUMENT_NAME = 0b00000001000
ARGUMENT_DEFAULT = 0b00000010000

HEADING = 0b11111100000
HEADING_LEVEL_1 = 0b00000100000
HEADING_LEVEL_2 = 0b00001000000
HEADING_LEVEL_3 = 0b00010000000
HEADING_LEVEL_4 = 0b00100000000
HEADING_LEVEL_5 = 0b01000000000
HEADING_LEVEL_6 = 0b10000000000
TEMPLATE = 0b00000000000111
TEMPLATE_NAME = 0b00000000000001
TEMPLATE_PARAM_KEY = 0b00000000000010
TEMPLATE_PARAM_VALUE = 0b00000000000100

ARGUMENT = 0b00000000011000
ARGUMENT_NAME = 0b00000000001000
ARGUMENT_DEFAULT = 0b00000000010000

WIKILINK = 0b00000001100000
WIKILINK_TITLE = 0b00000000100000
WIKILINK_TEXT = 0b00000001000000

HEADING = 0b01111110000000
HEADING_LEVEL_1 = 0b00000010000000
HEADING_LEVEL_2 = 0b00000100000000
HEADING_LEVEL_3 = 0b00001000000000
HEADING_LEVEL_4 = 0b00010000000000
HEADING_LEVEL_5 = 0b00100000000000
HEADING_LEVEL_6 = 0b01000000000000

COMMENT = 0b10000000000000


# Global contexts:


+ 93
- 10
mwparserfromhell/parser/tokenizer.py 查看文件

@@ -41,8 +41,8 @@ class Tokenizer(object):
START = object()
END = object()
MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":",
"/", "-", "\n", END]
regex = re.compile(r"([{}\[\]<>|=&#*;:/\-\n])", flags=re.IGNORECASE)
"/", "-", "!", "\n", END]
regex = re.compile(r"([{}\[\]<>|=&#*;:/\-!\n])", flags=re.IGNORECASE)

def __init__(self):
self._text = None
@@ -83,9 +83,18 @@ class Tokenizer(object):
self._stack.append(tokens.Text(text="".join(self._textbuffer)))
self._textbuffer = []

def _pop(self):
"""Pop the current stack/context/textbuffer, returing the stack."""
def _pop(self, keep_context=False):
    """Pop the current stack/context/textbuffer, returning the stack.

    If *keep_context* is ``True``, the context of the stack being popped is
    copied onto the stack underneath it.
    """
    self._push_textbuffer()
    if not keep_context:
        return self._stacks.pop()[0]
    # Read the context before popping, then write it to what is now on top.
    saved_context = self._context
    popped = self._stacks.pop()[0]
    self._context = saved_context
    return popped

def _fail_route(self):
@@ -225,14 +234,23 @@ class Tokenizer(object):
if self._context & contexts.TEMPLATE_NAME:
self._verify_safe(["\n", "{", "}", "[", "]"])
self._context ^= contexts.TEMPLATE_NAME
if self._context & contexts.TEMPLATE_PARAM_VALUE:
elif self._context & contexts.TEMPLATE_PARAM_VALUE:
self._context ^= contexts.TEMPLATE_PARAM_VALUE
elif self._context & contexts.TEMPLATE_PARAM_KEY:
self._write_all(self._pop(keep_context=True))
self._context |= contexts.TEMPLATE_PARAM_KEY
self._write(tokens.TemplateParamSeparator())
self._push(self._context)

def _handle_template_param_value(self):
"""Handle a template parameter's value at the head of the string."""
self._verify_safe(["\n", "{{", "}}"])
try:
self._verify_safe(["\n", "{{", "}}"])
except BadRoute:
self._pop()
raise
else:
self._write_all(self._pop(keep_context=True))
self._context ^= contexts.TEMPLATE_PARAM_KEY
self._context |= contexts.TEMPLATE_PARAM_VALUE
self._write(tokens.TemplateParamEquals())
@@ -241,6 +259,8 @@ class Tokenizer(object):
"""Handle the end of a template at the head of the string."""
if self._context & contexts.TEMPLATE_NAME:
self._verify_safe(["\n", "{", "}", "[", "]"])
elif self._context & contexts.TEMPLATE_PARAM_KEY:
self._write_all(self._pop(keep_context=True))
self._head += 1
return self._pop()

@@ -258,6 +278,34 @@ class Tokenizer(object):
self._head += 2
return self._pop()

def _parse_wikilink(self):
    """Try to parse an internal wikilink starting at the head.

    On success the link's tokens are written between ``WikilinkOpen`` and
    ``WikilinkClose``. If the route fails, the opening ``[[`` is written as
    plain text instead.
    """
    self._head += 2
    # Position to rewind to if the link turns out to be invalid.
    fallback = self._head - 1
    try:
        link_tokens = self._parse(contexts.WIKILINK_TITLE)
    except BadRoute:
        self._head = fallback
        self._write_text("[[")
        return
    self._write(tokens.WikilinkOpen())
    self._write_all(link_tokens)
    self._write(tokens.WikilinkClose())

def _handle_wikilink_separator(self):
    """Handle the ``|`` that splits a wikilink's title from its text."""
    unsafe = ["\n", "{", "}", "[", "]"]
    self._verify_safe(unsafe)
    # Leave the title context and enter the text context.
    self._context = self._context ^ contexts.WIKILINK_TITLE
    self._context = self._context | contexts.WIKILINK_TEXT
    self._write(tokens.WikilinkSeparator())

def _handle_wikilink_end(self):
    """Handle the closing ``]]`` of a wikilink and return its stack."""
    still_in_title = self._context & contexts.WIKILINK_TITLE
    if still_in_title:
        # No separator was seen, so the title has not been verified yet.
        self._verify_safe(["\n", "{", "}", "[", "]"])
    self._head += 1
    return self._pop()

def _parse_heading(self):
"""Parse a section heading at the head of the wikicode string."""
self._global |= contexts.GL_HEADING
@@ -307,7 +355,7 @@ class Tokenizer(object):
return self._pop(), after_level

def _really_parse_entity(self):
"""Actually parse a HTML entity and ensure that it is valid."""
"""Actually parse an HTML entity and ensure that it is valid."""
self._write(tokens.HTMLEntityStart())
self._head += 1

@@ -349,7 +397,7 @@ class Tokenizer(object):
self._write(tokens.HTMLEntityEnd())

def _parse_entity(self):
"""Parse a HTML entity at the head of the wikicode string."""
"""Parse an HTML entity at the head of the wikicode string."""
reset = self._head
self._push()
try:
@@ -360,6 +408,21 @@ class Tokenizer(object):
else:
self._write_all(self._pop())

def _parse_comment(self):
    """Try to parse an HTML comment starting at the head.

    On success the comment's tokens are written between ``CommentStart`` and
    ``CommentEnd``. If the route fails, the opening ``<!--`` is written as
    plain text instead.
    """
    self._head += 4
    # Position to rewind to if the comment is never closed.
    fallback = self._head - 1
    try:
        body = self._parse(contexts.COMMENT)
    except BadRoute:
        self._head = fallback
        self._write_text("<!--")
        return
    self._write(tokens.CommentStart())
    self._write_all(body)
    self._write(tokens.CommentEnd())
    # Advance the head by two more characters after the comment body.
    self._head += 2

def _parse(self, context=0):
"""Parse the wikicode string, using *context* for when to stop."""
self._push(context)
@@ -370,12 +433,18 @@ class Tokenizer(object):
self._head += 1
continue
if this is self.END:
fail = contexts.TEMPLATE | contexts.ARGUMENT | contexts.HEADING
fail = (contexts.TEMPLATE | contexts.ARGUMENT |
contexts.HEADING | contexts.COMMENT)
if self._context & fail:
self._fail_route()
return self._pop()
next = self._read(1)
if this == next == "{":
if self._context & contexts.COMMENT:
if this == next == "-" and self._read(2) == ">":
return self._pop()
else:
self._write_text(this)
elif this == next == "{":
self._parse_template_or_argument()
elif this == "|" and self._context & contexts.TEMPLATE:
self._handle_template_param()
@@ -390,6 +459,15 @@ class Tokenizer(object):
return self._handle_argument_end()
else:
self._write_text("}")
elif this == next == "[":
if not self._context & contexts.WIKILINK_TITLE:
self._parse_wikilink()
else:
self._write_text("[")
elif this == "|" and self._context & contexts.WIKILINK_TITLE:
self._handle_wikilink_separator()
elif this == next == "]" and self._context & contexts.WIKILINK:
return self._handle_wikilink_end()
elif this == "=" and not self._global & contexts.GL_HEADING:
if self._read(-1) in ("\n", self.START):
self._parse_heading()
@@ -401,6 +479,11 @@ class Tokenizer(object):
self._fail_route()
elif this == "&":
self._parse_entity()
elif this == "<" and next == "!":
if self._read(2) == self._read(3) == "-":
self._parse_comment()
else:
self._write_text(this)
else:
self._write_text(this)
self._head += 1


+ 8
- 0
mwparserfromhell/parser/tokens.py 查看文件

@@ -63,6 +63,7 @@ class Token(object):
def __delattr__(self, key):
del self._kwargs[key]


def make(name):
"""Create a new Token class using ``type()`` and add it to ``__all__``."""
__all__.append(name)
@@ -79,6 +80,10 @@ ArgumentOpen = make("ArgumentOpen") # {{{
ArgumentSeparator = make("ArgumentSeparator") # |
ArgumentClose = make("ArgumentClose") # }}}

WikilinkOpen = make("WikilinkOpen") # [[
WikilinkSeparator = make("WikilinkSeparator") # |
WikilinkClose = make("WikilinkClose") # ]]

HTMLEntityStart = make("HTMLEntityStart") # &
HTMLEntityNumeric = make("HTMLEntityNumeric") # #
HTMLEntityHex = make("HTMLEntityHex") # x
@@ -87,6 +92,9 @@ HTMLEntityEnd = make("HTMLEntityEnd") # ;
HeadingStart = make("HeadingStart") # =...
HeadingEnd = make("HeadingEnd") # =...

CommentStart = make("CommentStart") # <!--
CommentEnd = make("CommentEnd") # -->

TagOpenOpen = make("TagOpenOpen") # <
TagAttrStart = make("TagAttrStart")
TagAttrEquals = make("TagAttrEquals") # =


+ 17
- 1
mwparserfromhell/wikicode.py 查看文件

@@ -24,7 +24,7 @@ from __future__ import unicode_literals
import re

from .compat import maxsize, str
from .nodes import Heading, Node, Tag, Template, Text
from .nodes import Heading, Node, Tag, Template, Text, Wikilink
from .string_mixin import StringMixIn
from .utils import parse_anything

@@ -303,6 +303,14 @@ class Wikicode(StringMixIn):
if not matches or re.search(matches, str(node), flags):
yield node

def ifilter_links(self, recursive=False, matches=None, flags=FLAGS):
    """Iterate over wikilink nodes.

    Shortcut for :py:meth:`ifilter` with *forcetype* fixed to
    :py:class:`~.Wikilink`; *recursive*, *matches*, and *flags* are passed
    through unchanged.
    """
    link_nodes = self.ifilter(recursive, matches, flags, forcetype=Wikilink)
    return link_nodes

def ifilter_templates(self, recursive=False, matches=None, flags=FLAGS):
"""Iterate over template nodes.

@@ -335,6 +343,14 @@ class Wikicode(StringMixIn):
"""
return list(self.ifilter(recursive, matches, flags, forcetype))

def filter_links(self, recursive=False, matches=None, flags=FLAGS):
    """Return a list of wikilink nodes.

    Collects everything produced by :py:meth:`ifilter_links` (called with
    the same arguments) into a :py:func:`list`.
    """
    found = self.ifilter_links(recursive, matches, flags)
    return list(found)

def filter_templates(self, recursive=False, matches=None, flags=FLAGS):
"""Return a list of template nodes.



Loading…
取消
儲存