瀏覽代碼

Implement Heading, HTMLTag, HTMLTagAttribute, plus some fixes.

tags/v0.1
Ben Kurtovic 12 年之前
父節點
當前提交
fca7e9dd80
共有 7 個文件被更改,包括 250 次插入8 次删除
  1. +2
    -0
      mwparserfromhell/nodes/__init__.py
  2. +1
    -0
      mwparserfromhell/nodes/extras/__init__.py
  3. +50
    -0
      mwparserfromhell/nodes/extras/attribute.py
  4. +1
    -1
      mwparserfromhell/nodes/extras/parameter.py
  5. +41
    -0
      mwparserfromhell/nodes/heading.py
  6. +98
    -0
      mwparserfromhell/nodes/tag.py
  7. +57
    -7
      mwparserfromhell/wikicode.py

+ 2
- 0
mwparserfromhell/nodes/__init__.py 查看文件

@@ -29,5 +29,7 @@ class Node(StringMixIn):

from mwparserfromhell.nodes import extras
from mwparserfromhell.nodes.text import Text
from mwparserfromhell.nodes.heading import Heading
from mwparserfromhell.nodes.html_entity import HTMLEntity
from mwparserfromhell.nodes.tag import Tag
from mwparserfromhell.nodes.template import Template

+ 1
- 0
mwparserfromhell/nodes/extras/__init__.py 查看文件

@@ -20,4 +20,5 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from mwparserfromhell.nodes.extras.attribute import Attribute
from mwparserfromhell.nodes.extras.parameter import Parameter

+ 50
- 0
mwparserfromhell/nodes/extras/attribute.py 查看文件

@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from mwparserfromhell.string_mixin import StringMixIn

__all__ = ["Attribute"]

class Attribute(StringMixIn):
    """A single attribute of a tag: a name plus an optional value.

    Rendered as ``name="value"`` when quoted, ``name=value`` when not
    quoted, or just ``name`` when there is no value at all.
    """

    def __init__(self, name, value=None, quoted=True):
        """Store the attribute's parts.

        name   -- the attribute name
        value  -- the attribute value, or None for a value-less attribute
        quoted -- whether the value is wrapped in double quotes when rendered
        """
        self._name = name
        self._value = value
        self._quoted = quoted

    def __unicode__(self):
        name = unicode(self.name)
        # Compare against None instead of relying on truthiness: an
        # explicitly empty value (e.g. href="") must keep its '=' and quotes
        # so that it is not silently rendered as a value-less attribute.
        if self.value is None:
            return name
        value = unicode(self.value)
        if self.quoted:
            return name + '="' + value + '"'
        return name + "=" + value

    @property
    def name(self):
        """The attribute's name, as passed to the constructor."""
        return self._name

    @property
    def value(self):
        """The attribute's value, or None if it has none."""
        return self._value

    @property
    def quoted(self):
        """Whether the value is rendered inside double quotes."""
        return self._quoted

+ 1
- 1
mwparserfromhell/nodes/extras/parameter.py 查看文件

@@ -26,7 +26,7 @@ from mwparserfromhell.utils import parse_anything
__all__ = ["Parameter"]

class Parameter(StringMixIn):
def __init__(self, name, value=None, showkey=True):
def __init__(self, name, value, showkey=True):
self._name = name
self._value = value
self._showkey = showkey


+ 41
- 0
mwparserfromhell/nodes/heading.py 查看文件

@@ -0,0 +1,41 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from mwparserfromhell.nodes import Node

__all__ = ["Heading"]

class Heading(Node):
    """A section heading: a title surrounded by ``level`` equals signs."""

    def __init__(self, title, level):
        """Store the heading's parts.

        title -- the heading's contents
        level -- how many '=' characters are placed on each side of the title
        """
        self._title = title
        self._level = level

    def __unicode__(self):
        marker = "=" * self.level
        # Convert the title explicitly so that rendering does not depend on
        # the title object supporting concatenation with a plain string.
        return marker + unicode(self.title) + marker

    @property
    def title(self):
        """The heading's title, as passed to the constructor."""
        return self._title

    @property
    def level(self):
        """The number of '=' characters on each side of the title."""
        return self._level

+ 98
- 0
mwparserfromhell/nodes/tag.py 查看文件

@@ -0,0 +1,98 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from mwparserfromhell.nodes import Node
from mwparserfromhell.nodes.extras import Attribute

__all__ = ["Tag"]

class Tag(Node):
    """An HTML-style tag with optional attributes and contents.

    Rendered as ``<tag attrs>contents</tag>``, or ``<tag attrs/>`` when the
    tag is self-closing.
    """

    # Tag type identifiers, passed as ``type_`` to the constructor.
    TAG_UNKNOWN = 0
    TAG_BOLD = 1
    TAG_ITALIC = 2

    # NOTE(review): this was a bare name with no value, which raised
    # NameError as soon as the class body executed. 3 is simply the next
    # unused identifier -- confirm the intended value.
    TAG_REF = 3

    TAG_MISC_HTML = 99

    # Groups of tag types; both are empty here.
    # NOTE(review): presumably meant to list which TAG_* types have visible
    # contents -- confirm and populate.
    TAGS_VISIBLE = []
    TAGS_INVISIBLE = []

    def __init__(self, type_, tag, contents, attrs=None, showtag=True,
                 self_closing=False, open_padding=0, close_padding=0):
        """Store the tag's parts.

        type_         -- the tag type (presumably one of the TAG_* constants)
        tag           -- the tag's name
        contents      -- what appears between the opening and closing tags
        attrs         -- list of attributes; None or empty means no attributes
        showtag       -- whether the tag is rendered as literal ``<tag>`` text
        self_closing  -- whether the tag is rendered as ``<tag/>``
        open_padding  -- number of spaces before the '>' (or '/>') that ends
                         the opening tag
        close_padding -- number of spaces before the '>' that ends the
                         closing tag
        """
        self._type = type_
        self._tag = tag
        self._contents = contents
        # A falsy ``attrs`` (None or empty) is replaced by a fresh list so
        # that instances never share one.
        self._attrs = attrs if attrs else []
        self._showtag = showtag
        self._self_closing = self_closing
        self._open_padding = open_padding
        self._close_padding = close_padding

    def __unicode__(self):
        """Render the tag as text.

        Raises NotImplementedError when ``showtag`` is false, since that
        rendering is not implemented.
        """
        if not self.showtag:
            raise NotImplementedError()

        name = unicode(self.tag)
        result = "<" + name
        if self.attrs:
            result += " " + u" ".join([unicode(attr) for attr in self.attrs])
        result += " " * self.open_padding
        if self.self_closing:
            # Self-closing tags have no contents and no closing tag.
            return result + "/>"
        result += ">" + unicode(self.contents)
        result += "</" + name + " " * self.close_padding + ">"
        return result

    @property
    def type(self):
        """The tag type passed to the constructor."""
        return self._type

    @property
    def tag(self):
        """The tag's name."""
        return self._tag

    @property
    def contents(self):
        """What appears between the opening and closing tags."""
        return self._contents

    @property
    def attrs(self):
        """The list of the tag's attributes (possibly empty)."""
        return self._attrs

    @property
    def showtag(self):
        """Whether the tag is rendered as literal ``<tag>`` text."""
        return self._showtag

    @property
    def self_closing(self):
        """Whether the tag is rendered in the ``<tag/>`` form."""
        return self._self_closing

    @property
    def open_padding(self):
        """Number of spaces before the end of the opening tag."""
        return self._open_padding

    @property
    def close_padding(self):
        """Number of spaces before the end of the closing tag."""
        return self._close_padding

+ 57
- 7
mwparserfromhell/wikicode.py 查看文件

@@ -22,7 +22,9 @@

import re

from mwparserfromhell.nodes import HTMLEntity, Node, Template, Text
from mwparserfromhell.nodes import (
Heading, HTMLEntity, Node, Tag, Template, Text
)
from mwparserfromhell.string_mixin import StringMixIn
from mwparserfromhell.utils import parse_anything

@@ -39,7 +41,22 @@ class Wikicode(StringMixIn):

def _iterate_over_children(self, node):
yield (None, node)
if isinstance(node, Template):
if isinstance(node, Heading):
for child in self._get_all_nodes(node.title):
yield (node.title, child)
elif isinstance(node, Tag):
if node.showtag:
for child in self._get_all_nodes(node.tag):
yield (node.tag, tag)
for attr in node.attrs:
for child in self._get_all_nodes(attr.name):
yield (attr.name, child)
if attr.value:
for child in self._get_all_nodes(attr.value):
yield (attr.value, child)
for child in self._get_all_nodes(node.contents):
yield (node.contents, child)
elif isinstance(node, Template):
for child in self._get_all_nodes(node.name):
yield (node.name, child)
for param in node.params:
@@ -103,11 +120,38 @@ class Wikicode(StringMixIn):
last = lines.pop()
lines.append(last + " ".join(args))
else:
lines.append(" " * indent + " ".join(args))
lines.append(" " * 6 * indent + " ".join(args))

for node in code.nodes:
if isinstance(node, Template):
write("{{", )
if isinstance(node, Heading):
write("=" * node.level)
self._get_tree(node.title, lines, marker, indent + 1)
write("=" * node.level)
elif isinstance(node, Tag):
tagnodes = node.tag.nodes
if (not node.attrs and len(tagnodes) == 1 and
isinstance(tagnodes[0], Text)):
write("<" + unicode(tagnodes[0]) + ">")
else:
write("<")
self._get_tree(node.tag, lines, marker, indent + 1)
for attr in node.attrs:
self._get_tree(attr.name, lines, marker, indent + 1)
if not attr.value:
continue
write(" = ")
lines.append(marker) # Continue from this line
self._get_tree(attr.value, lines, marker, indent + 1)
write(">")
self._get_tree(node.contents, lines, marker, indent + 1)
if len(tagnodes) == 1 and isinstance(tagnodes[0], Text):
write("</" + unicode(tagnodes[0]) + ">")
else:
write("</")
self._get_tree(node.tag, lines, marker, indent + 1)
write(">")
elif isinstance(node, Template):
write("{{")
self._get_tree(node.name, lines, marker, indent + 1)
for param in node.params:
write(" | ")
@@ -209,14 +253,20 @@ class Wikicode(StringMixIn):
def strip_code(self, normalize=True, collapse=True):
nodes = []
for node in self.nodes:
if isinstance(node, Text):
nodes.append(node)
if isinstance(node, Heading):
nodes.append(child.title)
elif isinstance(node, HTMLEntity):
if normalize:
nodes.append(node.normalize())
else:
nodes.append(node)
elif isinstance(node, Tag):
if node.type in node.TAGS_VISIBLE:
nodes.append(node.contents.strip_code(normalize, collapse))
elif isinstance(node, Text):
nodes.append(node)

nodes = map(unicode, nodes)
if collapse:
stripped = u"".join(nodes).strip("\n")
while "\n\n\n" in stripped:


Loading…
取消
儲存