From fca7e9dd80e7a815556ced4b2a38d41cad450f83 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 27 Jul 2012 17:01:41 -0400 Subject: [PATCH] Implement Heading, HTMLTag, HTMLTagAttribute, plus some fixes. --- mwparserfromhell/nodes/__init__.py | 2 + mwparserfromhell/nodes/extras/__init__.py | 1 + mwparserfromhell/nodes/extras/attribute.py | 50 +++++++++++++++ mwparserfromhell/nodes/extras/parameter.py | 2 +- mwparserfromhell/nodes/heading.py | 41 +++++++++++++ mwparserfromhell/nodes/tag.py | 98 ++++++++++++++++++++++++++++++ mwparserfromhell/wikicode.py | 64 ++++++++++++++++--- 7 files changed, 250 insertions(+), 8 deletions(-) create mode 100644 mwparserfromhell/nodes/extras/attribute.py create mode 100644 mwparserfromhell/nodes/heading.py create mode 100644 mwparserfromhell/nodes/tag.py diff --git a/mwparserfromhell/nodes/__init__.py b/mwparserfromhell/nodes/__init__.py index 6f4a471..2d5d1c2 100644 --- a/mwparserfromhell/nodes/__init__.py +++ b/mwparserfromhell/nodes/__init__.py @@ -29,5 +29,7 @@ class Node(StringMixIn): from mwparserfromhell.nodes import extras from mwparserfromhell.nodes.text import Text +from mwparserfromhell.nodes.heading import Heading from mwparserfromhell.nodes.html_entity import HTMLEntity +from mwparserfromhell.nodes.tag import Tag from mwparserfromhell.nodes.template import Template diff --git a/mwparserfromhell/nodes/extras/__init__.py b/mwparserfromhell/nodes/extras/__init__.py index 8a52268..4677456 100644 --- a/mwparserfromhell/nodes/extras/__init__.py +++ b/mwparserfromhell/nodes/extras/__init__.py @@ -20,4 +20,5 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
+from mwparserfromhell.nodes.extras.attribute import Attribute from mwparserfromhell.nodes.extras.parameter import Parameter diff --git a/mwparserfromhell/nodes/extras/attribute.py b/mwparserfromhell/nodes/extras/attribute.py new file mode 100644 index 0000000..5e1ea9c --- /dev/null +++ b/mwparserfromhell/nodes/extras/attribute.py @@ -0,0 +1,50 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
class Attribute(StringMixIn):
    """A single attribute of an HTML-style tag, such as ``name="value"``.

    ``name`` is the attribute key. ``value`` is the optional attribute value;
    when it is falsy the attribute is rendered as a bare name. ``quoted``
    selects whether a rendered value is wrapped in double quotes.
    """

    def __init__(self, name, value=None, quoted=True):
        self._name, self._value, self._quoted = name, value, quoted

    def __unicode__(self):
        """Render the attribute as ``name``, ``name=value`` or ``name="value"``."""
        rendered_name = unicode(self.name)
        # A missing (or otherwise falsy) value means there is no "=..." part.
        if not self.value:
            return rendered_name
        rendered_value = unicode(self.value)
        if not self.quoted:
            return rendered_name + "=" + rendered_value
        return rendered_name + '="' + rendered_value + '"'

    @property
    def name(self):
        """The attribute's name, as passed to the constructor."""
        return self._name

    @property
    def value(self):
        """The attribute's value, or ``None`` if it was not given."""
        return self._value

    @property
    def quoted(self):
        """Whether the value is surrounded by double quotes when rendered."""
        return self._quoted
class Heading(Node):
    """A section heading in wikicode, rendered as ``==title==``.

    ``title`` is the heading's contents and ``level`` is the number of ``=``
    characters placed on each side of it.
    """

    def __init__(self, title, level):
        self._title = title
        self._level = level

    def __unicode__(self):
        """Render the heading with ``level`` equals signs on both sides."""
        marker = "=" * self.level
        # Coerce the title explicitly, the same way Attribute and Tag coerce
        # their parts, so rendering does not depend on the title object
        # supporting concatenation with a plain string.
        return marker + unicode(self.title) + marker

    @property
    def title(self):
        """The heading's title, as passed to the constructor."""
        return self._title

    @property
    def level(self):
        """The heading's level: the count of ``=`` on each side."""
        return self._level
class Tag(Node):
    """An HTML-style tag in wikicode, rendered as ``<tag attrs>contents</tag>``.

    Constructor arguments:

    - ``type_``: the kind of tag; presumably one of the ``TAG_*`` constants
      below (not enforced here).
    - ``tag``: the tag name, rendered with ``unicode()``.
    - ``contents``: what sits between the opening and closing tags.
    - ``attrs``: optional list of attribute objects; a falsy value is replaced
      by a new empty list.
    - ``showtag``: when false, rendering is not implemented yet and
      ``__unicode__`` raises ``NotImplementedError``.
    - ``self_closing``: render as ``<tag/>`` with no contents or closing tag.
    - ``open_padding`` / ``close_padding``: number of spaces placed just
      before the ``>`` (or ``/>``) of the opening and closing tags.
    """

    TAG_UNKNOWN = 0
    TAG_BOLD = 1
    TAG_ITALIC = 2

    # This was a bare ``TAG_REF`` expression, which raises NameError as soon
    # as the class body is executed. NOTE(review): 3 is simply the next unused
    # id -- confirm the intended numbering for this group.
    TAG_REF = 3

    TAG_MISC_HTML = 99

    TAGS_VISIBLE = []
    TAGS_INVISIBLE = []

    def __init__(self, type_, tag, contents, attrs=None, showtag=True,
                 self_closing=False, open_padding=0, close_padding=0):
        self._type = type_
        self._tag = tag
        self._contents = contents
        # Never share a default list between instances.
        self._attrs = attrs if attrs else []
        self._showtag = showtag
        self._self_closing = self_closing
        self._open_padding = open_padding
        self._close_padding = close_padding

    def __unicode__(self):
        """Render the tag, its attributes, its contents and its closing tag.

        Raises ``NotImplementedError`` when ``showtag`` is false.
        """
        if not self.showtag:
            raise NotImplementedError()

        result = "<" + unicode(self.tag)
        if self.attrs:
            result += " " + u" ".join([unicode(attr) for attr in self.attrs])
        if self.self_closing:
            result += " " * self.open_padding + "/>"
        else:
            result += " " * self.open_padding + ">" + unicode(self.contents)
            # Emit the closing tag; previously an empty string was appended
            # here, so the tag was never closed and close_padding was unused.
            result += "</" + unicode(self.tag) + " " * self.close_padding + ">"
        return result

    @property
    def type(self):
        """The tag's type, as passed to the constructor."""
        return self._type

    @property
    def tag(self):
        """The tag's name."""
        return self._tag

    @property
    def contents(self):
        """The contents between the opening and closing tags."""
        return self._contents

    @property
    def attrs(self):
        """The list of attributes on the opening tag (possibly empty)."""
        return self._attrs

    @property
    def showtag(self):
        """Whether the tag is rendered in literal ``<tag>`` form."""
        return self._showtag

    @property
    def self_closing(self):
        """Whether the tag is rendered as ``<tag/>``."""
        return self._self_closing

    @property
    def open_padding(self):
        """Number of spaces before the end of the opening tag."""
        return self._open_padding

    @property
    def close_padding(self):
        """Number of spaces before the end of the closing tag."""
        return self._close_padding
mwparserfromhell.nodes import HTMLEntity, Node, Template, Text +from mwparserfromhell.nodes import ( + Heading, HTMLEntity, Node, Tag, Template, Text +) from mwparserfromhell.string_mixin import StringMixIn from mwparserfromhell.utils import parse_anything @@ -39,7 +41,22 @@ class Wikicode(StringMixIn): def _iterate_over_children(self, node): yield (None, node) - if isinstance(node, Template): + if isinstance(node, Heading): + for child in self._get_all_nodes(node.title): + yield (node.title, child) + elif isinstance(node, Tag): + if node.showtag: + for child in self._get_all_nodes(node.tag): + yield (node.tag, tag) + for attr in node.attrs: + for child in self._get_all_nodes(attr.name): + yield (attr.name, child) + if attr.value: + for child in self._get_all_nodes(attr.value): + yield (attr.value, child) + for child in self._get_all_nodes(node.contents): + yield (node.contents, child) + elif isinstance(node, Template): for child in self._get_all_nodes(node.name): yield (node.name, child) for param in node.params: @@ -103,11 +120,38 @@ class Wikicode(StringMixIn): last = lines.pop() lines.append(last + " ".join(args)) else: - lines.append(" " * indent + " ".join(args)) + lines.append(" " * 6 * indent + " ".join(args)) for node in code.nodes: - if isinstance(node, Template): - write("{{", ) + if isinstance(node, Heading): + write("=" * node.level) + self._get_tree(node.title, lines, marker, indent + 1) + write("=" * node.level) + elif isinstance(node, Tag): + tagnodes = node.tag.nodes + if (not node.attrs and len(tagnodes) == 1 and + isinstance(tagnodes[0], Text)): + write("<" + unicode(tagnodes[0]) + ">") + else: + write("<") + self._get_tree(node.tag, lines, marker, indent + 1) + for attr in node.attrs: + self._get_tree(attr.name, lines, marker, indent + 1) + if not attr.value: + continue + write(" = ") + lines.append(marker) # Continue from this line + self._get_tree(attr.value, lines, marker, indent + 1) + write(">") + self._get_tree(node.contents, lines, 
marker, indent + 1) + if len(tagnodes) == 1 and isinstance(tagnodes[0], Text): + write("") + else: + write("") + elif isinstance(node, Template): + write("{{") self._get_tree(node.name, lines, marker, indent + 1) for param in node.params: write(" | ") @@ -209,14 +253,20 @@ class Wikicode(StringMixIn): def strip_code(self, normalize=True, collapse=True): nodes = [] for node in self.nodes: - if isinstance(node, Text): - nodes.append(node) + if isinstance(node, Heading): + nodes.append(child.title) elif isinstance(node, HTMLEntity): if normalize: nodes.append(node.normalize()) else: nodes.append(node) + elif isinstance(node, Tag): + if node.type in node.TAGS_VISIBLE: + nodes.append(node.contents.strip_code(normalize, collapse)) + elif isinstance(node, Text): + nodes.append(node) + nodes = map(unicode, nodes) if collapse: stripped = u"".join(nodes).strip("\n") while "\n\n\n" in stripped: