Browse Source

Support single and single-only tags like <br>.

tags/v0.3
Ben Kurtovic 10 years ago
parent
commit
7d1a28a249
4 changed files with 81 additions and 36 deletions
  1. +53
    -26
      mwparserfromhell/nodes/tag.py
  2. +14
    -9
      mwparserfromhell/parser/builder.py
  3. +4
    -0
      mwparserfromhell/parser/tokens.py
  4. +10
    -1
      mwparserfromhell/tag_defs.py

+ 53
- 26
mwparserfromhell/nodes/tag.py View File

@@ -33,20 +33,20 @@ class Tag(Node):
"""Represents an HTML-style tag in wikicode, like ``<ref>``."""

def __init__(self, tag, contents=None, attrs=None, showtag=True,
self_closing=False, padding="", closing_tag=None):
self_closing=False, invalid=False, implicit=False, padding="",
closing_tag=None):
super(Tag, self).__init__()
self._tag = tag
self._contents = contents
if attrs:
self._attrs = attrs
else:
self._attrs = []
self._attrs = attrs if attrs else []
self._showtag = showtag
self._self_closing = self_closing
self._invalid = invalid
self._implicit = implicit
self._padding = padding
if closing_tag:
self._closing_tag = closing_tag
else:
elif not self_closing:
self._closing_tag = tag

def __unicode__(self):
@@ -57,11 +57,11 @@ class Tag(Node):
else:
return open_ + str(self.contents) + close

result = "<" + str(self.tag)
result = ("</" if self.invalid else "<") + str(self.tag)
if self.attributes:
result += "".join([str(attr) for attr in self.attributes])
if self.self_closing:
result += self.padding + "/>"
result += self.padding + (">" if self.implicit else "/>")
else:
result += self.padding + ">" + str(self.contents)
result += "</" + str(self.closing_tag) + ">"
@@ -81,6 +81,9 @@ class Tag(Node):
if self.contents:
for child in getter(self.contents):
yield self.contents, child
if not self.self_closing and self.closing_tag:
for child in getter(self.closing_tag):
yield self.closing_tag, child

def __strip__(self, normalize, collapse):
if is_visible(self.tag):
@@ -88,27 +91,22 @@ class Tag(Node):
return None

def __showtree__(self, write, get, mark):
tagnodes = self.tag.nodes
if not self.attributes and (len(tagnodes) == 1 and
isinstance(tagnodes[0], Text)):
write("<" + str(tagnodes[0]) + ">")
write("</" if self.invalid else "<")
get(self.tag)
for attr in self.attributes:
get(attr.name)
if not attr.value:
continue
write(" = ")
mark()
get(attr.value)
if self.self_closing:
write(">" if self.implicit else "/>")
else:
write("<")
get(self.tag)
for attr in self.attributes:
get(attr.name)
if not attr.value:
continue
write(" = ")
mark()
get(attr.value)
write(">")
get(self.contents)
if len(tagnodes) == 1 and isinstance(tagnodes[0], Text):
write("</" + str(tagnodes[0]) + ">")
else:
get(self.contents)
write("</")
get(self.tag)
get(self.closing_tag)
write(">")

@property
@@ -140,6 +138,27 @@ class Tag(Node):
return self._self_closing

@property
def invalid(self):
    """Whether the tag starts with a slash after the opening bracket.

    This makes the tag look like a lone close tag: it is rendered with a
    leading ``</`` instead of ``<``. It is technically invalid and is only
    parsable Wikicode when the tag itself is single-only, like ``<br>`` and
    ``<img>``. See :py:func:`tag_defs.is_single_only`.
    """
    return self._invalid

@property
def implicit(self):
    """Whether the tag is implicitly self-closing, with no ending slash.

    When set on a self-closing tag, the tag is rendered ending in ``>``
    rather than ``/>``. This is only possible for specific "single" tags
    like ``<br>`` and ``<li>``. See :py:func:`tag_defs.is_single`. This
    field only has an effect if :py:attr:`self_closing` is also ``True``.
    """
    return self._implicit

@property
def padding(self):
    """Spacing to insert before the first closing ``>``.

    It is emitted after the tag name and any attributes, immediately before
    the ``>`` (or ``/>`` for a self-closing tag) that ends the opening tag.
    """
    return self._padding
@@ -169,6 +188,14 @@ class Tag(Node):
def self_closing(self, value):
self._self_closing = bool(value)

@invalid.setter
def invalid(self, value):
    """Store *value*, coerced to a ``bool``, as the tag's invalid flag."""
    flag = bool(value)
    self._invalid = flag

@implicit.setter
def implicit(self, value):
    """Store *value*, coerced to a ``bool``, as the tag's implicit flag."""
    flag = bool(value)
    self._implicit = flag

@padding.setter
def padding(self, value):
self._padding = str(value)


+ 14
- 9
mwparserfromhell/parser/builder.py View File

@@ -205,8 +205,9 @@ class Builder(object):

def _handle_tag(self, token):
"""Handle a case where a tag is at the head of the tokens."""
showtag = token.showtag
attrs = []
showtag, invalid = token.showtag, token.get("invalid", False)
implicit, attrs, contents, closing_tag = False, [], None, None
close_tokens = (tokens.TagCloseSelfclose, tokens.TagCloseClose)
self._push()
while self._tokens:
token = self._tokens.pop()
@@ -216,16 +217,20 @@ class Builder(object):
padding = token.padding
tag = self._pop()
self._push()
elif isinstance(token, tokens.TagCloseSelfclose):
tag = self._pop()
return Tag(tag, attrs=attrs, showtag=showtag,
self_closing=True, padding=token.padding)
elif isinstance(token, tokens.TagOpenClose):
contents = self._pop()
self._push()
elif isinstance(token, tokens.TagCloseClose):
return Tag(tag, contents, attrs, showtag, False, padding,
self._pop())
elif isinstance(token, close_tokens):
if isinstance(token, tokens.TagCloseSelfclose):
tag = self._pop()
self_closing = True
padding = token.padding
implicit = token.get("implicit", False)
else:
self_closing = False
closing_tag = self._pop()
return Tag(tag, contents, attrs, showtag, self_closing,
invalid, implicit, padding, closing_tag)
else:
self._write(self._handle_token(token))



+ 4
- 0
mwparserfromhell/parser/tokens.py View File

@@ -63,6 +63,10 @@ class Token(object):
def __delattr__(self, key):
del self._kwargs[key]

def get(self, key, default=None):
    """Look up *key* like :py:meth:`__getattr__`, with a fallback value.

    Returns the value stored under *key* in the token's keyword arguments,
    or *default* (``None`` unless given) when *key* is not present.
    """
    stored = self._kwargs
    return stored.get(key, default)


def make(name):
"""Create a new Token class using ``type()`` and add it to ``__all__``."""


+ 10
- 1
mwparserfromhell/tag_defs.py View File

@@ -24,7 +24,8 @@

from __future__ import unicode_literals

__all__ = ["get_wikicode", "is_parsable", "is_visible"]
__all__ = ["get_wikicode", "is_parsable", "is_visible", "is_single",
"is_single_only"]

PARSER_BLACKLIST = [
# enwiki extensions @ 2013-06-28
@@ -65,3 +66,11 @@ def is_parsable(tag):
def is_visible(tag):
    """Return whether or not the given *tag* contains visible text.

    The check is case-insensitive: *tag* is lowercased before it is looked
    up in ``INVISIBLE_TAGS``.
    """
    name = tag.lower()
    return name not in INVISIBLE_TAGS

def is_single(tag):
    """Return whether or not the given *tag* can exist without a close tag.

    The check is case-insensitive: *tag* is lowercased before it is looked
    up in ``SINGLE``.
    """
    name = tag.lower()
    return name in SINGLE

def is_single_only(tag):
    """Return whether or not the given *tag* must exist without a close tag.

    The check is case-insensitive: *tag* is lowercased before it is looked
    up in ``SINGLE_ONLY``.
    """
    name = tag.lower()
    return name in SINGLE_ONLY

Loading…
Cancel
Save