From 7d1a28a249d9c4e0dedc406154a1482a40fed9a2 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 9 Jul 2013 19:38:34 -0400 Subject: [PATCH] Support single and single-only tags like <br>
. --- mwparserfromhell/nodes/tag.py | 79 +++++++++++++++++++++++++------------- mwparserfromhell/parser/builder.py | 23 ++++++----- mwparserfromhell/parser/tokens.py | 4 ++ mwparserfromhell/tag_defs.py | 11 +++++- 4 files changed, 81 insertions(+), 36 deletions(-) diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index 76b412c..dc78b34 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -33,20 +33,20 @@ class Tag(Node): """Represents an HTML-style tag in wikicode, like ````.""" def __init__(self, tag, contents=None, attrs=None, showtag=True, - self_closing=False, padding="", closing_tag=None): + self_closing=False, invalid=False, implicit=False, padding="", + closing_tag=None): super(Tag, self).__init__() self._tag = tag self._contents = contents - if attrs: - self._attrs = attrs - else: - self._attrs = [] + self._attrs = attrs if attrs else [] self._showtag = showtag self._self_closing = self_closing + self._invalid = invalid + self._implicit = implicit self._padding = padding if closing_tag: self._closing_tag = closing_tag - else: + elif not self_closing: self._closing_tag = tag def __unicode__(self): @@ -57,11 +57,11 @@ class Tag(Node): else: return open_ + str(self.contents) + close - result = "<" + str(self.tag) + result = ("" + result += self.padding + (">" if self.implicit else "/>") else: result += self.padding + ">" + str(self.contents) result += "" @@ -81,6 +81,9 @@ class Tag(Node): if self.contents: for child in getter(self.contents): yield self.contents, child + if not self.self_closing and self.closing_tag: + for child in getter(self.closing_tag): + yield self.closing_tag, child def __strip__(self, normalize, collapse): if is_visible(self.tag): @@ -88,27 +91,22 @@ class Tag(Node): return None def __showtree__(self, write, get, mark): - tagnodes = self.tag.nodes - if not self.attributes and (len(tagnodes) == 1 and - isinstance(tagnodes[0], Text)): - write("<" + str(tagnodes[0]) + ">") + write("" if 
self.implicit else "/>") else: - write("<") - get(self.tag) - for attr in self.attributes: - get(attr.name) - if not attr.value: - continue - write(" = ") - mark() - get(attr.value) write(">") - get(self.contents) - if len(tagnodes) == 1 and isinstance(tagnodes[0], Text): - write("") - else: + get(self.contents) write("") @property @@ -140,6 +138,27 @@ class Tag(Node): return self._self_closing @property + def invalid(self): + """Whether the tag starts with a slash after the opening bracket. + + This makes the tag look like a lone close tag. It is technically + invalid and is only parsable Wikicode when the tag itself is + single-only, like ``
<br>`` and ``<img>``. See + :py:func:`tag_defs.is_single_only`. + """ + return self._invalid + + @property + def implicit(self): + """Whether the tag is implicitly self-closing, with no ending slash. + + This is only possible for specific "single" tags like ``
<br>`` and + ``
  • ``. See :py:func:`tag_defs.is_single`. This field only has an + effect if :py:attr:`self_closing` is also ``True``. + """ + return self._implicit + + @property def padding(self): """Spacing to insert before the first closing ``>``.""" return self._padding @@ -169,6 +188,14 @@ class Tag(Node): def self_closing(self, value): self._self_closing = bool(value) + @invalid.setter + def invalid(self, value): + self._invalid = bool(value) + + @implicit.setter + def implicit(self, value): + self._implicit = bool(value) + @padding.setter def padding(self, value): self._padding = str(value) diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index d92b845..6d31060 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -205,8 +205,9 @@ class Builder(object): def _handle_tag(self, token): """Handle a case where a tag is at the head of the tokens.""" - showtag = token.showtag - attrs = [] + showtag, invalid = token.showtag, token.get("invalid", False) + implicit, attrs, contents, closing_tag = False, [], None, None + close_tokens = (tokens.TagCloseSelfclose, tokens.TagCloseClose) self._push() while self._tokens: token = self._tokens.pop() @@ -216,16 +217,20 @@ class Builder(object): padding = token.padding tag = self._pop() self._push() - elif isinstance(token, tokens.TagCloseSelfclose): - tag = self._pop() - return Tag(tag, attrs=attrs, showtag=showtag, - self_closing=True, padding=token.padding) elif isinstance(token, tokens.TagOpenClose): contents = self._pop() self._push() - elif isinstance(token, tokens.TagCloseClose): - return Tag(tag, contents, attrs, showtag, False, padding, - self._pop()) + elif isinstance(token, close_tokens): + if isinstance(token, tokens.TagCloseSelfclose): + tag = self._pop() + self_closing = True + padding = token.padding + implicit = token.get("implicit", False) + else: + self_closing = False + closing_tag = self._pop() + return Tag(tag, contents, attrs, showtag, 
self_closing, + invalid, implicit, padding, closing_tag) else: self._write(self._handle_token(token)) diff --git a/mwparserfromhell/parser/tokens.py b/mwparserfromhell/parser/tokens.py index b11ca15..f3d89fc 100644 --- a/mwparserfromhell/parser/tokens.py +++ b/mwparserfromhell/parser/tokens.py @@ -63,6 +63,10 @@ class Token(object): def __delattr__(self, key): del self._kwargs[key] + def get(self, key, default=None): + """Same as :py:meth:`__getattr__`, but has a *default* if missing.""" + return self._kwargs.get(key, default) + def make(name): """Create a new Token class using ``type()`` and add it to ``__all__``.""" diff --git a/mwparserfromhell/tag_defs.py b/mwparserfromhell/tag_defs.py index 369692b..73493d3 100644 --- a/mwparserfromhell/tag_defs.py +++ b/mwparserfromhell/tag_defs.py @@ -24,7 +24,8 @@ from __future__ import unicode_literals -__all__ = ["get_wikicode", "is_parsable", "is_visible"] +__all__ = ["get_wikicode", "is_parsable", "is_visible", "is_single", + "is_single_only"] PARSER_BLACKLIST = [ # enwiki extensions @ 2013-06-28 @@ -65,3 +66,11 @@ def is_parsable(tag): def is_visible(tag): """Return whether or not the given *tag* contains visible text.""" return tag.lower() not in INVISIBLE_TAGS + +def is_single(tag): + """Return whether or not the given *tag* can exist without a close tag.""" + return tag.lower() in SINGLE + +def is_single_only(tag): + """Return whether or not the given *tag* must exist without a close tag.""" + return tag.lower() in SINGLE_ONLY