@@ -24,18 +24,17 @@ from __future__ import unicode_literals | |||
from . import Node, Text | |||
from ..compat import str | |||
from ..tag_defs import TagDefinitions | |||
from ..tag_defs import get_wikicode, is_visible | |||
from ..utils import parse_anything | |||
__all__ = ["Tag"] | |||
class Tag(TagDefinitions, Node): | |||
class Tag(Node): | |||
"""Represents an HTML-style tag in wikicode, like ``<ref>``.""" | |||
def __init__(self, type_, tag, contents=None, attrs=None, showtag=True, | |||
def __init__(self, tag, contents=None, attrs=None, showtag=True, | |||
self_closing=False, padding="", closing_tag=None): | |||
super(Tag, self).__init__() | |||
self._type = type_ | |||
self._tag = tag | |||
self._contents = contents | |||
if attrs: | |||
@@ -52,7 +51,7 @@ class Tag(TagDefinitions, Node): | |||
def __unicode__(self): | |||
if not self.showtag: | |||
open_, close = self.WIKICODE[self.type] | |||
# get_wikicode() is a plain function (see tag_defs), so it has to be called;
# subscripting it would raise TypeError instead of returning the markup pair.
open_, close = get_wikicode(self.tag)
if self.self_closing: | |||
return open_ | |||
else: | |||
@@ -84,7 +83,7 @@ class Tag(TagDefinitions, Node): | |||
yield self.contents, child | |||
def __strip__(self, normalize, collapse): | |||
if self.type in self.TAGS_VISIBLE: | |||
if is_visible(self.tag): | |||
return self.contents.strip_code(normalize, collapse) | |||
return None | |||
@@ -113,11 +112,6 @@ class Tag(TagDefinitions, Node): | |||
write(">") | |||
@property | |||
def type(self): | |||
"""The tag type.""" | |||
return self._type | |||
@property | |||
def tag(self): | |||
"""The tag itself, as a :py:class:`~.Wikicode` object.""" | |||
return self._tag | |||
@@ -159,23 +153,9 @@ class Tag(TagDefinitions, Node): | |||
""" | |||
return self._closing_tag | |||
@type.setter | |||
def type(self, value): | |||
value = int(value) | |||
if value not in self.TAGS_ALL: | |||
raise ValueError(value) | |||
self._type = value | |||
for key in self.TRANSLATIONS: | |||
if self.TRANSLATIONS[key] == value: | |||
self._tag = self._closing_tag = parse_anything(key) | |||
@tag.setter
def tag(self, value):
    # Parse once and share the result so the opening and closing tags always
    # refer to the same object.
    parsed = parse_anything(value)
    self._tag = parsed
    self._closing_tag = parsed
try: | |||
self._type = self.TRANSLATIONS[text] | |||
except KeyError: | |||
self._type = self.TAG_UNKNOWN | |||
@contents.setter | |||
def contents(self, value): | |||
@@ -202,7 +202,7 @@ class Builder(object): | |||
def _handle_tag(self, token): | |||
"""Handle a case where a tag is at the head of the tokens.""" | |||
type_, showtag = token.type, token.showtag | |||
showtag = token.showtag | |||
attrs = [] | |||
self._push() | |||
while self._tokens: | |||
@@ -215,14 +215,14 @@ class Builder(object): | |||
self._push() | |||
elif isinstance(token, tokens.TagCloseSelfclose): | |||
tag = self._pop() | |||
return Tag(type_, tag, attrs=attrs, showtag=showtag, | |||
return Tag(tag, attrs=attrs, showtag=showtag, | |||
self_closing=True, padding=token.padding) | |||
elif isinstance(token, tokens.TagOpenClose): | |||
contents = self._pop() | |||
self._push() | |||
elif isinstance(token, tokens.TagCloseClose): | |||
return Tag(type_, tag, contents, attrs, showtag, False, | |||
padding, self._pop()) | |||
return Tag(tag, contents, attrs, showtag, False, padding, | |||
self._pop()) | |||
else: | |||
self._write(self._handle_token(token)) | |||
@@ -27,7 +27,7 @@ import re | |||
from . import contexts | |||
from . import tokens | |||
from ..compat import htmlentities | |||
from ..nodes.tag import Tag | |||
from ..tag_defs import is_parsable | |||
__all__ = ["Tokenizer"] | |||
@@ -416,8 +416,8 @@ class Tokenizer(object): | |||
else: | |||
self._write_all(tokens) | |||
def _get_tag_type_from_stack(self, stack=None): | |||
"""Return the tag type based on the text in *stack*. | |||
def _get_tag_from_stack(self, stack=None): | |||
"""Return the tag based on the text in *stack*. | |||
If *stack* is ``None``, we will use the current, topmost one. | |||
""" | |||
@@ -427,11 +427,7 @@ class Tokenizer(object): | |||
if not stack: | |||
self._fail_route() # Tag has an empty name? | |||
text = [tok for tok in stack if isinstance(tok, tokens.Text)] | |||
text = "".join([token.text for token in text]).rstrip().lower() | |||
try: | |||
return Tag.TRANSLATIONS[text] | |||
except KeyError: | |||
return Tag.TAG_UNKNOWN | |||
return "".join([token.text for token in text]).rstrip().lower() | |||
def _actually_close_tag_opening(self): | |||
"""Handle cleanup at the end of a opening tag. | |||
@@ -447,8 +443,7 @@ class Tokenizer(object): | |||
if self._context & contexts.TAG_OPEN_ATTR_BODY: | |||
self._context ^= contexts.TAG_OPEN_ATTR_BODY | |||
else: | |||
tag = self._get_tag_type_from_stack() | |||
self._write_first(tokens.TagOpenOpen(type=tag, showtag=True)) | |||
self._write_first(tokens.TagOpenOpen(showtag=True)) | |||
self._context ^= contexts.TAG_OPEN_NAME | |||
self._context |= contexts.TAG_BODY | |||
@@ -509,8 +504,7 @@ class Tokenizer(object): | |||
is_quoted = False | |||
if self._context & contexts.TAG_OPEN_NAME: | |||
self._write_text(chunks.pop(0)) | |||
tag = self._get_tag_type_from_stack() | |||
self._write_first(tokens.TagOpenOpen(type=tag, showtag=True)) | |||
self._write_first(tokens.TagOpenOpen(showtag=True)) | |||
self._context ^= contexts.TAG_OPEN_NAME | |||
self._context |= contexts.TAG_OPEN_ATTR_NAME | |||
self._actually_handle_chunk(chunks, True) | |||
@@ -584,8 +578,7 @@ class Tokenizer(object): | |||
def _handle_tag_close_close(self): | |||
"""Handle the ending of a closing tag (``</foo>``).""" | |||
closing = self._pop() | |||
tag = self._get_tag_type_from_stack(closing) | |||
if tag != self._stack[0].type: | |||
if self._get_tag_from_stack(closing) != self._get_tag_from_stack(): | |||
# Closing and opening tags are not the same, so fail this route: | |||
self._fail_route() | |||
self._write_all(closing) | |||
@@ -20,99 +20,48 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
"""Contains data regarding certain HTML tags.""" | |||
class TagDefinitions(object): | |||
"""Contains numerical definitions for valid HTML (and wikicode) tags. | |||
from __future__ import unicode_literals | |||
Base class for :py:class:`~.Tag` objects. | |||
""" | |||
__all__ = ["get_wikicode", "is_parsable", "is_visible"] | |||
TAG_UNKNOWN = 0 | |||
PARSER_BLACKLIST = [ | |||
# enwiki extensions @ 2013-06-28 | |||
"categorytree", "gallery", "hiero", "imagemap", "inputbox", "math", | |||
"nowiki", "pre", "score", "section", "source", "syntaxhighlight", | |||
"templatedata", "timeline" | |||
] | |||
# Basic HTML: | |||
TAG_ITALIC = 1 | |||
TAG_BOLD = 2 | |||
TAG_UNDERLINE = 3 | |||
TAG_STRIKETHROUGH = 4 | |||
TAG_UNORDERED_LIST = 5 | |||
TAG_ORDERED_LIST = 6 | |||
TAG_DEF_TERM = 7 | |||
TAG_DEF_ITEM = 8 | |||
TAG_BLOCKQUOTE = 9 | |||
TAG_RULE = 10 | |||
TAG_BREAK = 11 | |||
TAG_ABBR = 12 | |||
TAG_PRE = 13 | |||
TAG_MONOSPACE = 14 | |||
TAG_CODE = 15 | |||
TAG_SPAN = 16 | |||
TAG_DIV = 17 | |||
TAG_FONT = 18 | |||
TAG_SMALL = 19 | |||
TAG_BIG = 20 | |||
TAG_CENTER = 21 | |||
INVISIBLE_TAGS = [ | |||
# enwiki extensions @ 2013-06-28 | |||
"categorytree", "gallery", "imagemap", "inputbox", "math", "score", | |||
"section", "templatedata", "timeline" | |||
] | |||
# MediaWiki parser hooks: | |||
TAG_REF = 101 | |||
TAG_GALLERY = 102 | |||
TAG_MATH = 103 | |||
TAG_NOWIKI = 104 | |||
TAG_NOINCLUDE = 105 | |||
TAG_INCLUDEONLY = 106 | |||
TAG_ONLYINCLUDE = 107 | |||
# [mediawiki/core.git]/includes/Sanitizer.php @ 87a0aef762 | |||
SINGLE_ONLY = ["br", "hr", "meta", "link", "img"] | |||
SINGLE = SINGLE_ONLY + ["li", "dt", "dd"] | |||
# Additional parser hooks: | |||
TAG_SYNTAXHIGHLIGHT = 201 | |||
TAG_POEM = 202 | |||
WIKICODE = { | |||
"i": {"open": "''", "close": "''"}, | |||
"b": {"open": "'''", "close": "'''"}, | |||
"ul": {"open": "*"}, | |||
"ol": {"open": "#"}, | |||
"dt": {"open": ";"}, | |||
"dd": {"open": ":"}, | |||
"hr": {"open": "----"}, | |||
} | |||
# Lists of tags: | |||
TAGS_ALL = set(range(300)) | |||
TAGS_INVISIBLE = {TAG_REF, TAG_GALLERY, TAG_MATH, TAG_NOINCLUDE} | |||
TAGS_VISIBLE = TAGS_ALL - TAGS_INVISIBLE | |||
def get_wikicode(tag):
    """Return the wikicode markup placed before and after the given *tag*.

    The lookup is case-insensitive: *tag* is lowercased before it is used as
    a key into ``WIKICODE``. The result is a 2-tuple ``(open, close)``; either
    element is ``None`` when the table entry lacks that key. A ``KeyError`` is
    raised if *tag* has no entry in ``WIKICODE`` at all.
    """
    markup = WIKICODE[tag.lower()]
    opening = markup.get("open")
    closing = markup.get("close")
    return (opening, closing)
TRANSLATIONS = { | |||
"i": TAG_ITALIC, | |||
"em": TAG_ITALIC, | |||
"b": TAG_BOLD, | |||
"strong": TAG_BOLD, | |||
"u": TAG_UNDERLINE, | |||
"s": TAG_STRIKETHROUGH, | |||
"ul": TAG_UNORDERED_LIST, | |||
"ol": TAG_ORDERED_LIST, | |||
"dt": TAG_DEF_TERM, | |||
"dd": TAG_DEF_ITEM, | |||
"blockquote": TAG_BLOCKQUOTE, | |||
"hl": TAG_RULE, | |||
"br": TAG_BREAK, | |||
"abbr": TAG_ABBR, | |||
"pre": TAG_PRE, | |||
"tt": TAG_MONOSPACE, | |||
"code": TAG_CODE, | |||
"span": TAG_SPAN, | |||
"div": TAG_DIV, | |||
"font": TAG_FONT, | |||
"small": TAG_SMALL, | |||
"big": TAG_BIG, | |||
"center": TAG_CENTER, | |||
"ref": TAG_REF, | |||
"gallery": TAG_GALLERY, | |||
"math": TAG_MATH, | |||
"nowiki": TAG_NOWIKI, | |||
"noinclude": TAG_NOINCLUDE, | |||
"includeonly": TAG_INCLUDEONLY, | |||
"onlyinclude": TAG_ONLYINCLUDE, | |||
"syntaxhighlight": TAG_SYNTAXHIGHLIGHT, | |||
"source": TAG_SYNTAXHIGHLIGHT, | |||
"poem": TAG_POEM, | |||
} | |||
def is_parsable(tag):
    """Return whether the contents of the given *tag* should be parsed.

    The check is case-insensitive; a tag is considered unparsable only when
    its lowercased name is listed in ``PARSER_BLACKLIST``.
    """
    blacklisted = tag.lower() in PARSER_BLACKLIST
    return not blacklisted
WIKICODE = { | |||
TAG_ITALIC: ("''", "''"), | |||
TAG_BOLD: ("'''", "'''"), | |||
TAG_UNORDERED_LIST: ("*", ""), | |||
TAG_ORDERED_LIST: ("#", ""), | |||
TAG_DEF_TERM: (";", ""), | |||
TAG_DEF_ITEM: (":", ""), | |||
TAG_RULE: ("----", ""), | |||
} | |||
def is_visible(tag):
    """Return whether the given *tag* contains visible text.

    The check is case-insensitive; a tag is considered invisible only when
    its lowercased name is listed in ``INVISIBLE_TAGS``.
    """
    hidden = tag.lower() in INVISIBLE_TAGS
    return not hidden
@@ -31,6 +31,8 @@ from .compat import bytes, str | |||
from .nodes import Node | |||
from .smart_list import SmartList | |||
__all__ = ["parse_anything"] | |||
def parse_anything(value): | |||
"""Return a :py:class:`~.Wikicode` for *value*, allowing multiple types. | |||
@@ -193,11 +193,10 @@ class TestBuilder(TreeEqualityTestCase): | |||
def test_tag(self): | |||
"""tests for building Tag nodes""" | |||
tests = [ | |||
([tokens.TagOpenOpen(showtag=True, type=101), | |||
tokens.Text(text="ref"), tokens.TagCloseOpen(padding=""), | |||
tokens.TagOpenClose(), tokens.Text(text="ref"), | |||
tokens.TagCloseClose()], | |||
wrap([Tag(101, wraptext("ref"), wrap([]), [], True, False, "", | |||
([tokens.TagOpenOpen(showtag=True), tokens.Text(text="ref"), | |||
tokens.TagCloseOpen(padding=""), tokens.TagOpenClose(), | |||
tokens.Text(text="ref"), tokens.TagCloseClose()], | |||
wrap([Tag(wraptext("ref"), wrap([]), [], True, False, "", | |||
wraptext("ref"))])), | |||
] | |||
for test, valid in tests: | |||
@@ -1,98 +1,98 @@ | |||
name: basic | |||
label: a basic tag with an open and close | |||
input: "<ref></ref>" | |||
output: [TagOpenOpen(showtag=True, type=101), Text(text="ref"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
output: [TagOpenOpen(showtag=True), Text(text="ref"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: basic_selfclosing | |||
label: a basic self-closing tag | |||
input: "<ref/>" | |||
output: [TagOpenOpen(showtag=True, type=101), Text(text="ref"), TagCloseSelfclose(padding="")] | |||
output: [TagOpenOpen(showtag=True), Text(text="ref"), TagCloseSelfclose(padding="")] | |||
--- | |||
name: content | |||
label: a tag with some content in the middle | |||
input: "<ref>this is a reference</ref>" | |||
output: [TagOpenOpen(showtag=True, type=101), Text(text="ref"), TagCloseOpen(padding=""), Text(text="this is a reference"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
output: [TagOpenOpen(showtag=True), Text(text="ref"), TagCloseOpen(padding=""), Text(text="this is a reference"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: padded_open | |||
label: a tag with some padding in the open tag | |||
input: "<ref ></ref>" | |||
output: [TagOpenOpen(showtag=True, type=101), Text(text="ref"), TagCloseOpen(padding=" "), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
output: [TagOpenOpen(showtag=True), Text(text="ref"), TagCloseOpen(padding=" "), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: padded_close | |||
label: a tag with some padding in the close tag | |||
input: "<ref></ref >" | |||
output: [TagOpenOpen(showtag=True, type=101), Text(text="ref"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref "), TagCloseClose()] | |||
output: [TagOpenOpen(showtag=True), Text(text="ref"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref "), TagCloseClose()] | |||
--- | |||
name: padded_selfclosing | |||
label: a self-closing tag with padding | |||
input: "<ref />" | |||
output: [TagOpenOpen(showtag=True, type=101), Text(text="ref"), TagCloseSelfclose(padding=" ")] | |||
output: [TagOpenOpen(showtag=True), Text(text="ref"), TagCloseSelfclose(padding=" ")] | |||
--- | |||
name: attribute | |||
label: a tag with a single attribute | |||
input: "<ref name></ref>" | |||
output: [TagOpenOpen(showtag=True, type=101), Text(text="ref"), TagAttrStart(padding=""), Text(text="name"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
output: [TagOpenOpen(showtag=True), Text(text="ref"), TagAttrStart(padding=""), Text(text="name"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: attribute_value | |||
label: a tag with a single attribute with a value | |||
input: "<ref name=foo></ref>" | |||
output: [TagOpenOpen(showtag=True, type=101), Text(text="ref"), TagAttrStart(padding=""), Text(text="name"), TagAttrEquals(), Text(text="foo"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
output: [TagOpenOpen(showtag=True), Text(text="ref"), TagAttrStart(padding=""), Text(text="name"), TagAttrEquals(), Text(text="foo"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: attribute_quoted | |||
label: a tag with a single quoted attribute | |||
input: "<ref name="foo"></ref>" | |||
output: [TagOpenOpen(showtag=True, type=101), Text(text="ref"), TagAttrStart(padding=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
output: [TagOpenOpen(showtag=True), Text(text="ref"), TagAttrStart(padding=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: attribute_hyphen | |||
label: a tag with a single attribute, containing a hyphen | |||
input: "<ref name=foo-bar></ref>" | |||
output: [TagOpenOpen(showtag=True, type=101), Text(text="ref"), TagAttrStart(padding=""), Text(text="name"), TagAttrEquals(), Text(text="foo-bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
output: [TagOpenOpen(showtag=True), Text(text="ref"), TagAttrStart(padding=""), Text(text="name"), TagAttrEquals(), Text(text="foo-bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: attribute_quoted_hyphen | |||
label: a tag with a single quoted attribute, containing a hyphen | |||
input: "<ref name="foo-bar"></ref>" | |||
output: [TagOpenOpen(showtag=True, type=101), Text(text="ref"), TagAttrStart(padding=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo-bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
output: [TagOpenOpen(showtag=True), Text(text="ref"), TagAttrStart(padding=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo-bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||
--- | |||
name: attribute_selfclosing | |||
label: a self-closing tag with a single attribute | |||
input: "<ref name/>" | |||
output: [TagOpenOpen(showtag=True, type=101), Text(text="ref"), TagAttrStart(padding=""), Text(text="name"), TagCloseSelfclose(padding="")] | |||
output: [TagOpenOpen(showtag=True), Text(text="ref"), TagAttrStart(padding=""), Text(text="name"), TagCloseSelfclose(padding="")] | |||
--- | |||
name: attribute_selfclosing_value | |||
label: a self-closing tag with a single attribute with a value | |||
input: "<ref name=foo/>" | |||
output: [TagOpenOpen(showtag=True, type=101), Text(text="ref"), TagAttrStart(padding=""), Text(text="name"), TagAttrEquals(), Text(text="foo"), TagCloseSelfclose(padding="")] | |||
output: [TagOpenOpen(showtag=True), Text(text="ref"), TagAttrStart(padding=""), Text(text="name"), TagAttrEquals(), Text(text="foo"), TagCloseSelfclose(padding="")] | |||
--- | |||
name: attribute_selfclosing_value_quoted | |||
label: a self-closing tag with a single quoted attribute | |||
input: "<ref name="foo"/>" | |||
output: [TagOpenOpen(showtag=True, type=101), Text(text="ref"), TagAttrStart(padding=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo"), TagCloseSelfclose(padding="")] | |||
output: [TagOpenOpen(showtag=True), Text(text="ref"), TagAttrStart(padding=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo"), TagCloseSelfclose(padding="")] | |||
--- | |||