浏览代码

Improve padding support for Tags; more code for tags in tokenizer.

tags/v0.3
Ben Kurtovic 12 年前
父节点
当前提交
05ec7a1a92
共有 3 个文件被更改,包括 100 次插入61 次删除
  1. +19
    -8
      mwparserfromhell/nodes/extras/attribute.py
  2. +9
    -9
      mwparserfromhell/nodes/tag.py
  3. +72
    -44
      mwparserfromhell/parser/tokenizer.py

+ 19
- 8
mwparserfromhell/nodes/extras/attribute.py 查看文件

@@ -36,18 +36,20 @@ class Attribute(StringMixIn):
whose value is ``"foo"``.
"""

def __init__(self, name, value=None, quoted=True):
def __init__(self, name, value=None, quoted=True, padding=""):
super(Attribute, self).__init__()
self._name = name
self._value = value
self._quoted = quoted
self._padding = padding

def __unicode__(self):
base = self.padding + str(self.name)
if self.value:
if self.quoted:
return str(self.name) + '="' + str(self.value) + '"'
return str(self.name) + "=" + str(self.value)
return str(self.name)
return base + '="' + str(self.value) + '"'
return base + "=" + str(self.value)
return base

@property
def name(self):
@@ -64,14 +66,23 @@ class Attribute(StringMixIn):
"""Whether the attribute's value is quoted with double quotes."""
return self._quoted

@property
def padding(self):
"""Spacing to insert right before the attribute."""
return self._padding

@name.setter
def name(self, newval):
self._name = parse_anything(newval)
def name(self, value):
self._name = parse_anything(value)

@value.setter
def value(self, newval):
self._value = parse_anything(newval)

@quoted.setter
def quoted(self, newval):
self._quoted = bool(newval)
def quoted(self, value):
self._quoted = bool(value)

@padding.setter
def padding(self, value):
self._padding = str(value)

+ 9
- 9
mwparserfromhell/nodes/tag.py 查看文件

@@ -110,7 +110,7 @@ class Tag(Node):
}

def __init__(self, type_, tag, contents=None, attrs=None, showtag=True,
self_closing=False, open_padding=0, close_padding=0):
self_closing=False, open_padding="", close_padding=""):
super(Tag, self).__init__()
self._type = type_
self._tag = tag
@@ -136,10 +136,10 @@ class Tag(Node):
if self.attrs:
result += " " + " ".join([str(attr) for attr in self.attrs])
if self.self_closing:
result += " " * self.open_padding + "/>"
result += self.open_padding + "/>"
else:
result += " " * self.open_padding + ">" + str(self.contents)
result += "</" + str(self.tag) + " " * self.close_padding + ">"
result += self.open_padding + ">" + str(self.contents)
result += "</" + str(self.tag) + self.close_padding + ">"
return result

def __iternodes__(self, getter):
@@ -232,17 +232,17 @@ class Tag(Node):

@property
def self_closing(self):
"""Whether the tag is self-closing with no content."""
"""Whether the tag is self-closing with no content (like ``<br/>``)."""
return self._self_closing

@property
def open_padding(self):
"""How much spacing to insert before the first closing >."""
"""Spacing to insert before the first closing >."""
return self._open_padding

@property
def close_padding(self):
"""How much spacing to insert before the last closing >."""
"""Spacing to insert before the last closing > (excl. self-closing)."""
return self._close_padding

@type.setter
@@ -270,8 +270,8 @@ class Tag(Node):

@open_padding.setter
def open_padding(self, value):
self._open_padding = int(value)
self._open_padding = str(value)

@close_padding.setter
def close_padding(self, value):
self._close_padding = int(value)
self._close_padding = str(value)

+ 72
- 44
mwparserfromhell/parser/tokenizer.py 查看文件

@@ -425,52 +425,77 @@ class Tokenizer(object):
"""Parse an HTML tag at the head of the wikicode string."""
self._head += 1
reset = self._head
self._push()
try:
t_open, type_, self_close, o_pad = self._parse(contexts.TAG_OPEN)
if not self_close:
t_body = self._parse(contexts.TAG_BODY)
t_close, c_pad = self._parse(contexts.TAG_CLOSE)
tokens = self._parse(contexts.TAG_OPEN)
except BadRoute:
self._head = reset
self._pop()
self._write_text("<")
else:
self._pop()
self._write(tokens.TagOpenOpen(type=type_, showtag=False))
self._write_all(t_open)
if self_close:
self._write(tokens.TagCloseSelfclose(padding=o_pad))
else:
self._write(tokens.TagCloseOpen(padding=o_pad))
self._write_all(t_body)
self._write(tokens.TagOpenClose())
self._write_all(t_close)
self._write(tokens.TagCloseClose(padding=c_pad))
self._write_all(tokens)

def _handle_attribute(self):
if not self._context & contexts.TAG_ATTR:
## check name is valid
def _get_tag_type_from_stack(self):
self._push_textbuffer()
if not self._stack:
return None # Tag has an empty name?
text = [tok for tok in self._stack if isinstance(tok, tokens.Text)]
text = "".join([token.text for token in text]).strip().lower()
try:
return Tag.TRANSLATIONS[text]
except KeyError:
return Tag.TAG_UNKNOWN

def _handle_tag_close_name(self):
tag = self._get_tag_type_from_stack()
if tag is None:
self._fail_route()
self._write(tokens.TagOpenOpen(type=tag, showtag=False))

def _handle_attribute_name(self):
## check if next character is a ", if so, set TAG_ATTR_BODY_QUOTED
pass
# def _handle_attribute(self):
# if not self._context & contexts.TAG_ATTR:
# self._handle_tag_close_name()

def _handle_quoted_attribute_close(self):
pass
# def _handle_attribute_name(self):
# ## check if next character is a ", if so, set TAG_ATTR_BODY_QUOTED
# pass

# def _handle_quoted_attribute_close(self):
# pass

def _handle_tag_close_open(self):
pass ## .padding
if not self._context & contexts.TAG_ATTR:
self._handle_tag_close_name()

self._context ^= contexts.TAG_OPEN # also TAG_ATTR_*
self._context |= contexts.TAG_BODY

padding = "" # TODO
self._write(tokens.TagCloseOpen(padding=padding))

def _handle_tag_selfclose(self):
pass ## .padding
self._context ^= contexts.TAG_OPEN # also TAG_ATTR_*
self._context |= contexts.TAG_BODY

def _handle_tag_close_open(self):
pass
padding = "" # TODO
self._write(tokens.TagCloseSelfclose(padding=padding))
self._pop()

def _handle_tag_open_close(self):
self._context ^= contexts.TAG_BODY
self._context |= contexts.TAG_CLOSE
self._write(tokens.TagOpenClose())
self._push()
self._head += 1

def _handle_tag_close_close(self):
## check that the closing name is the same as the opening name
pass ## .padding
tag = self._get_tag_type_from_stack()
closing = self._pop()
if tag != self._stack[0].type:
# Closing and opening tags are not the same, so fail this route:
self._fail_route()
self._write_all(closing)
padding = "" # TODO
self._write(tokens.TagCloseClose(padding=padding))
return self._pop()

def _parse(self, context=0):
"""Parse the wikicode string, using *context* for when to stop."""
@@ -485,7 +510,8 @@ class Tokenizer(object):
fail = (contexts.TEMPLATE | contexts.ARGUMENT |
contexts.WIKILINK | contexts.HEADING |
contexts.COMMENT | contexts.TAG)
if self._context & contexts.TEMPLATE_PARAM_KEY:
double_fail = contexts.TEMPLATE_PARAM_KEY | contexts.TAG_CLOSE
if self._context & double_fail:
self._pop()
if self._context & fail:
self._fail_route()
@@ -538,27 +564,29 @@ class Tokenizer(object):
self._write_text(this)
elif this == "<" and not self._context & (contexts.TAG ^ contexts.TAG_BODY):
self._parse_tag()
elif this == " " and (self._context & contexts.TAG_OPEN and not
self._context & contexts.TAG_ATTR_BODY_QUOTED):
self._handle_attribute()
elif this == "=" and self._context & contexts.TAG_ATTR_NAME:
self._handle_attribute_name()
elif this == '"' and self._context & contexts.TAG_ATTR_BODY_QUOTED:
self._handle_quoted_attribute_close()
# elif this == " " and (self._context & contexts.TAG_OPEN and not
# self._context & contexts.TAG_ATTR_BODY_QUOTED):
# self._handle_attribute()
# elif this == "=" and self._context & contexts.TAG_ATTR_NAME:
# self._handle_attribute_name()
# elif this == '"' and self._context & contexts.TAG_ATTR_BODY_QUOTED:
# self._handle_quoted_attribute_close()
elif this == "\n" and (self._context & contexts.TAG_OPEN and not
self._context & contexts.TAG_ATTR_BODY_QUOTED):
if self._context & contexts.TAG_CLOSE:
self._pop()
self._fail_route()
elif this == ">" and (self._context & contexts.TAG_ATTR_OPEN and not
elif this == ">" and (self._context & contexts.TAG_OPEN and not
self._context & contexts.TAG_ATTR_BODY_QUOTED):
return self._handle_tag_close_open()
self._handle_tag_close_open()
elif this == "/" and next == ">" and (
self._context & contexts.TAG_ATTR_OPEN and not
self._context & contexts.TAG_OPEN and not
self._context & contexts.TAG_ATTR_BODY_QUOTED):
return self._handle_tag_selfclose()
elif this == "<" and next == "/" and self._context & contexts.TAG_BODY:
self._handle_tag_close_open()
self._handle_tag_open_close()
elif this == ">" and self._context & contexts.TAG_CLOSE:
self._handle_tag_close_close()
return self._handle_tag_close_close()
else:
self._write_text(this)
self._head += 1


正在加载...
取消
保存