Переглянути джерело

Improve padding support for Tags; more code for tags in tokenizer.

tags/v0.3
Ben Kurtovic 12 років тому
джерело
коміт
05ec7a1a92
3 змінені файли: 100 додано та 61 видалено
  1. +19
    -8
      mwparserfromhell/nodes/extras/attribute.py
  2. +9
    -9
      mwparserfromhell/nodes/tag.py
  3. +72
    -44
      mwparserfromhell/parser/tokenizer.py

+ 19
- 8
mwparserfromhell/nodes/extras/attribute.py Переглянути файл

@@ -36,18 +36,20 @@ class Attribute(StringMixIn):
whose value is ``"foo"``. whose value is ``"foo"``.
""" """


def __init__(self, name, value=None, quoted=True):
def __init__(self, name, value=None, quoted=True, padding=""):
super(Attribute, self).__init__() super(Attribute, self).__init__()
self._name = name self._name = name
self._value = value self._value = value
self._quoted = quoted self._quoted = quoted
self._padding = padding


def __unicode__(self): def __unicode__(self):
base = self.padding + str(self.name)
if self.value: if self.value:
if self.quoted: if self.quoted:
return str(self.name) + '="' + str(self.value) + '"'
return str(self.name) + "=" + str(self.value)
return str(self.name)
return base + '="' + str(self.value) + '"'
return base + "=" + str(self.value)
return base


@property @property
def name(self): def name(self):
@@ -64,14 +66,23 @@ class Attribute(StringMixIn):
"""Whether the attribute's value is quoted with double quotes.""" """Whether the attribute's value is quoted with double quotes."""
return self._quoted return self._quoted


@property
def padding(self):
"""Spacing to insert right before the attribute."""
return self._padding

@name.setter @name.setter
def name(self, newval):
self._name = parse_anything(newval)
def name(self, value):
self._name = parse_anything(value)


@value.setter @value.setter
def value(self, newval): def value(self, newval):
self._value = parse_anything(newval) self._value = parse_anything(newval)


@quoted.setter @quoted.setter
def quoted(self, newval):
self._quoted = bool(newval)
def quoted(self, value):
self._quoted = bool(value)

@padding.setter
def padding(self, value):
self._padding = str(value)

+ 9
- 9
mwparserfromhell/nodes/tag.py Переглянути файл

@@ -110,7 +110,7 @@ class Tag(Node):
} }


def __init__(self, type_, tag, contents=None, attrs=None, showtag=True, def __init__(self, type_, tag, contents=None, attrs=None, showtag=True,
self_closing=False, open_padding=0, close_padding=0):
self_closing=False, open_padding="", close_padding=""):
super(Tag, self).__init__() super(Tag, self).__init__()
self._type = type_ self._type = type_
self._tag = tag self._tag = tag
@@ -136,10 +136,10 @@ class Tag(Node):
if self.attrs: if self.attrs:
result += " " + " ".join([str(attr) for attr in self.attrs]) result += " " + " ".join([str(attr) for attr in self.attrs])
if self.self_closing: if self.self_closing:
result += " " * self.open_padding + "/>"
result += self.open_padding + "/>"
else: else:
result += " " * self.open_padding + ">" + str(self.contents)
result += "</" + str(self.tag) + " " * self.close_padding + ">"
result += self.open_padding + ">" + str(self.contents)
result += "</" + str(self.tag) + self.close_padding + ">"
return result return result


def __iternodes__(self, getter): def __iternodes__(self, getter):
@@ -232,17 +232,17 @@ class Tag(Node):


@property @property
def self_closing(self): def self_closing(self):
"""Whether the tag is self-closing with no content."""
"""Whether the tag is self-closing with no content (like ``<br/>``)."""
return self._self_closing return self._self_closing


@property @property
def open_padding(self): def open_padding(self):
"""How much spacing to insert before the first closing >."""
"""Spacing to insert before the first closing >."""
return self._open_padding return self._open_padding


@property @property
def close_padding(self): def close_padding(self):
"""How much spacing to insert before the last closing >."""
"""Spacing to insert before the last closing > (excl. self-closing)."""
return self._close_padding return self._close_padding


@type.setter @type.setter
@@ -270,8 +270,8 @@ class Tag(Node):


@open_padding.setter @open_padding.setter
def open_padding(self, value): def open_padding(self, value):
self._open_padding = int(value)
self._open_padding = str(value)


@close_padding.setter @close_padding.setter
def close_padding(self, value): def close_padding(self, value):
self._close_padding = int(value)
self._close_padding = str(value)

+ 72
- 44
mwparserfromhell/parser/tokenizer.py Переглянути файл

@@ -425,52 +425,77 @@ class Tokenizer(object):
"""Parse an HTML tag at the head of the wikicode string.""" """Parse an HTML tag at the head of the wikicode string."""
self._head += 1 self._head += 1
reset = self._head reset = self._head
self._push()
try: try:
t_open, type_, self_close, o_pad = self._parse(contexts.TAG_OPEN)
if not self_close:
t_body = self._parse(contexts.TAG_BODY)
t_close, c_pad = self._parse(contexts.TAG_CLOSE)
tokens = self._parse(contexts.TAG_OPEN)
except BadRoute: except BadRoute:
self._head = reset self._head = reset
self._pop()
self._write_text("<") self._write_text("<")
else: else:
self._pop()
self._write(tokens.TagOpenOpen(type=type_, showtag=False))
self._write_all(t_open)
if self_close:
self._write(tokens.TagCloseSelfclose(padding=o_pad))
else:
self._write(tokens.TagCloseOpen(padding=o_pad))
self._write_all(t_body)
self._write(tokens.TagOpenClose())
self._write_all(t_close)
self._write(tokens.TagCloseClose(padding=c_pad))
self._write_all(tokens)


def _handle_attribute(self):
if not self._context & contexts.TAG_ATTR:
## check name is valid
def _get_tag_type_from_stack(self):
self._push_textbuffer()
if not self._stack:
return None # Tag has an empty name?
text = [tok for tok in self._stack if isinstance(tok, tokens.Text)]
text = "".join([token.text for token in text]).strip().lower()
try:
return Tag.TRANSLATIONS[text]
except KeyError:
return Tag.TAG_UNKNOWN

def _handle_tag_close_name(self):
tag = self._get_tag_type_from_stack()
if tag is None:
self._fail_route()
self._write(tokens.TagOpenOpen(type=tag, showtag=False))


def _handle_attribute_name(self):
## check if next character is a ", if so, set TAG_ATTR_BODY_QUOTED
pass
# def _handle_attribute(self):
# if not self._context & contexts.TAG_ATTR:
# self._handle_tag_close_name()


def _handle_quoted_attribute_close(self):
pass
# def _handle_attribute_name(self):
# ## check if next character is a ", if so, set TAG_ATTR_BODY_QUOTED
# pass

# def _handle_quoted_attribute_close(self):
# pass


def _handle_tag_close_open(self): def _handle_tag_close_open(self):
pass ## .padding
if not self._context & contexts.TAG_ATTR:
self._handle_tag_close_name()

self._context ^= contexts.TAG_OPEN # also TAG_ATTR_*
self._context |= contexts.TAG_BODY

padding = "" # TODO
self._write(tokens.TagCloseOpen(padding=padding))


def _handle_tag_selfclose(self): def _handle_tag_selfclose(self):
pass ## .padding
self._context ^= contexts.TAG_OPEN # also TAG_ATTR_*
self._context |= contexts.TAG_BODY


def _handle_tag_close_open(self):
pass
padding = "" # TODO
self._write(tokens.TagCloseSelfclose(padding=padding))
self._pop()

def _handle_tag_open_close(self):
self._context ^= contexts.TAG_BODY
self._context |= contexts.TAG_CLOSE
self._write(tokens.TagOpenClose())
self._push()
self._head += 1


def _handle_tag_close_close(self): def _handle_tag_close_close(self):
## check that the closing name is the same as the opening name
pass ## .padding
tag = self._get_tag_type_from_stack()
closing = self._pop()
if tag != self._stack[0].type:
# Closing and opening tags are not the same, so fail this route:
self._fail_route()
self._write_all(closing)
padding = "" # TODO
self._write(tokens.TagCloseClose(padding=padding))
return self._pop()


def _parse(self, context=0): def _parse(self, context=0):
"""Parse the wikicode string, using *context* for when to stop.""" """Parse the wikicode string, using *context* for when to stop."""
@@ -485,7 +510,8 @@ class Tokenizer(object):
fail = (contexts.TEMPLATE | contexts.ARGUMENT | fail = (contexts.TEMPLATE | contexts.ARGUMENT |
contexts.WIKILINK | contexts.HEADING | contexts.WIKILINK | contexts.HEADING |
contexts.COMMENT | contexts.TAG) contexts.COMMENT | contexts.TAG)
if self._context & contexts.TEMPLATE_PARAM_KEY:
double_fail = contexts.TEMPLATE_PARAM_KEY | contexts.TAG_CLOSE
if self._context & double_fail:
self._pop() self._pop()
if self._context & fail: if self._context & fail:
self._fail_route() self._fail_route()
@@ -538,27 +564,29 @@ class Tokenizer(object):
self._write_text(this) self._write_text(this)
elif this == "<" and not self._context & (contexts.TAG ^ contexts.TAG_BODY): elif this == "<" and not self._context & (contexts.TAG ^ contexts.TAG_BODY):
self._parse_tag() self._parse_tag()
elif this == " " and (self._context & contexts.TAG_OPEN and not
self._context & contexts.TAG_ATTR_BODY_QUOTED):
self._handle_attribute()
elif this == "=" and self._context & contexts.TAG_ATTR_NAME:
self._handle_attribute_name()
elif this == '"' and self._context & contexts.TAG_ATTR_BODY_QUOTED:
self._handle_quoted_attribute_close()
# elif this == " " and (self._context & contexts.TAG_OPEN and not
# self._context & contexts.TAG_ATTR_BODY_QUOTED):
# self._handle_attribute()
# elif this == "=" and self._context & contexts.TAG_ATTR_NAME:
# self._handle_attribute_name()
# elif this == '"' and self._context & contexts.TAG_ATTR_BODY_QUOTED:
# self._handle_quoted_attribute_close()
elif this == "\n" and (self._context & contexts.TAG_OPEN and not elif this == "\n" and (self._context & contexts.TAG_OPEN and not
self._context & contexts.TAG_ATTR_BODY_QUOTED): self._context & contexts.TAG_ATTR_BODY_QUOTED):
if self._context & contexts.TAG_CLOSE:
self._pop()
self._fail_route() self._fail_route()
elif this == ">" and (self._context & contexts.TAG_ATTR_OPEN and not
elif this == ">" and (self._context & contexts.TAG_OPEN and not
self._context & contexts.TAG_ATTR_BODY_QUOTED): self._context & contexts.TAG_ATTR_BODY_QUOTED):
return self._handle_tag_close_open()
self._handle_tag_close_open()
elif this == "/" and next == ">" and ( elif this == "/" and next == ">" and (
self._context & contexts.TAG_ATTR_OPEN and not
self._context & contexts.TAG_OPEN and not
self._context & contexts.TAG_ATTR_BODY_QUOTED): self._context & contexts.TAG_ATTR_BODY_QUOTED):
return self._handle_tag_selfclose() return self._handle_tag_selfclose()
elif this == "<" and next == "/" and self._context & contexts.TAG_BODY: elif this == "<" and next == "/" and self._context & contexts.TAG_BODY:
self._handle_tag_close_open()
self._handle_tag_open_close()
elif this == ">" and self._context & contexts.TAG_CLOSE: elif this == ">" and self._context & contexts.TAG_CLOSE:
self._handle_tag_close_close()
return self._handle_tag_close_close()
else: else:
self._write_text(this) self._write_text(this)
self._head += 1 self._head += 1


Завантаження…
Відмінити
Зберегти