diff --git a/mwparserfromhell/nodes/extras/attribute.py b/mwparserfromhell/nodes/extras/attribute.py index 648bca0..58a99a8 100644 --- a/mwparserfromhell/nodes/extras/attribute.py +++ b/mwparserfromhell/nodes/extras/attribute.py @@ -36,18 +36,20 @@ class Attribute(StringMixIn): whose value is ``"foo"``. """ - def __init__(self, name, value=None, quoted=True): + def __init__(self, name, value=None, quoted=True, padding=""): super(Attribute, self).__init__() self._name = name self._value = value self._quoted = quoted + self._padding = padding def __unicode__(self): + base = self.padding + str(self.name) if self.value: if self.quoted: - return str(self.name) + '="' + str(self.value) + '"' - return str(self.name) + "=" + str(self.value) - return str(self.name) + return base + '="' + str(self.value) + '"' + return base + "=" + str(self.value) + return base @property def name(self): @@ -64,14 +66,23 @@ class Attribute(StringMixIn): """Whether the attribute's value is quoted with double quotes.""" return self._quoted + @property + def padding(self): + """Spacing to insert right before the attribute.""" + return self._padding + @name.setter - def name(self, newval): - self._name = parse_anything(newval) + def name(self, value): + self._name = parse_anything(value) @value.setter def value(self, newval): self._value = parse_anything(newval) @quoted.setter - def quoted(self, newval): - self._quoted = bool(newval) + def quoted(self, value): + self._quoted = bool(value) + + @padding.setter + def padding(self, value): + self._padding = str(value) diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index c32f398..681a17a 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -110,7 +110,7 @@ class Tag(Node): } def __init__(self, type_, tag, contents=None, attrs=None, showtag=True, - self_closing=False, open_padding=0, close_padding=0): + self_closing=False, open_padding="", close_padding=""): super(Tag, self).__init__() self._type 
= type_ self._tag = tag @@ -136,10 +136,10 @@ class Tag(Node): if self.attrs: result += " " + " ".join([str(attr) for attr in self.attrs]) if self.self_closing: - result += " " * self.open_padding + "/>" + result += self.open_padding + "/>" else: - result += " " * self.open_padding + ">" + str(self.contents) - result += "</" + str(self.tag) + " " * self.close_padding + ">" + result += self.open_padding + ">" + str(self.contents) + result += "</" + str(self.tag) + self.close_padding + ">" return result def __iternodes__(self, getter): @@ -232,17 +232,17 @@ class Tag(Node): @property def self_closing(self): - """Whether the tag is self-closing with no content.""" + """Whether the tag is self-closing with no content (like ``<br/>
``).""" return self._self_closing @property def open_padding(self): - """How much spacing to insert before the first closing >.""" + """Spacing to insert before the first closing >.""" return self._open_padding @property def close_padding(self): - """How much spacing to insert before the last closing >.""" + """Spacing to insert before the last closing > (excl. self-closing).""" return self._close_padding @type.setter @@ -270,8 +270,8 @@ class Tag(Node): @open_padding.setter def open_padding(self, value): - self._open_padding = int(value) + self._open_padding = str(value) @close_padding.setter def close_padding(self, value): - self._close_padding = int(value) + self._close_padding = str(value) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index f640aa2..80d7610 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -425,52 +425,77 @@ class Tokenizer(object): """Parse an HTML tag at the head of the wikicode string.""" self._head += 1 reset = self._head - self._push() try: - t_open, type_, self_close, o_pad = self._parse(contexts.TAG_OPEN) - if not self_close: - t_body = self._parse(contexts.TAG_BODY) - t_close, c_pad = self._parse(contexts.TAG_CLOSE) + tokens = self._parse(contexts.TAG_OPEN) except BadRoute: self._head = reset - self._pop() self._write_text("<") else: - self._pop() - self._write(tokens.TagOpenOpen(type=type_, showtag=False)) - self._write_all(t_open) - if self_close: - self._write(tokens.TagCloseSelfclose(padding=o_pad)) - else: - self._write(tokens.TagCloseOpen(padding=o_pad)) - self._write_all(t_body) - self._write(tokens.TagOpenClose()) - self._write_all(t_close) - self._write(tokens.TagCloseClose(padding=c_pad)) + self._write_all(tokens) - def _handle_attribute(self): - if not self._context & contexts.TAG_ATTR: - ## check name is valid + def _get_tag_type_from_stack(self): + self._push_textbuffer() + if not self._stack: + return None # Tag has an empty name? 
+ text = [tok for tok in self._stack if isinstance(tok, tokens.Text)] + text = "".join([token.text for token in text]).strip().lower() + try: + return Tag.TRANSLATIONS[text] + except KeyError: + return Tag.TAG_UNKNOWN + + def _handle_tag_close_name(self): + tag = self._get_tag_type_from_stack() + if tag is None: + self._fail_route() + self._write(tokens.TagOpenOpen(type=tag, showtag=False)) - def _handle_attribute_name(self): - ## check if next character is a ", if so, set TAG_ATTR_BODY_QUOTED - pass + # def _handle_attribute(self): + # if not self._context & contexts.TAG_ATTR: + # self._handle_tag_close_name() - def _handle_quoted_attribute_close(self): - pass + # def _handle_attribute_name(self): + # ## check if next character is a ", if so, set TAG_ATTR_BODY_QUOTED + # pass + + # def _handle_quoted_attribute_close(self): + # pass def _handle_tag_close_open(self): - pass ## .padding + if not self._context & contexts.TAG_ATTR: + self._handle_tag_close_name() + + self._context ^= contexts.TAG_OPEN # also TAG_ATTR_* + self._context |= contexts.TAG_BODY + + padding = "" # TODO + self._write(tokens.TagCloseOpen(padding=padding)) def _handle_tag_selfclose(self): - pass ## .padding + self._context ^= contexts.TAG_OPEN # also TAG_ATTR_* + self._context |= contexts.TAG_BODY - def _handle_tag_close_open(self): - pass + padding = "" # TODO + self._write(tokens.TagCloseSelfclose(padding=padding)) + self._pop() + + def _handle_tag_open_close(self): + self._context ^= contexts.TAG_BODY + self._context |= contexts.TAG_CLOSE + self._write(tokens.TagOpenClose()) + self._push() + self._head += 1 def _handle_tag_close_close(self): - ## check that the closing name is the same as the opening name - pass ## .padding + tag = self._get_tag_type_from_stack() + closing = self._pop() + if tag != self._stack[0].type: + # Closing and opening tags are not the same, so fail this route: + self._fail_route() + self._write_all(closing) + padding = "" # TODO + 
self._write(tokens.TagCloseClose(padding=padding)) + return self._pop() def _parse(self, context=0): """Parse the wikicode string, using *context* for when to stop.""" @@ -485,7 +510,8 @@ class Tokenizer(object): fail = (contexts.TEMPLATE | contexts.ARGUMENT | contexts.WIKILINK | contexts.HEADING | contexts.COMMENT | contexts.TAG) - if self._context & contexts.TEMPLATE_PARAM_KEY: + double_fail = contexts.TEMPLATE_PARAM_KEY | contexts.TAG_CLOSE + if self._context & double_fail: self._pop() if self._context & fail: self._fail_route() @@ -538,27 +564,29 @@ class Tokenizer(object): self._write_text(this) elif this == "<" and not self._context & (contexts.TAG ^ contexts.TAG_BODY): self._parse_tag() - elif this == " " and (self._context & contexts.TAG_OPEN and not - self._context & contexts.TAG_ATTR_BODY_QUOTED): - self._handle_attribute() - elif this == "=" and self._context & contexts.TAG_ATTR_NAME: - self._handle_attribute_name() - elif this == '"' and self._context & contexts.TAG_ATTR_BODY_QUOTED: - self._handle_quoted_attribute_close() + # elif this == " " and (self._context & contexts.TAG_OPEN and not + # self._context & contexts.TAG_ATTR_BODY_QUOTED): + # self._handle_attribute() + # elif this == "=" and self._context & contexts.TAG_ATTR_NAME: + # self._handle_attribute_name() + # elif this == '"' and self._context & contexts.TAG_ATTR_BODY_QUOTED: + # self._handle_quoted_attribute_close() elif this == "\n" and (self._context & contexts.TAG_OPEN and not self._context & contexts.TAG_ATTR_BODY_QUOTED): + if self._context & contexts.TAG_CLOSE: + self._pop() self._fail_route() - elif this == ">" and (self._context & contexts.TAG_ATTR_OPEN and not + elif this == ">" and (self._context & contexts.TAG_OPEN and not self._context & contexts.TAG_ATTR_BODY_QUOTED): - return self._handle_tag_close_open() + self._handle_tag_close_open() elif this == "/" and next == ">" and ( - self._context & contexts.TAG_ATTR_OPEN and not + self._context & contexts.TAG_OPEN and not 
self._context & contexts.TAG_ATTR_BODY_QUOTED): return self._handle_tag_selfclose() elif this == "<" and next == "/" and self._context & contexts.TAG_BODY: - self._handle_tag_close_open() + self._handle_tag_open_close() elif this == ">" and self._context & contexts.TAG_CLOSE: - self._handle_tag_close_close() + return self._handle_tag_close_close() else: self._write_text(this) self._head += 1