From 7e46601b1d358a09dfa8641b03d6bb2a5eeb63c3 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 9 Dec 2012 00:20:21 -0500 Subject: [PATCH] Tags should fully work now in tokenizer and builder. Still need to do attributes. --- mwparserfromhell/nodes/tag.py | 5 +-- mwparserfromhell/parser/builder.py | 2 ++ mwparserfromhell/parser/tokenizer.py | 62 ++++++++++++++++++++---------------- 3 files changed, 39 insertions(+), 30 deletions(-) diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index 681a17a..48effa1 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -70,8 +70,9 @@ class Tag(Node): TAG_POEM = 202 # Lists of tags: + TAGS_ALL = set(range(300)) TAGS_INVISIBLE = set((TAG_REF, TAG_GALLERY, TAG_MATH, TAG_NOINCLUDE)) - TAGS_VISIBLE = set(range(300)) - TAGS_INVISIBLE + TAGS_VISIBLE = TAGS_ALL - TAGS_INVISIBLE TRANSLATIONS = { "i": TAG_ITALIC, @@ -248,7 +249,7 @@ class Tag(Node): @type.setter def type(self, value): value = int(value) - if value not in self.TAGS_INVISIBLE | self.TAGS_VISIBLE: + if value not in self.TAGS_ALL: raise ValueError(value) self._type = value diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index 61a8209..648842c 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -219,7 +219,9 @@ class Builder(object): self_closing=True, open_padding=token.padding) elif isinstance(token, tokens.TagOpenClose): contents = self._pop() + self._push() elif isinstance(token, tokens.TagCloseClose): + self._pop() return Tag(type_, tag, contents, attrs, showtag, False, open_pad, token.padding) else: diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 80d7610..2e72951 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -423,8 +423,8 @@ class Tokenizer(object): def _parse_tag(self): """Parse an HTML tag at the head of the wikicode string.""" - self._head += 1 reset = self._head + self._head += 1 try: tokens = self._parse(contexts.TAG_OPEN) except BadRoute: @@ -444,11 +444,24 @@ class Tokenizer(object): except KeyError: return Tag.TAG_UNKNOWN - def _handle_tag_close_name(self): - tag = self._get_tag_type_from_stack() - if tag is None: - self._fail_route() - self._write(tokens.TagOpenOpen(type=tag, showtag=False)) + def _actually_close_tag_opening(self): + if self._context & contexts.TAG_ATTR: + if self._context & contexts.TAG_ATTR_BODY: + self._context ^= contexts.TAG_ATTR_BODY + if self._context & contexts.TAG_ATTR_BODY_QUOTED: + self._context ^= contexts.TAG_ATTR_BODY_QUOTED + else: + self._context ^= contexts.TAG_ATTR_NAME + else: + tag = self._get_tag_type_from_stack() + if tag is None: + self._fail_route() + self._write_first(tokens.TagOpenOpen(type=tag, showtag=True)) + + self._context ^= contexts.TAG_OPEN + self._context |= contexts.TAG_BODY + padding = "" # TODO + return padding # def _handle_attribute(self): # if not self._context & contexts.TAG_ATTR: @@ -462,28 +475,18 @@ class Tokenizer(object): # pass def _handle_tag_close_open(self): - if not self._context & contexts.TAG_ATTR: - self._handle_tag_close_name() - - self._context ^= contexts.TAG_OPEN # also TAG_ATTR_* - self._context |= contexts.TAG_BODY - - padding = "" # TODO + padding = self._actually_close_tag_opening() self._write(tokens.TagCloseOpen(padding=padding)) def _handle_tag_selfclose(self): - self._context ^= contexts.TAG_OPEN # also TAG_ATTR_* - self._context |= contexts.TAG_BODY - - padding = "" # TODO + padding = self._actually_close_tag_opening() self._write(tokens.TagCloseSelfclose(padding=padding)) - self._pop() + self._head += 1 + return self._pop() def _handle_tag_open_close(self): - self._context ^= contexts.TAG_BODY - self._context |= contexts.TAG_CLOSE self._write(tokens.TagOpenClose()) - self._push() + self._push(contexts.TAG_CLOSE) self._head += 1 def _handle_tag_close_close(self): @@ -562,7 +565,8 @@ class Tokenizer(object): self._parse_comment() else: self._write_text(this) - elif this == "<" and not self._context & (contexts.TAG ^ contexts.TAG_BODY): + elif this == "<" and next != "/" and ( + not self._context & (contexts.TAG ^ contexts.TAG_BODY)): self._parse_tag() # elif this == " " and (self._context & contexts.TAG_OPEN and not # self._context & contexts.TAG_ATTR_BODY_QUOTED): @@ -571,17 +575,19 @@ class Tokenizer(object): # self._handle_attribute_name() # elif this == '"' and self._context & contexts.TAG_ATTR_BODY_QUOTED: # self._handle_quoted_attribute_close() - elif this == "\n" and (self._context & contexts.TAG_OPEN and not - self._context & contexts.TAG_ATTR_BODY_QUOTED): + elif this == "\n" and ( + self._context & contexts.TAG_OPEN and not + self._context & contexts.TAG_ATTR_BODY_QUOTED): if self._context & contexts.TAG_CLOSE: self._pop() self._fail_route() - elif this == ">" and (self._context & contexts.TAG_OPEN and not - self._context & contexts.TAG_ATTR_BODY_QUOTED): + elif this == ">" and ( + self._context & contexts.TAG_OPEN and not + self._context & contexts.TAG_ATTR_BODY_QUOTED): self._handle_tag_close_open() elif this == "/" and next == ">" and ( - self._context & contexts.TAG_OPEN and not - self._context & contexts.TAG_ATTR_BODY_QUOTED): + self._context & contexts.TAG_OPEN and not + self._context & contexts.TAG_ATTR_BODY_QUOTED): return self._handle_tag_selfclose() elif this == "<" and next == "/" and self._context & contexts.TAG_BODY: self._handle_tag_open_close()