Browse Source

Tags should fully work now in tokenizer and builder.

Still need to do attributes.
tags/v0.3
Ben Kurtovic 12 years ago
parent
commit
7e46601b1d
3 changed files with 39 additions and 30 deletions
  1. +3
    -2
      mwparserfromhell/nodes/tag.py
  2. +2
    -0
      mwparserfromhell/parser/builder.py
  3. +34
    -28
      mwparserfromhell/parser/tokenizer.py

+ 3
- 2
mwparserfromhell/nodes/tag.py View File

@@ -70,8 +70,9 @@ class Tag(Node):
TAG_POEM = 202 TAG_POEM = 202


# Lists of tags: # Lists of tags:
TAGS_ALL = set(range(300))
TAGS_INVISIBLE = set((TAG_REF, TAG_GALLERY, TAG_MATH, TAG_NOINCLUDE)) TAGS_INVISIBLE = set((TAG_REF, TAG_GALLERY, TAG_MATH, TAG_NOINCLUDE))
TAGS_VISIBLE = set(range(300)) - TAGS_INVISIBLE
TAGS_VISIBLE = TAGS_ALL - TAGS_INVISIBLE


TRANSLATIONS = { TRANSLATIONS = {
"i": TAG_ITALIC, "i": TAG_ITALIC,
@@ -248,7 +249,7 @@ class Tag(Node):
@type.setter @type.setter
def type(self, value): def type(self, value):
value = int(value) value = int(value)
if value not in self.TAGS_INVISIBLE | self.TAGS_VISIBLE:
if value not in self.TAGS_ALL:
raise ValueError(value) raise ValueError(value)
self._type = value self._type = value




+ 2
- 0
mwparserfromhell/parser/builder.py View File

@@ -219,7 +219,9 @@ class Builder(object):
self_closing=True, open_padding=token.padding) self_closing=True, open_padding=token.padding)
elif isinstance(token, tokens.TagOpenClose): elif isinstance(token, tokens.TagOpenClose):
contents = self._pop() contents = self._pop()
self._push()
elif isinstance(token, tokens.TagCloseClose): elif isinstance(token, tokens.TagCloseClose):
self._pop()
return Tag(type_, tag, contents, attrs, showtag, False, return Tag(type_, tag, contents, attrs, showtag, False,
open_pad, token.padding) open_pad, token.padding)
else: else:


+ 34
- 28
mwparserfromhell/parser/tokenizer.py View File

@@ -423,8 +423,8 @@ class Tokenizer(object):


def _parse_tag(self): def _parse_tag(self):
"""Parse an HTML tag at the head of the wikicode string.""" """Parse an HTML tag at the head of the wikicode string."""
self._head += 1
reset = self._head reset = self._head
self._head += 1
try: try:
tokens = self._parse(contexts.TAG_OPEN) tokens = self._parse(contexts.TAG_OPEN)
except BadRoute: except BadRoute:
@@ -444,11 +444,24 @@ class Tokenizer(object):
except KeyError: except KeyError:
return Tag.TAG_UNKNOWN return Tag.TAG_UNKNOWN


def _handle_tag_close_name(self):
tag = self._get_tag_type_from_stack()
if tag is None:
self._fail_route()
self._write(tokens.TagOpenOpen(type=tag, showtag=False))
def _actually_close_tag_opening(self):
if self._context & contexts.TAG_ATTR:
if self._context & contexts.TAG_ATTR_BODY:
self._context ^= contexts.TAG_ATTR_BODY
if self._context & contexts.TAG_ATTR_BODY_QUOTED:
self._context ^= contexts.TAG_ATTR_BODY_QUOTED
else:
self._context ^= contexts.TAG_ATTR_NAME
else:
tag = self._get_tag_type_from_stack()
if tag is None:
self._fail_route()
self._write_first(tokens.TagOpenOpen(type=tag, showtag=True))

self._context ^= contexts.TAG_OPEN
self._context |= contexts.TAG_BODY
padding = "" # TODO
return padding


# def _handle_attribute(self): # def _handle_attribute(self):
# if not self._context & contexts.TAG_ATTR: # if not self._context & contexts.TAG_ATTR:
@@ -462,28 +475,18 @@ class Tokenizer(object):
# pass # pass


def _handle_tag_close_open(self): def _handle_tag_close_open(self):
if not self._context & contexts.TAG_ATTR:
self._handle_tag_close_name()

self._context ^= contexts.TAG_OPEN # also TAG_ATTR_*
self._context |= contexts.TAG_BODY

padding = "" # TODO
padding = self._actually_close_tag_opening()
self._write(tokens.TagCloseOpen(padding=padding)) self._write(tokens.TagCloseOpen(padding=padding))


def _handle_tag_selfclose(self): def _handle_tag_selfclose(self):
self._context ^= contexts.TAG_OPEN # also TAG_ATTR_*
self._context |= contexts.TAG_BODY

padding = "" # TODO
padding = self._actually_close_tag_opening()
self._write(tokens.TagCloseSelfclose(padding=padding)) self._write(tokens.TagCloseSelfclose(padding=padding))
self._pop()
self._head += 1
return self._pop()


def _handle_tag_open_close(self): def _handle_tag_open_close(self):
self._context ^= contexts.TAG_BODY
self._context |= contexts.TAG_CLOSE
self._write(tokens.TagOpenClose()) self._write(tokens.TagOpenClose())
self._push()
self._push(contexts.TAG_CLOSE)
self._head += 1 self._head += 1


def _handle_tag_close_close(self): def _handle_tag_close_close(self):
@@ -562,7 +565,8 @@ class Tokenizer(object):
self._parse_comment() self._parse_comment()
else: else:
self._write_text(this) self._write_text(this)
elif this == "<" and not self._context & (contexts.TAG ^ contexts.TAG_BODY):
elif this == "<" and next != "/" and (
not self._context & (contexts.TAG ^ contexts.TAG_BODY)):
self._parse_tag() self._parse_tag()
# elif this == " " and (self._context & contexts.TAG_OPEN and not # elif this == " " and (self._context & contexts.TAG_OPEN and not
# self._context & contexts.TAG_ATTR_BODY_QUOTED): # self._context & contexts.TAG_ATTR_BODY_QUOTED):
@@ -571,17 +575,19 @@ class Tokenizer(object):
# self._handle_attribute_name() # self._handle_attribute_name()
# elif this == '"' and self._context & contexts.TAG_ATTR_BODY_QUOTED: # elif this == '"' and self._context & contexts.TAG_ATTR_BODY_QUOTED:
# self._handle_quoted_attribute_close() # self._handle_quoted_attribute_close()
elif this == "\n" and (self._context & contexts.TAG_OPEN and not
self._context & contexts.TAG_ATTR_BODY_QUOTED):
elif this == "\n" and (
self._context & contexts.TAG_OPEN and not
self._context & contexts.TAG_ATTR_BODY_QUOTED):
if self._context & contexts.TAG_CLOSE: if self._context & contexts.TAG_CLOSE:
self._pop() self._pop()
self._fail_route() self._fail_route()
elif this == ">" and (self._context & contexts.TAG_OPEN and not
self._context & contexts.TAG_ATTR_BODY_QUOTED):
elif this == ">" and (
self._context & contexts.TAG_OPEN and not
self._context & contexts.TAG_ATTR_BODY_QUOTED):
self._handle_tag_close_open() self._handle_tag_close_open()
elif this == "/" and next == ">" and ( elif this == "/" and next == ">" and (
self._context & contexts.TAG_OPEN and not
self._context & contexts.TAG_ATTR_BODY_QUOTED):
self._context & contexts.TAG_OPEN and not
self._context & contexts.TAG_ATTR_BODY_QUOTED):
return self._handle_tag_selfclose() return self._handle_tag_selfclose()
elif this == "<" and next == "/" and self._context & contexts.TAG_BODY: elif this == "<" and next == "/" and self._context & contexts.TAG_BODY:
self._handle_tag_open_close() self._handle_tag_open_close()


Loading…
Cancel
Save