- Translation dict, contexts, parse_* and handle_* hooks in tokenizer.tags/v0.3
@@ -73,6 +73,42 @@ class Tag(Node): | |||||
TAGS_INVISIBLE = set((TAG_REF, TAG_GALLERY, TAG_MATH, TAG_NOINCLUDE)) | TAGS_INVISIBLE = set((TAG_REF, TAG_GALLERY, TAG_MATH, TAG_NOINCLUDE)) | ||||
TAGS_VISIBLE = set(range(300)) - TAGS_INVISIBLE | TAGS_VISIBLE = set(range(300)) - TAGS_INVISIBLE | ||||
# Maps lowercase HTML/extension tag names to their TAG_* type constants.
# Several names may share a constant (e.g. "i"/"em", "b"/"strong").
TRANSLATIONS = {
    "i": TAG_ITALIC,
    "em": TAG_ITALIC,
    "b": TAG_BOLD,
    "strong": TAG_BOLD,
    "u": TAG_UNDERLINE,
    "s": TAG_STRIKETHROUGH,
    "ul": TAG_UNORDERED_LIST,
    "ol": TAG_ORDERED_LIST,
    "dt": TAG_DEF_TERM,
    "dd": TAG_DEF_ITEM,
    "blockquote": TAG_BLOCKQUOTE,
    # The HTML horizontal-rule element is <hr>; there is no <hl> element.
    "hr": TAG_RULE,
    "br": TAG_BREAK,
    "abbr": TAG_ABBR,
    "pre": TAG_PRE,
    "tt": TAG_MONOSPACE,
    "code": TAG_CODE,
    "span": TAG_SPAN,
    "div": TAG_DIV,
    "font": TAG_FONT,
    "small": TAG_SMALL,
    "big": TAG_BIG,
    "center": TAG_CENTER,
    "ref": TAG_REF,
    "gallery": TAG_GALLERY,
    "math": TAG_MATH,
    "nowiki": TAG_NOWIKI,
    "noinclude": TAG_NOINCLUDE,
    "includeonly": TAG_INCLUDEONLY,
    "onlyinclude": TAG_ONLYINCLUDE,
    "syntaxhighlight": TAG_SYNTAXHIGHLIGHT,
    "source": TAG_SYNTAXHIGHLIGHT,
    "poem": TAG_POEM,
}
def __init__(self, type_, tag, contents=None, attrs=None, showtag=True, | def __init__(self, type_, tag, contents=None, attrs=None, showtag=True, | ||||
self_closing=False, open_padding=0, close_padding=0): | self_closing=False, open_padding=0, close_padding=0): | ||||
super(Tag, self).__init__() | super(Tag, self).__init__() | ||||
@@ -62,35 +62,56 @@ Local (stack-specific) contexts: | |||||
* :py:const:`COMMENT` | * :py:const:`COMMENT` | ||||
Global contexts: | |||||
* :py:const:`TAG` | |||||
* :py:const:`GL_HEADING` | |||||
""" | |||||
* :py:const:`TAG_OPEN` | |||||
* :py:const:`TAG_ATTR` | |||||
# Local contexts: | |||||
* :py:const:`TAG_ATTR_NAME` | |||||
* :py:const:`TAG_ATTR_BODY` | |||||
* :py:const:`TAG_ATTR_BODY_QUOTED` | |||||
TEMPLATE = 0b00000000000111 | |||||
TEMPLATE_NAME = 0b00000000000001 | |||||
TEMPLATE_PARAM_KEY = 0b00000000000010 | |||||
TEMPLATE_PARAM_VALUE = 0b00000000000100 | |||||
* :py:const:`TAG_BODY` | |||||
* :py:const:`TAG_CLOSE` | |||||
ARGUMENT = 0b00000000011000 | |||||
ARGUMENT_NAME = 0b00000000001000 | |||||
ARGUMENT_DEFAULT = 0b00000000010000 | |||||
Global contexts: | |||||
WIKILINK = 0b00000001100000 | |||||
WIKILINK_TITLE = 0b00000000100000 | |||||
WIKILINK_TEXT = 0b00000001000000 | |||||
* :py:const:`GL_HEADING` | |||||
""" | |||||
HEADING = 0b01111110000000 | |||||
HEADING_LEVEL_1 = 0b00000010000000 | |||||
HEADING_LEVEL_2 = 0b00000100000000 | |||||
HEADING_LEVEL_3 = 0b00001000000000 | |||||
HEADING_LEVEL_4 = 0b00010000000000 | |||||
HEADING_LEVEL_5 = 0b00100000000000 | |||||
HEADING_LEVEL_6 = 0b01000000000000 | |||||
# Local contexts: | |||||
COMMENT = 0b10000000000000 | |||||
# Each context is a single bit; the un-suffixed name of every group
# (TEMPLATE, ARGUMENT, WIKILINK, HEADING, TAG_ATTR, TAG) is the bitwise OR of
# the members listed beneath it, so ``context & GROUP`` tests for any member.

TEMPLATE =             0b00000000000000000111
TEMPLATE_NAME =        0b00000000000000000001
TEMPLATE_PARAM_KEY =   0b00000000000000000010
TEMPLATE_PARAM_VALUE = 0b00000000000000000100

ARGUMENT =             0b00000000000000011000
ARGUMENT_NAME =        0b00000000000000001000
ARGUMENT_DEFAULT =     0b00000000000000010000

WIKILINK =             0b00000000000001100000
WIKILINK_TITLE =       0b00000000000000100000
WIKILINK_TEXT =        0b00000000000001000000

HEADING =              0b00000001111110000000
HEADING_LEVEL_1 =      0b00000000000010000000
HEADING_LEVEL_2 =      0b00000000000100000000
HEADING_LEVEL_3 =      0b00000000001000000000
HEADING_LEVEL_4 =      0b00000000010000000000
HEADING_LEVEL_5 =      0b00000000100000000000
HEADING_LEVEL_6 =      0b00000001000000000000

COMMENT =              0b00000010000000000000

# TAG covers every tag-related bit, including the TAG_ATTR sub-group.
TAG =                  0b11111100000000000000
TAG_OPEN =             0b00000100000000000000
TAG_ATTR =             0b00111000000000000000
TAG_ATTR_NAME =        0b00001000000000000000
TAG_ATTR_BODY =        0b00010000000000000000
TAG_ATTR_BODY_QUOTED = 0b00100000000000000000
TAG_BODY =             0b01000000000000000000
TAG_CLOSE =            0b10000000000000000000
# Global contexts: | # Global contexts: | ||||
@@ -767,7 +767,6 @@ Tokenizer_parse_heading(Tokenizer* self) | |||||
self->global ^= GL_HEADING; | self->global ^= GL_HEADING; | ||||
return 0; | return 0; | ||||
} | } | ||||
level = PyInt_FromSsize_t(heading->level); | level = PyInt_FromSsize_t(heading->level); | ||||
if (!level) { | if (!level) { | ||||
Py_DECREF(heading->title); | Py_DECREF(heading->title); | ||||
@@ -27,6 +27,7 @@ import string | |||||
from . import contexts | from . import contexts | ||||
from . import tokens | from . import tokens | ||||
from ..nodes.tag import Tag | |||||
from ..compat import htmlentities | from ..compat import htmlentities | ||||
__all__ = ["Tokenizer"] | __all__ = ["Tokenizer"] | ||||
@@ -420,6 +421,57 @@ class Tokenizer(object): | |||||
self._write(tokens.CommentEnd()) | self._write(tokens.CommentEnd()) | ||||
self._head += 2 | self._head += 2 | ||||
def _parse_tag(self):
    """Parse an HTML tag at the head of the wikicode string.

    The opening tag, the body, and the closing tag are each parsed as a
    separate route. If every route succeeds, the matching ``Tag*`` tokens
    are written to the current stack; if any route raises ``BadRoute``,
    the head is rewound and the ``<`` is written as plain text instead.
    """
    # Remember the index of the "<" itself, not the character after it: the
    # dispatch loop in _parse() increments the head once more after this
    # method returns, so rewinding to the "<" resumes at the next character
    # instead of silently skipping it.
    reset = self._head
    self._head += 1
    self._push()
    try:
        t_open, type_, self_close, o_pad = self._parse(contexts.TAG_OPEN)
        if not self_close:
            # Only a non-self-closing tag has a body and a closing tag.
            t_body = self._parse(contexts.TAG_BODY)
            t_close, c_pad = self._parse(contexts.TAG_CLOSE)
    except BadRoute:
        self._head = reset
        self._pop()
        self._write_text("<")
    else:
        self._pop()
        # NOTE(review): showtag=False is kept from the original; confirm it
        # is intended for literal HTML tags.
        self._write(tokens.TagOpenOpen(type=type_, showtag=False))
        self._write_all(t_open)
        if self_close:
            self._write(tokens.TagCloseSelfclose(padding=o_pad))
        else:
            self._write(tokens.TagCloseOpen(padding=o_pad))
            self._write_all(t_body)
            self._write(tokens.TagOpenClose())
            self._write_all(t_close)
            self._write(tokens.TagCloseClose(padding=c_pad))
def _handle_attribute(self):
    """Handle an unquoted space inside an opening tag.

    Not implemented yet; currently a no-op placeholder.
    """
    if not self._context & contexts.TAG_ATTR:
        # TODO: check name is valid
        pass
def _handle_attribute_name(self): | |||||
## check if next character is a ", if so, set TAG_ATTR_BODY_QUOTED | |||||
pass | |||||
def _handle_quoted_attribute_close(self): | |||||
pass | |||||
def _handle_tag_close_open(self): | |||||
pass ## .padding | |||||
def _handle_tag_selfclose(self): | |||||
pass ## .padding | |||||
def _handle_tag_close_open(self): | |||||
pass | |||||
def _handle_tag_close_close(self): | |||||
## check that the closing name is the same as the opening name | |||||
pass ## .padding | |||||
def _parse(self, context=0): | def _parse(self, context=0): | ||||
"""Parse the wikicode string, using *context* for when to stop.""" | """Parse the wikicode string, using *context* for when to stop.""" | ||||
self._push(context) | self._push(context) | ||||
@@ -432,7 +484,7 @@ class Tokenizer(object): | |||||
if this is self.END: | if this is self.END: | ||||
fail = (contexts.TEMPLATE | contexts.ARGUMENT | | fail = (contexts.TEMPLATE | contexts.ARGUMENT | | ||||
contexts.WIKILINK | contexts.HEADING | | contexts.WIKILINK | contexts.HEADING | | ||||
contexts.COMMENT) | |||||
contexts.COMMENT | contexts.TAG) | |||||
if self._context & contexts.TEMPLATE_PARAM_KEY: | if self._context & contexts.TEMPLATE_PARAM_KEY: | ||||
self._pop() | self._pop() | ||||
if self._context & fail: | if self._context & fail: | ||||
@@ -484,6 +536,29 @@ class Tokenizer(object): | |||||
self._parse_comment() | self._parse_comment() | ||||
else: | else: | ||||
self._write_text(this) | self._write_text(this) | ||||
elif this == "<" and not self._context & (contexts.TAG ^ contexts.TAG_BODY): | |||||
self._parse_tag() | |||||
elif this == " " and (self._context & contexts.TAG_OPEN and not | |||||
self._context & contexts.TAG_ATTR_BODY_QUOTED): | |||||
self._handle_attribute() | |||||
elif this == "=" and self._context & contexts.TAG_ATTR_NAME: | |||||
self._handle_attribute_name() | |||||
elif this == '"' and self._context & contexts.TAG_ATTR_BODY_QUOTED: | |||||
self._handle_quoted_attribute_close() | |||||
elif this == "\n" and (self._context & contexts.TAG_OPEN and not | |||||
self._context & contexts.TAG_ATTR_BODY_QUOTED): | |||||
self._fail_route() | |||||
elif this == ">" and (self._context & contexts.TAG_ATTR_OPEN and not | |||||
self._context & contexts.TAG_ATTR_BODY_QUOTED): | |||||
return self._handle_tag_close_open() | |||||
elif this == "/" and next == ">" and ( | |||||
self._context & contexts.TAG_ATTR_OPEN and not | |||||
self._context & contexts.TAG_ATTR_BODY_QUOTED): | |||||
return self._handle_tag_selfclose() | |||||
elif this == "<" and next == "/" and self._context & contexts.TAG_BODY: | |||||
self._handle_tag_close_open() | |||||
elif this == ">" and self._context & contexts.TAG_CLOSE: | |||||
self._handle_tag_close_close() | |||||
else: | else: | ||||
self._write_text(this) | self._write_text(this) | ||||
self._head += 1 | self._head += 1 | ||||