- Translation dict, contexts, parse_* and handle_* hooks in tokenizer (tags / v0.3)
@@ -73,6 +73,42 @@ class Tag(Node):
    TAGS_INVISIBLE = set((TAG_REF, TAG_GALLERY, TAG_MATH, TAG_NOINCLUDE))
    TAGS_VISIBLE = set(range(300)) - TAGS_INVISIBLE

    TRANSLATIONS = {
        "i": TAG_ITALIC,
        "em": TAG_ITALIC,
        "b": TAG_BOLD,
        "strong": TAG_BOLD,
        "u": TAG_UNDERLINE,
        "s": TAG_STRIKETHROUGH,
        "ul": TAG_UNORDERED_LIST,
        "ol": TAG_ORDERED_LIST,
        "dt": TAG_DEF_TERM,
        "dd": TAG_DEF_ITEM,
        "blockquote": TAG_BLOCKQUOTE,
"hl": TAG_RULE, | |||
"br": TAG_BREAK, | |||
"abbr": TAG_ABBR, | |||
"pre": TAG_PRE, | |||
"tt": TAG_MONOSPACE, | |||
"code": TAG_CODE, | |||
"span": TAG_SPAN, | |||
"div": TAG_DIV, | |||
"font": TAG_FONT, | |||
"small": TAG_SMALL, | |||
"big": TAG_BIG, | |||
"center": TAG_CENTER, | |||
"ref": TAG_REF, | |||
"gallery": TAG_GALLERY, | |||
"math": TAG_MATH, | |||
"nowiki": TAG_NOWIKI, | |||
"noinclude": TAG_NOINCLUDE, | |||
"includeonly": TAG_INCLUDEONLY, | |||
"onlyinclude": TAG_ONLYINCLUDE, | |||
"syntaxhighlight": TAG_SYNTAXHIGHLIGHT, | |||
"source": TAG_SYNTAXHIGHLIGHT, | |||
"poem": TAG_POEM, | |||
} | |||
def __init__(self, type_, tag, contents=None, attrs=None, showtag=True, | |||
self_closing=False, open_padding=0, close_padding=0): | |||
super(Tag, self).__init__() | |||
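
Note: TRANSLATIONS maps lowercased HTML tag names onto the existing TAG_* type constants, so synonymous tags ("i"/"em", "b"/"strong", "source"/"syntaxhighlight") resolve to the same node type. A minimal lookup sketch, assuming the constants and the dict stay class attributes on Tag (the helper name is hypothetical, not part of this diff):

    from mwparserfromhell.nodes.tag import Tag

    def translate_tag_name(name):
        # Hypothetical helper: resolve an HTML tag name to its TAG_* constant,
        # returning None for names the translation dict does not know about.
        return Tag.TRANSLATIONS.get(name.lower())

    assert translate_tag_name("em") == Tag.TAG_ITALIC
    assert translate_tag_name("Source") == Tag.TAG_SYNTAXHIGHLIGHT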
@@ -62,35 +62,56 @@ Local (stack-specific) contexts:
* :py:const:`COMMENT`
* :py:const:`TAG`
* :py:const:`TAG_OPEN`
* :py:const:`TAG_ATTR`
* :py:const:`TAG_ATTR_NAME`
* :py:const:`TAG_ATTR_BODY`
* :py:const:`TAG_ATTR_BODY_QUOTED`
* :py:const:`TAG_BODY`
* :py:const:`TAG_CLOSE`

Global contexts:

* :py:const:`GL_HEADING`
"""

# Local contexts:

TEMPLATE =             0b00000000000000000111
TEMPLATE_NAME =        0b00000000000000000001
TEMPLATE_PARAM_KEY =   0b00000000000000000010
TEMPLATE_PARAM_VALUE = 0b00000000000000000100

ARGUMENT =         0b00000000000000011000
ARGUMENT_NAME =    0b00000000000000001000
ARGUMENT_DEFAULT = 0b00000000000000010000

WIKILINK =       0b00000000000001100000
WIKILINK_TITLE = 0b00000000000000100000
WIKILINK_TEXT =  0b00000000000001000000

HEADING =         0b00000001111110000000
HEADING_LEVEL_1 = 0b00000000000010000000
HEADING_LEVEL_2 = 0b00000000000100000000
HEADING_LEVEL_3 = 0b00000000001000000000
HEADING_LEVEL_4 = 0b00000000010000000000
HEADING_LEVEL_5 = 0b00000000100000000000
HEADING_LEVEL_6 = 0b00000001000000000000

COMMENT = 0b00000010000000000000

TAG =                  0b11111100000000000000
TAG_OPEN =             0b00000100000000000000
TAG_ATTR =             0b00111000000000000000
TAG_ATTR_NAME =        0b00001000000000000000
TAG_ATTR_BODY =        0b00010000000000000000
TAG_ATTR_BODY_QUOTED = 0b00100000000000000000
TAG_BODY =             0b01000000000000000000
TAG_CLOSE =            0b10000000000000000000

# Global contexts:
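
Note: every context is still a distinct bit flag; the field is only widened from 14 to 20 bits to make room for the eight new TAG_* contexts above COMMENT, and the pre-existing values (TEMPLATE through COMMENT) are unchanged apart from zero-padding. A quick sanity sketch of the layout, assuming the usual module path mwparserfromhell/parser/contexts.py:

    from mwparserfromhell.parser import contexts

    # Aggregates are the OR of their member flags.
    assert contexts.TAG == (contexts.TAG_OPEN | contexts.TAG_ATTR |
                            contexts.TAG_BODY | contexts.TAG_CLOSE)
    assert contexts.TAG_ATTR == (contexts.TAG_ATTR_NAME | contexts.TAG_ATTR_BODY |
                                 contexts.TAG_ATTR_BODY_QUOTED)

    # Several contexts can be active at once; membership is tested with &,
    # exactly as the tokenizer dispatch further down does.
    context = contexts.TAG_OPEN | contexts.TAG_ATTR_NAME
    assert context & contexts.TAG
    assert not context & contexts.TAG_BODY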
@@ -767,7 +767,6 @@ Tokenizer_parse_heading(Tokenizer* self)
        self->global ^= GL_HEADING;
        return 0;
    }
    level = PyInt_FromSsize_t(heading->level);
    if (!level) {
        Py_DECREF(heading->title);
@@ -27,6 +27,7 @@ import string
from . import contexts
from . import tokens
from ..nodes.tag import Tag
from ..compat import htmlentities

__all__ = ["Tokenizer"]
@@ -420,6 +421,57 @@ class Tokenizer(object):
        self._write(tokens.CommentEnd())
        self._head += 2

    def _parse_tag(self):
        """Parse an HTML tag at the head of the wikicode string."""
        self._head += 1
        reset = self._head
        self._push()
        try:
            t_open, type_, self_close, o_pad = self._parse(contexts.TAG_OPEN)
            if not self_close:
                t_body = self._parse(contexts.TAG_BODY)
                t_close, c_pad = self._parse(contexts.TAG_CLOSE)
        except BadRoute:
            self._head = reset
            self._pop()
            self._write_text("<")
        else:
            self._pop()
            self._write(tokens.TagOpenOpen(type=type_, showtag=False))
            self._write_all(t_open)
            if self_close:
                self._write(tokens.TagCloseSelfclose(padding=o_pad))
            else:
                self._write(tokens.TagCloseOpen(padding=o_pad))
                self._write_all(t_body)
                self._write(tokens.TagOpenClose())
                self._write_all(t_close)
                self._write(tokens.TagCloseClose(padding=c_pad))
    def _handle_attribute(self):
        if not self._context & contexts.TAG_ATTR:
            pass  ## check name is valid

    def _handle_attribute_name(self):
        ## check if next character is a ", if so, set TAG_ATTR_BODY_QUOTED
        pass

    def _handle_quoted_attribute_close(self):
        pass

    def _handle_tag_close_open(self):
        pass  ## .padding

    def _handle_tag_selfclose(self):
        pass  ## .padding

    def _handle_tag_open_close(self):
        pass

    def _handle_tag_close_close(self):
        ## check that the closing name is the same as the opening name
        pass  ## .padding
    def _parse(self, context=0):
        """Parse the wikicode string, using *context* for when to stop."""
        self._push(context)
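
Note: _parse_tag collects the opening tag, the body, and the closing tag through three nested _parse() calls and then replays the collected tokens; on BadRoute it rewinds and re-emits the bare "<" as text. A hedged sketch of the token sequence it is aiming for on a plain non-self-closing tag, built only from token classes this diff references (attribute handling and padding values are placeholders, and the handle_* hooks above are still stubs in this commit):

    from mwparserfromhell.nodes.tag import Tag
    from mwparserfromhell.parser import tokens

    # Roughly what "<i>bar</i>" should tokenize to once the hooks are filled in.
    expected = [
        tokens.TagOpenOpen(type=Tag.TAG_ITALIC, showtag=False),  # "<" plus tag type
        tokens.Text(text="i"),                                    # opening tag name
        tokens.TagCloseOpen(padding=0),                           # ">" of the opening tag
        tokens.Text(text="bar"),                                  # tag body
        tokens.TagOpenClose(),                                    # "</"
        tokens.Text(text="i"),                                    # closing tag name
        tokens.TagCloseClose(padding=0),                          # final ">"
    ]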
@@ -432,7 +484,7 @@ class Tokenizer(object):
            if this is self.END:
                fail = (contexts.TEMPLATE | contexts.ARGUMENT |
                        contexts.WIKILINK | contexts.HEADING |
                        contexts.COMMENT | contexts.TAG)
                if self._context & contexts.TEMPLATE_PARAM_KEY:
                    self._pop()
                if self._context & fail:
@@ -484,6 +536,29 @@ class Tokenizer(object):
                    self._parse_comment()
                else:
                    self._write_text(this)
            elif this == "<" and not self._context & (contexts.TAG ^ contexts.TAG_BODY):
                self._parse_tag()
            elif this == " " and (self._context & contexts.TAG_OPEN and not
                                  self._context & contexts.TAG_ATTR_BODY_QUOTED):
                self._handle_attribute()
            elif this == "=" and self._context & contexts.TAG_ATTR_NAME:
                self._handle_attribute_name()
            elif this == '"' and self._context & contexts.TAG_ATTR_BODY_QUOTED:
                self._handle_quoted_attribute_close()
            elif this == "\n" and (self._context & contexts.TAG_OPEN and not
                                   self._context & contexts.TAG_ATTR_BODY_QUOTED):
                self._fail_route()
            elif this == ">" and (self._context & contexts.TAG_OPEN and not
                                  self._context & contexts.TAG_ATTR_BODY_QUOTED):
                return self._handle_tag_close_open()
            elif this == "/" and next == ">" and (
                    self._context & contexts.TAG_OPEN and not
                    self._context & contexts.TAG_ATTR_BODY_QUOTED):
                return self._handle_tag_selfclose()
            elif this == "<" and next == "/" and self._context & contexts.TAG_BODY:
                self._handle_tag_open_close()
            elif this == ">" and self._context & contexts.TAG_CLOSE:
                self._handle_tag_close_close()
            else:
                self._write_text(this)
            self._head += 1
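
Note on the "<" guard above: contexts.TAG ^ contexts.TAG_BODY strips TAG_BODY out of the TAG aggregate, so a fresh tag is only parsed when the head is not inside an opening or closing tag (or its attributes); a "<" inside a tag body still recurses into _parse_tag. A small hedged check of that mask, using the constant values from the contexts hunk:

    from mwparserfromhell.parser import contexts

    outside_body = contexts.TAG ^ contexts.TAG_BODY
    assert outside_body == (contexts.TAG_OPEN | contexts.TAG_ATTR |
                            contexts.TAG_CLOSE)

    # "<" while reading an opening tag's attributes: guard fails, no new tag.
    assert contexts.TAG_ATTR_NAME & outside_body
    # "<" inside the tag body: guard passes, a nested tag is parsed.
    assert not contexts.TAG_BODY & outside_body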