Browse Source

Starting tag work.

- Translation dict, contexts, parse_* and handle_* hooks in tokenizer.
tags/v0.3
Ben Kurtovic 11 years ago
parent
commit
d1a9ba9a34
4 changed files with 155 additions and 24 deletions
  1. +36
    -0
      mwparserfromhell/nodes/tag.py
  2. +43
    -22
      mwparserfromhell/parser/contexts.py
  3. +0
    -1
      mwparserfromhell/parser/tokenizer.c
  4. +76
    -1
      mwparserfromhell/parser/tokenizer.py

+ 36
- 0
mwparserfromhell/nodes/tag.py View File

@@ -73,6 +73,42 @@ class Tag(Node):
TAGS_INVISIBLE = set((TAG_REF, TAG_GALLERY, TAG_MATH, TAG_NOINCLUDE))
TAGS_VISIBLE = set(range(300)) - TAGS_INVISIBLE

# Maps a lowercase HTML tag name to the tag-type constant used for it.
# Several names may share one type ("i"/"em", "b"/"strong",
# "syntaxhighlight"/"source").
TRANSLATIONS = {
    "i": TAG_ITALIC,
    "em": TAG_ITALIC,
    "b": TAG_BOLD,
    "strong": TAG_BOLD,
    "u": TAG_UNDERLINE,
    "s": TAG_STRIKETHROUGH,
    "ul": TAG_UNORDERED_LIST,
    "ol": TAG_ORDERED_LIST,
    "dt": TAG_DEF_TERM,
    "dd": TAG_DEF_ITEM,
    "blockquote": TAG_BLOCKQUOTE,
    # The HTML horizontal-rule element is <hr>; the previous key "hl" is not
    # an HTML tag, so rules could never be looked up by name.
    "hr": TAG_RULE,
    "br": TAG_BREAK,
    "abbr": TAG_ABBR,
    "pre": TAG_PRE,
    "tt": TAG_MONOSPACE,
    "code": TAG_CODE,
    "span": TAG_SPAN,
    "div": TAG_DIV,
    "font": TAG_FONT,
    "small": TAG_SMALL,
    "big": TAG_BIG,
    "center": TAG_CENTER,
    "ref": TAG_REF,
    "gallery": TAG_GALLERY,
    "math": TAG_MATH,
    "nowiki": TAG_NOWIKI,
    "noinclude": TAG_NOINCLUDE,
    "includeonly": TAG_INCLUDEONLY,
    "onlyinclude": TAG_ONLYINCLUDE,
    "syntaxhighlight": TAG_SYNTAXHIGHLIGHT,
    "source": TAG_SYNTAXHIGHLIGHT,
    "poem": TAG_POEM,
}

def __init__(self, type_, tag, contents=None, attrs=None, showtag=True,
self_closing=False, open_padding=0, close_padding=0):
super(Tag, self).__init__()


+ 43
- 22
mwparserfromhell/parser/contexts.py View File

@@ -62,35 +62,56 @@ Local (stack-specific) contexts:

* :py:const:`COMMENT`

Global contexts:
* :py:const:`TAG`

* :py:const:`GL_HEADING`
"""
* :py:const:`TAG_OPEN`
* :py:const:`TAG_ATTR`

# Local contexts:
* :py:const:`TAG_ATTR_NAME`
* :py:const:`TAG_ATTR_BODY`
* :py:const:`TAG_ATTR_BODY_QUOTED`

TEMPLATE = 0b00000000000111
TEMPLATE_NAME = 0b00000000000001
TEMPLATE_PARAM_KEY = 0b00000000000010
TEMPLATE_PARAM_VALUE = 0b00000000000100
* :py:const:`TAG_BODY`
* :py:const:`TAG_CLOSE`

ARGUMENT = 0b00000000011000
ARGUMENT_NAME = 0b00000000001000
ARGUMENT_DEFAULT = 0b00000000010000
Global contexts:

WIKILINK = 0b00000001100000
WIKILINK_TITLE = 0b00000000100000
WIKILINK_TEXT = 0b00000001000000
* :py:const:`GL_HEADING`
"""

HEADING = 0b01111110000000
HEADING_LEVEL_1 = 0b00000010000000
HEADING_LEVEL_2 = 0b00000100000000
HEADING_LEVEL_3 = 0b00001000000000
HEADING_LEVEL_4 = 0b00010000000000
HEADING_LEVEL_5 = 0b00100000000000
HEADING_LEVEL_6 = 0b01000000000000
# Local contexts:

COMMENT = 0b10000000000000
# Every leaf context owns exactly one bit. Each group mask (TEMPLATE,
# ARGUMENT, WIKILINK, HEADING, TAG_ATTR, TAG) is the bitwise OR of its
# members, so ``context & GROUP`` is non-zero whenever any member is set.

TEMPLATE_NAME = 1 << 0
TEMPLATE_PARAM_KEY = 1 << 1
TEMPLATE_PARAM_VALUE = 1 << 2
TEMPLATE = TEMPLATE_NAME | TEMPLATE_PARAM_KEY | TEMPLATE_PARAM_VALUE

ARGUMENT_NAME = 1 << 3
ARGUMENT_DEFAULT = 1 << 4
ARGUMENT = ARGUMENT_NAME | ARGUMENT_DEFAULT

WIKILINK_TITLE = 1 << 5
WIKILINK_TEXT = 1 << 6
WIKILINK = WIKILINK_TITLE | WIKILINK_TEXT

HEADING_LEVEL_1 = 1 << 7
HEADING_LEVEL_2 = 1 << 8
HEADING_LEVEL_3 = 1 << 9
HEADING_LEVEL_4 = 1 << 10
HEADING_LEVEL_5 = 1 << 11
HEADING_LEVEL_6 = 1 << 12
HEADING = (HEADING_LEVEL_1 | HEADING_LEVEL_2 | HEADING_LEVEL_3 |
           HEADING_LEVEL_4 | HEADING_LEVEL_5 | HEADING_LEVEL_6)

COMMENT = 1 << 13

TAG_OPEN = 1 << 14
TAG_ATTR_NAME = 1 << 15
TAG_ATTR_BODY = 1 << 16
TAG_ATTR_BODY_QUOTED = 1 << 17
TAG_ATTR = TAG_ATTR_NAME | TAG_ATTR_BODY | TAG_ATTR_BODY_QUOTED
TAG_BODY = 1 << 18
TAG_CLOSE = 1 << 19
TAG = TAG_OPEN | TAG_ATTR | TAG_BODY | TAG_CLOSE


# Global contexts:


+ 0
- 1
mwparserfromhell/parser/tokenizer.c View File

@@ -767,7 +767,6 @@ Tokenizer_parse_heading(Tokenizer* self)
self->global ^= GL_HEADING;
return 0;
}

level = PyInt_FromSsize_t(heading->level);
if (!level) {
Py_DECREF(heading->title);


+ 76
- 1
mwparserfromhell/parser/tokenizer.py View File

@@ -27,6 +27,7 @@ import string

from . import contexts
from . import tokens
from ..nodes.tag import Tag
from ..compat import htmlentities

__all__ = ["Tokenizer"]
@@ -420,6 +421,57 @@ class Tokenizer(object):
self._write(tokens.CommentEnd())
self._head += 2

def _parse_tag(self):
    """Parse an HTML tag at the head of the wikicode string.

    The head is moved past the opening ``<`` and a fresh stack is pushed.
    The opening section is parsed in the ``TAG_OPEN`` context and, unless
    the tag turns out to be self-closing, the body and closing sections are
    parsed in ``TAG_BODY`` and ``TAG_CLOSE``. If any of those raises
    ``BadRoute``, the head is restored, the stack is popped and a literal
    ``"<"`` is written as text instead.
    """
    self._head += 1
    start = self._head
    self._push()
    try:
        # The TAG_OPEN parse is unpacked as (tokens, type, self-closing
        # flag, padding); the TAG_CLOSE parse as (tokens, padding).
        open_tokens, tag_type, is_self_closing, open_padding = self._parse(
            contexts.TAG_OPEN)
        if not is_self_closing:
            body_tokens = self._parse(contexts.TAG_BODY)
            close_tokens, close_padding = self._parse(contexts.TAG_CLOSE)
    except BadRoute:
        # Not a valid tag after all: rewind and emit the "<" as plain text.
        self._head = start
        self._pop()
        self._write_text("<")
        return

    self._pop()
    # NOTE(review): showtag=False for a tag written out in HTML syntax looks
    # surprising -- confirm this is the intended value.
    self._write(tokens.TagOpenOpen(type=tag_type, showtag=False))
    self._write_all(open_tokens)
    if is_self_closing:
        self._write(tokens.TagCloseSelfclose(padding=open_padding))
        return
    self._write(tokens.TagCloseOpen(padding=open_padding))
    self._write_all(body_tokens)
    self._write(tokens.TagOpenClose())
    self._write_all(close_tokens)
    self._write(tokens.TagCloseClose(padding=close_padding))

def _handle_attribute(self):
if not self._context & contexts.TAG_ATTR:
## check name is valid

def _handle_attribute_name(self):
## check if next character is a ", if so, set TAG_ATTR_BODY_QUOTED
pass

def _handle_quoted_attribute_close(self):
pass

def _handle_tag_close_open(self):
pass ## .padding

def _handle_tag_selfclose(self):
pass ## .padding

def _handle_tag_close_open(self):
pass

def _handle_tag_close_close(self):
## check that the closing name is the same as the opening name
pass ## .padding

def _parse(self, context=0):
"""Parse the wikicode string, using *context* for when to stop."""
self._push(context)
@@ -432,7 +484,7 @@ class Tokenizer(object):
if this is self.END:
fail = (contexts.TEMPLATE | contexts.ARGUMENT |
contexts.WIKILINK | contexts.HEADING |
contexts.COMMENT)
contexts.COMMENT | contexts.TAG)
if self._context & contexts.TEMPLATE_PARAM_KEY:
self._pop()
if self._context & fail:
@@ -484,6 +536,29 @@ class Tokenizer(object):
self._parse_comment()
else:
self._write_text(this)
elif this == "<" and not self._context & (contexts.TAG ^ contexts.TAG_BODY):
self._parse_tag()
elif this == " " and (self._context & contexts.TAG_OPEN and not
self._context & contexts.TAG_ATTR_BODY_QUOTED):
self._handle_attribute()
elif this == "=" and self._context & contexts.TAG_ATTR_NAME:
self._handle_attribute_name()
elif this == '"' and self._context & contexts.TAG_ATTR_BODY_QUOTED:
self._handle_quoted_attribute_close()
elif this == "\n" and (self._context & contexts.TAG_OPEN and not
self._context & contexts.TAG_ATTR_BODY_QUOTED):
self._fail_route()
elif this == ">" and (self._context & contexts.TAG_ATTR_OPEN and not
self._context & contexts.TAG_ATTR_BODY_QUOTED):
return self._handle_tag_close_open()
elif this == "/" and next == ">" and (
self._context & contexts.TAG_ATTR_OPEN and not
self._context & contexts.TAG_ATTR_BODY_QUOTED):
return self._handle_tag_selfclose()
elif this == "<" and next == "/" and self._context & contexts.TAG_BODY:
self._handle_tag_close_open()
elif this == ">" and self._context & contexts.TAG_CLOSE:
self._handle_tag_close_close()
else:
self._write_text(this)
self._head += 1


Loading…
Cancel
Save