
Starting tag work.

- Translation dict, contexts, parse_* and handle_* hooks in tokenizer.
tags/v0.3
Ben Kurtovic 12 years ago
parent
commit
d1a9ba9a34
4 changed files with 155 additions and 24 deletions
  1. mwparserfromhell/nodes/tag.py: +36 -0
  2. mwparserfromhell/parser/contexts.py: +43 -22
  3. mwparserfromhell/parser/tokenizer.c: +0 -1
  4. mwparserfromhell/parser/tokenizer.py: +76 -1

mwparserfromhell/nodes/tag.py: +36 -0

@@ -73,6 +73,42 @@ class Tag(Node):
    TAGS_INVISIBLE = set((TAG_REF, TAG_GALLERY, TAG_MATH, TAG_NOINCLUDE))
    TAGS_VISIBLE = set(range(300)) - TAGS_INVISIBLE

    TRANSLATIONS = {
        "i": TAG_ITALIC,
        "em": TAG_ITALIC,
        "b": TAG_BOLD,
        "strong": TAG_BOLD,
        "u": TAG_UNDERLINE,
        "s": TAG_STRIKETHROUGH,
        "ul": TAG_UNORDERED_LIST,
        "ol": TAG_ORDERED_LIST,
        "dt": TAG_DEF_TERM,
        "dd": TAG_DEF_ITEM,
        "blockquote": TAG_BLOCKQUOTE,
        "hl": TAG_RULE,
        "br": TAG_BREAK,
        "abbr": TAG_ABBR,
        "pre": TAG_PRE,
        "tt": TAG_MONOSPACE,
        "code": TAG_CODE,
        "span": TAG_SPAN,
        "div": TAG_DIV,
        "font": TAG_FONT,
        "small": TAG_SMALL,
        "big": TAG_BIG,
        "center": TAG_CENTER,
        "ref": TAG_REF,
        "gallery": TAG_GALLERY,
        "math": TAG_MATH,
        "nowiki": TAG_NOWIKI,
        "noinclude": TAG_NOINCLUDE,
        "includeonly": TAG_INCLUDEONLY,
        "onlyinclude": TAG_ONLYINCLUDE,
        "syntaxhighlight": TAG_SYNTAXHIGHLIGHT,
        "source": TAG_SYNTAXHIGHLIGHT,
        "poem": TAG_POEM,
    }

    def __init__(self, type_, tag, contents=None, attrs=None, showtag=True,
                 self_closing=False, open_padding=0, close_padding=0):
        super(Tag, self).__init__()
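
Not part of the commit, just for orientation: TRANSLATIONS maps HTML tag names to the numeric TAG_* constants already defined on Tag, so a parsed tag name can be resolved to a type. A minimal sketch of that lookup (the helper name and the .lower() normalization are assumptions, not shown in this diff):

```python
from mwparserfromhell.nodes.tag import Tag

def tag_type_for(name):
    """Resolve a raw tag name to its Tag.TAG_* constant, or None if unknown."""
    # Lowercasing is an illustrative assumption; the commit does not yet show
    # where or how the tokenizer performs this lookup.
    return Tag.TRANSLATIONS.get(name.lower())

assert tag_type_for("em") == Tag.TAG_ITALIC          # aliases share one constant
assert tag_type_for("source") == Tag.TAG_SYNTAXHIGHLIGHT
assert tag_type_for("video") is None                 # unrecognized tag name
```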


mwparserfromhell/parser/contexts.py: +43 -22

@@ -62,35 +62,56 @@ Local (stack-specific) contexts:

* :py:const:`COMMENT`

+* :py:const:`TAG`
+
+    * :py:const:`TAG_OPEN`
+    * :py:const:`TAG_ATTR`
+
+        * :py:const:`TAG_ATTR_NAME`
+        * :py:const:`TAG_ATTR_BODY`
+        * :py:const:`TAG_ATTR_BODY_QUOTED`
+
+    * :py:const:`TAG_BODY`
+    * :py:const:`TAG_CLOSE`
+
Global contexts:

* :py:const:`GL_HEADING`
"""

# Local contexts:

-TEMPLATE = 0b00000000000111
-TEMPLATE_NAME = 0b00000000000001
-TEMPLATE_PARAM_KEY = 0b00000000000010
-TEMPLATE_PARAM_VALUE = 0b00000000000100
+TEMPLATE = 0b00000000000000000111
+TEMPLATE_NAME = 0b00000000000000000001
+TEMPLATE_PARAM_KEY = 0b00000000000000000010
+TEMPLATE_PARAM_VALUE = 0b00000000000000000100

-ARGUMENT = 0b00000000011000
-ARGUMENT_NAME = 0b00000000001000
-ARGUMENT_DEFAULT = 0b00000000010000
+ARGUMENT = 0b00000000000000011000
+ARGUMENT_NAME = 0b00000000000000001000
+ARGUMENT_DEFAULT = 0b00000000000000010000

-WIKILINK = 0b00000001100000
-WIKILINK_TITLE = 0b00000000100000
-WIKILINK_TEXT = 0b00000001000000
+WIKILINK = 0b00000000000001100000
+WIKILINK_TITLE = 0b00000000000000100000
+WIKILINK_TEXT = 0b00000000000001000000

-HEADING = 0b01111110000000
-HEADING_LEVEL_1 = 0b00000010000000
-HEADING_LEVEL_2 = 0b00000100000000
-HEADING_LEVEL_3 = 0b00001000000000
-HEADING_LEVEL_4 = 0b00010000000000
-HEADING_LEVEL_5 = 0b00100000000000
-HEADING_LEVEL_6 = 0b01000000000000
+HEADING = 0b00000001111110000000
+HEADING_LEVEL_1 = 0b00000000000010000000
+HEADING_LEVEL_2 = 0b00000000000100000000
+HEADING_LEVEL_3 = 0b00000000001000000000
+HEADING_LEVEL_4 = 0b00000000010000000000
+HEADING_LEVEL_5 = 0b00000000100000000000
+HEADING_LEVEL_6 = 0b00000001000000000000

-COMMENT = 0b10000000000000
+COMMENT = 0b00000010000000000000
+
+TAG = 0b11111100000000000000
+TAG_OPEN = 0b00000100000000000000
+TAG_ATTR = 0b00111000000000000000
+TAG_ATTR_NAME = 0b00001000000000000000
+TAG_ATTR_BODY = 0b00010000000000000000
+TAG_ATTR_BODY_QUOTED = 0b00100000000000000000
+TAG_BODY = 0b01000000000000000000
+TAG_CLOSE = 0b10000000000000000000


# Global contexts:
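
Not part of the commit: a short sketch of how these bit flags compose. The aggregate values are simply the OR of their members, and the tokenizer tests its current context with bitwise AND, which is what the new TAG_* guards in tokenizer.py below rely on. Only the constants shown above are assumed:

```python
from mwparserfromhell.parser import contexts

# Aggregate flags are the bitwise OR of their members.
assert contexts.TAG == (contexts.TAG_OPEN | contexts.TAG_ATTR |
                        contexts.TAG_BODY | contexts.TAG_CLOSE)
assert contexts.TAG_ATTR == (contexts.TAG_ATTR_NAME | contexts.TAG_ATTR_BODY |
                             contexts.TAG_ATTR_BODY_QUOTED)

# A context is queried with bitwise AND: e.g. "inside an opening tag but not
# inside a quoted attribute value", the guard used by several handlers below.
context = contexts.TAG_OPEN | contexts.TAG_ATTR_NAME
assert context & contexts.TAG_OPEN
assert not context & contexts.TAG_ATTR_BODY_QUOTED
```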


mwparserfromhell/parser/tokenizer.c: +0 -1

@@ -767,7 +767,6 @@ Tokenizer_parse_heading(Tokenizer* self)
        self->global ^= GL_HEADING;
        return 0;
    }
-
    level = PyInt_FromSsize_t(heading->level);
    if (!level) {
        Py_DECREF(heading->title);


mwparserfromhell/parser/tokenizer.py: +76 -1

@@ -27,6 +27,7 @@ import string

from . import contexts
from . import tokens
from ..nodes.tag import Tag
from ..compat import htmlentities

__all__ = ["Tokenizer"]
@@ -420,6 +421,57 @@ class Tokenizer(object):
        self._write(tokens.CommentEnd())
        self._head += 2

    def _parse_tag(self):
        """Parse an HTML tag at the head of the wikicode string."""
        self._head += 1
        reset = self._head
        self._push()
        try:
            t_open, type_, self_close, o_pad = self._parse(contexts.TAG_OPEN)
            if not self_close:
                t_body = self._parse(contexts.TAG_BODY)
                t_close, c_pad = self._parse(contexts.TAG_CLOSE)
        except BadRoute:
            self._head = reset
            self._pop()
            self._write_text("<")
        else:
            self._pop()
            self._write(tokens.TagOpenOpen(type=type_, showtag=False))
            self._write_all(t_open)
            if self_close:
                self._write(tokens.TagCloseSelfclose(padding=o_pad))
            else:
                self._write(tokens.TagCloseOpen(padding=o_pad))
                self._write_all(t_body)
                self._write(tokens.TagOpenClose())
                self._write_all(t_close)
                self._write(tokens.TagCloseClose(padding=c_pad))

    def _handle_attribute(self):
        if not self._context & contexts.TAG_ATTR:
            ## check name is valid
            pass

    def _handle_attribute_name(self):
        ## check if next character is a ", if so, set TAG_ATTR_BODY_QUOTED
        pass

    def _handle_quoted_attribute_close(self):
        pass

    def _handle_tag_close_open(self):
        pass ## .padding

    def _handle_tag_selfclose(self):
        pass ## .padding

    def _handle_tag_open_close(self):
        pass

    def _handle_tag_close_close(self):
        ## check that the closing name is the same as the opening name
        pass ## .padding

    def _parse(self, context=0):
        """Parse the wikicode string, using *context* for when to stop."""
        self._push(context)
@@ -432,7 +484,7 @@ class Tokenizer(object):
            if this is self.END:
                fail = (contexts.TEMPLATE | contexts.ARGUMENT |
                        contexts.WIKILINK | contexts.HEADING |
-                        contexts.COMMENT)
+                        contexts.COMMENT | contexts.TAG)
                if self._context & contexts.TEMPLATE_PARAM_KEY:
                    self._pop()
                if self._context & fail:
@@ -484,6 +536,29 @@ class Tokenizer(object):
                    self._parse_comment()
                else:
                    self._write_text(this)
            elif this == "<" and not self._context & (contexts.TAG ^ contexts.TAG_BODY):
                self._parse_tag()
            elif this == " " and (self._context & contexts.TAG_OPEN and not
                                  self._context & contexts.TAG_ATTR_BODY_QUOTED):
                self._handle_attribute()
            elif this == "=" and self._context & contexts.TAG_ATTR_NAME:
                self._handle_attribute_name()
            elif this == '"' and self._context & contexts.TAG_ATTR_BODY_QUOTED:
                self._handle_quoted_attribute_close()
            elif this == "\n" and (self._context & contexts.TAG_OPEN and not
                                   self._context & contexts.TAG_ATTR_BODY_QUOTED):
                self._fail_route()
            elif this == ">" and (self._context & contexts.TAG_OPEN and not
                                  self._context & contexts.TAG_ATTR_BODY_QUOTED):
                return self._handle_tag_close_open()
            elif this == "/" and next == ">" and (
                    self._context & contexts.TAG_OPEN and not
                    self._context & contexts.TAG_ATTR_BODY_QUOTED):
                return self._handle_tag_selfclose()
            elif this == "<" and next == "/" and self._context & contexts.TAG_BODY:
                self._handle_tag_open_close()
            elif this == ">" and self._context & contexts.TAG_CLOSE:
                self._handle_tag_close_close()
            else:
                self._write_text(this)
            self._head += 1
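
Neither of the following two blocks is part of the diff; they sketch where this scaffolding is headed. First, the token stream _parse_tag is wired to emit for input like `<ref>foo</ref>` once the _handle_* stubs are filled in. The token classes are the ones _parse_tag writes above; the field values (type, padding) and the interleaved Text tokens are illustrative guesses:

```python
from mwparserfromhell.nodes.tag import Tag
from mwparserfromhell.parser import tokens

# Hypothetical eventual output of Tokenizer().tokenize("<ref>foo</ref>");
# at this commit the handlers are still stubs, so this is a target, not a test.
expected = [
    tokens.TagOpenOpen(type=Tag.TAG_REF, showtag=False),
    tokens.Text(text="ref"),            # opening tag name
    tokens.TagCloseOpen(padding=0),     # the ">" that closes the opening tag
    tokens.Text(text="foo"),            # tag body
    tokens.TagOpenClose(),              # the "</" of the closing tag
    tokens.Text(text="ref"),            # closing tag name
    tokens.TagCloseClose(padding=0),    # the final ">"
]
```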

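Second, one way the _handle_tag_close_close stub might eventually perform the name check its comment describes. The _tag_name attribute, the use of _pop() to collect the closing tag's tokens, and the strip/lower normalization are all assumptions about this in-progress design, not code from the repository:

```python
from mwparserfromhell.parser import tokens

def _handle_tag_close_close(self):
    """Sketch: verify </name> matches the opening tag, else fail the route."""
    closing = self._pop()    # tokens gathered while in the TAG_CLOSE context
    name = "".join(tok.text for tok in closing if isinstance(tok, tokens.Text))
    # self._tag_name is hypothetical: the name remembered while parsing TAG_OPEN.
    if name.strip().lower() != self._tag_name:
        self._fail_route()   # mismatched closing tag; BadRoute unwinds _parse_tag
    return closing
```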
