@@ -51,11 +51,11 @@ Local (stack-specific) contexts: | |||
* :py:const:`WIKILINK_TITLE` | |||
* :py:const:`WIKILINK_TEXT` | |||
* :py:const:`EXTERNAL_LINK` | |||
* :py:const:`EXT_LINK` | |||
* :py:const:`EXTERNAL_LINK_URL` | |||
* :py:const:`EXTERNAL_LINK_TITLE` | |||
* :py:const:`EXTERNAL_LINK_BRACKETS` | |||
* :py:const:`EXT_LINK_URL` | |||
* :py:const:`EXT_LINK_TITLE` | |||
* :py:const:`EXT_LINK_BRACKETS` | |||
* :py:const:`HEADING` | |||
@@ -100,6 +100,7 @@ Aggregate contexts: | |||
* :py:const:`FAIL` | |||
* :py:const:`UNSAFE` | |||
* :py:const:`DOUBLE` | |||
* :py:const:`INVALID_LINK` | |||
""" | |||
@@ -118,10 +119,10 @@ WIKILINK_TITLE = 1 << 5 | |||
WIKILINK_TEXT = 1 << 6 | |||
WIKILINK = WIKILINK_TITLE + WIKILINK_TEXT | |||
EXTERNAL_LINK_URL = 1 << 7 | |||
EXTERNAL_LINK_TITLE = 1 << 8 | |||
EXTERNAL_LINK_BRACKETS = 1 << 9 | |||
EXTERNAL_LINK = EXTERNAL_LINK_URL + EXTERNAL_LINK_TITLE | |||
EXT_LINK_URL = 1 << 7 | |||
EXT_LINK_TITLE = 1 << 8 | |||
EXT_LINK_BRACKETS = 1 << 9 | |||
EXT_LINK = EXT_LINK_URL + EXT_LINK_TITLE + EXT_LINK_BRACKETS | |||
HEADING_LEVEL_1 = 1 << 10 | |||
HEADING_LEVEL_2 = 1 << 11 | |||
@@ -161,7 +162,8 @@ GL_HEADING = 1 << 0 | |||
# Aggregate contexts: | |||
FAIL = TEMPLATE + ARGUMENT + WIKILINK + EXTERNAL_LINK + HEADING + TAG + STYLE | |||
FAIL = TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK + HEADING + TAG + STYLE | |||
UNSAFE = (TEMPLATE_NAME + WIKILINK_TITLE + TEMPLATE_PARAM_KEY + ARGUMENT_NAME + | |||
TAG_CLOSE) | |||
DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE | |||
INVALID_LINK = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK_URL |
@@ -2192,9 +2192,8 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||
if (Tokenizer_emit_char(self, this)) | |||
return NULL; | |||
} | |||
else if (this == next && next == *"[") { | |||
if (!(this_context & LC_WIKILINK_TITLE) && | |||
Tokenizer_CAN_RECURSE(self)) { | |||
else if (this == next && next == *"[" && Tokenizer_CAN_RECURSE(self)) { | |||
if (!(this_context & AGG_INVALID_LINK)) { | |||
if (Tokenizer_parse_wikilink(self)) | |||
return NULL; | |||
} | |||
@@ -2243,9 +2242,8 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||
return NULL; | |||
} | |||
} | |||
else if (this == *"<") { | |||
if (!(this_context & LC_TAG_CLOSE) && | |||
Tokenizer_CAN_RECURSE(self)) { | |||
else if (this == *"<" && !(this_context & LC_TAG_CLOSE)) { | |||
if (Tokenizer_CAN_RECURSE(self)) { | |||
if (Tokenizer_parse_tag(self)) | |||
return NULL; | |||
} | |||
@@ -2389,6 +2387,11 @@ static int load_tokens(void) | |||
WikilinkSeparator = PyObject_GetAttrString(tokens, "WikilinkSeparator"); | |||
WikilinkClose = PyObject_GetAttrString(tokens, "WikilinkClose"); | |||
ExternalLinkOpen = PyObject_GetAttrString(tokens, "ExternalLinkOpen"); | |||
ExternalLinkSeparator = PyObject_GetAttrString(tokens, | |||
"ExternalLinkSeparator"); | |||
ExternalLinkClose = PyObject_GetAttrString(tokens, "ExternalLinkClose"); | |||
HTMLEntityStart = PyObject_GetAttrString(tokens, "HTMLEntityStart"); | |||
HTMLEntityNumeric = PyObject_GetAttrString(tokens, "HTMLEntityNumeric"); | |||
HTMLEntityHex = PyObject_GetAttrString(tokens, "HTMLEntityHex"); | |||
@@ -82,6 +82,10 @@ static PyObject* WikilinkOpen; | |||
static PyObject* WikilinkSeparator; | |||
static PyObject* WikilinkClose; | |||
static PyObject* ExternalLinkOpen; | |||
static PyObject* ExternalLinkSeparator; | |||
static PyObject* ExternalLinkClose; | |||
static PyObject* HTMLEntityStart; | |||
static PyObject* HTMLEntityNumeric; | |||
static PyObject* HTMLEntityHex; | |||
@@ -104,48 +108,53 @@ static PyObject* TagCloseClose; | |||
/* Local contexts: */ | |||
#define LC_TEMPLATE 0x0000007 | |||
#define LC_TEMPLATE_NAME 0x0000001 | |||
#define LC_TEMPLATE_PARAM_KEY 0x0000002 | |||
#define LC_TEMPLATE_PARAM_VALUE 0x0000004 | |||
#define LC_ARGUMENT 0x0000018 | |||
#define LC_ARGUMENT_NAME 0x0000008 | |||
#define LC_ARGUMENT_DEFAULT 0x0000010 | |||
#define LC_WIKILINK 0x0000060 | |||
#define LC_WIKILINK_TITLE 0x0000020 | |||
#define LC_WIKILINK_TEXT 0x0000040 | |||
#define LC_HEADING 0x0001F80 | |||
#define LC_HEADING_LEVEL_1 0x0000080 | |||
#define LC_HEADING_LEVEL_2 0x0000100 | |||
#define LC_HEADING_LEVEL_3 0x0000200 | |||
#define LC_HEADING_LEVEL_4 0x0000400 | |||
#define LC_HEADING_LEVEL_5 0x0000800 | |||
#define LC_HEADING_LEVEL_6 0x0001000 | |||
#define LC_TAG 0x001E000 | |||
#define LC_TAG_OPEN 0x0002000 | |||
#define LC_TAG_ATTR 0x0004000 | |||
#define LC_TAG_BODY 0x0008000 | |||
#define LC_TAG_CLOSE 0x0010000 | |||
#define LC_STYLE 0x01E0000 | |||
#define LC_STYLE_ITALICS 0x0020000 | |||
#define LC_STYLE_BOLD 0x0040000 | |||
#define LC_STYLE_PASS_AGAIN 0x0080000 | |||
#define LC_STYLE_SECOND_PASS 0x0100000 | |||
#define LC_DLTERM 0x0200000 | |||
#define LC_SAFETY_CHECK 0xFC00000 | |||
#define LC_HAS_TEXT 0x0400000 | |||
#define LC_FAIL_ON_TEXT 0x0800000 | |||
#define LC_FAIL_NEXT 0x1000000 | |||
#define LC_FAIL_ON_LBRACE 0x2000000 | |||
#define LC_FAIL_ON_RBRACE 0x4000000 | |||
#define LC_FAIL_ON_EQUALS 0x8000000 | |||
#define LC_TEMPLATE 0x00000007 | |||
#define LC_TEMPLATE_NAME 0x00000001 | |||
#define LC_TEMPLATE_PARAM_KEY 0x00000002 | |||
#define LC_TEMPLATE_PARAM_VALUE 0x00000004 | |||
#define LC_ARGUMENT 0x00000018 | |||
#define LC_ARGUMENT_NAME 0x00000008 | |||
#define LC_ARGUMENT_DEFAULT 0x00000010 | |||
#define LC_WIKILINK 0x00000060 | |||
#define LC_WIKILINK_TITLE 0x00000020 | |||
#define LC_WIKILINK_TEXT 0x00000040 | |||
#define LC_EXT_LINK 0x00000380 | |||
#define LC_EXT_LINK_URL 0x00000080 | |||
#define LC_EXT_LINK_TITLE 0x00000100 | |||
#define LC_EXT_LINK_BRACKETS 0x00000200 | |||
#define LC_HEADING 0x0000FC00 | |||
#define LC_HEADING_LEVEL_1 0x00000400 | |||
#define LC_HEADING_LEVEL_2 0x00000800 | |||
#define LC_HEADING_LEVEL_3 0x00001000 | |||
#define LC_HEADING_LEVEL_4 0x00002000 | |||
#define LC_HEADING_LEVEL_5 0x00004000 | |||
#define LC_HEADING_LEVEL_6 0x00008000 | |||
#define LC_TAG 0x000F0000 | |||
#define LC_TAG_OPEN 0x00010000 | |||
#define LC_TAG_ATTR 0x00020000 | |||
#define LC_TAG_BODY 0x00040000 | |||
#define LC_TAG_CLOSE 0x00080000 | |||
#define LC_STYLE 0x00F00000 | |||
#define LC_STYLE_ITALICS 0x00100000 | |||
#define LC_STYLE_BOLD 0x00200000 | |||
#define LC_STYLE_PASS_AGAIN 0x00400000 | |||
#define LC_STYLE_SECOND_PASS 0x00800000 | |||
#define LC_DLTERM 0x01000000 | |||
#define LC_SAFETY_CHECK 0x7E000000 | |||
#define LC_HAS_TEXT 0x02000000 | |||
#define LC_FAIL_ON_TEXT 0x04000000 | |||
#define LC_FAIL_NEXT 0x08000000 | |||
#define LC_FAIL_ON_LBRACE 0x10000000 | |||
#define LC_FAIL_ON_RBRACE 0x20000000 | |||
#define LC_FAIL_ON_EQUALS 0x40000000 | |||
/* Global contexts: */ | |||
@@ -153,9 +162,10 @@ static PyObject* TagCloseClose; | |||
/* Aggregate contexts: */ | |||
/*
 * Aggregate masks: unions of the LC_* local-context bits defined above.
 * Each one corresponds to the aggregate constant of the same name
 * (FAIL, UNSAFE, DOUBLE, INVALID_LINK) in the Python contexts module.
 *
 * AGG_FAIL includes LC_EXT_LINK so that it matches the Python FAIL
 * aggregate, which includes EXT_LINK.
 *
 * NOTE(review): the Python UNSAFE aggregate also includes TAG_CLOSE, while
 * AGG_UNSAFE does not include LC_TAG_CLOSE -- confirm this is intentional.
 */
#define AGG_FAIL         (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK | LC_HEADING | LC_TAG | LC_STYLE)
#define AGG_UNSAFE       (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)
#define AGG_DOUBLE       (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE)
#define AGG_INVALID_LINK (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URL)
/* Tag contexts: */ | |||
@@ -311,6 +311,11 @@ class Tokenizer(object): | |||
self._head += 1 | |||
return self._pop() | |||
def _parse_external_link(self, brackets): | |||
"""Parse an external link at the head of the wikicode string.""" | |||
self._emit_text(self._read()) | |||
# raise NotImplementedError() | |||
def _parse_heading(self): | |||
"""Parse a section heading at the head of the wikicode string.""" | |||
self._global |= contexts.GL_HEADING | |||
@@ -898,8 +903,8 @@ class Tokenizer(object): | |||
return self._handle_argument_end() | |||
else: | |||
self._emit_text("}") | |||
elif this == next == "[": | |||
if not self._context & contexts.WIKILINK_TITLE and self._can_recurse(): | |||
elif this == next == "[" and self._can_recurse(): | |||
if not self._context & contexts.INVALID_LINK: | |||
self._parse_wikilink() | |||
else: | |||
self._emit_text("[") | |||
@@ -907,6 +912,11 @@ class Tokenizer(object): | |||
self._handle_wikilink_separator() | |||
elif this == next == "]" and self._context & contexts.WIKILINK: | |||
return self._handle_wikilink_end() | |||
elif this == "[" and not self._context & contexts.INVALID_LINK: ## or this == ":" | |||
if self._can_recurse(): | |||
self._parse_external_link(brackets=this == "[") | |||
else: | |||
self._emit_text("[") | |||
elif this == "=" and not self._global & contexts.GL_HEADING: | |||
if self._read(-1) in ("\n", self.START): | |||
self._parse_heading() | |||
@@ -928,8 +938,8 @@ class Tokenizer(object): | |||
self._handle_tag_open_close() | |||
else: | |||
self._handle_invalid_tag_start() | |||
elif this == "<": | |||
if not self._context & contexts.TAG_CLOSE and self._can_recurse(): | |||
elif this == "<" and not self._context & contexts.TAG_CLOSE: | |||
if self._can_recurse(): | |||
self._parse_tag() | |||
else: | |||
self._emit_text("<") | |||
@@ -12,6 +12,13 @@ output: [TemplateOpen(), ArgumentOpen(), ArgumentOpen(), Text(text="foo"), Argum | |||
--- | |||
name: link_in_template_name | |||
label: a wikilink inside a template name, which breaks the template | |||
input: "{{foo[[bar]]}}" | |||
output: [Text(text="{{foo"), WikilinkOpen(), Text(text="bar"), WikilinkClose(), Text(text="}}")] | |||
--- | |||
name: rich_heading | |||
label: a heading with templates/wikilinks in it | |||
input: "== Head{{ing}} [[with]] {{{funky|{{stuf}}}}} ==" | |||