@@ -51,11 +51,11 @@ Local (stack-specific) contexts: | |||||
* :py:const:`WIKILINK_TITLE` | * :py:const:`WIKILINK_TITLE` | ||||
* :py:const:`WIKILINK_TEXT` | * :py:const:`WIKILINK_TEXT` | ||||
* :py:const:`EXTERNAL_LINK` | |||||
* :py:const:`EXT_LINK` | |||||
* :py:const:`EXTERNAL_LINK_URL` | |||||
* :py:const:`EXTERNAL_LINK_TITLE` | |||||
* :py:const:`EXTERNAL_LINK_BRACKETS` | |||||
* :py:const:`EXT_LINK_URL` | |||||
* :py:const:`EXT_LINK_TITLE` | |||||
* :py:const:`EXT_LINK_BRACKETS` | |||||
* :py:const:`HEADING` | * :py:const:`HEADING` | ||||
@@ -100,6 +100,7 @@ Aggregate contexts: | |||||
* :py:const:`FAIL` | * :py:const:`FAIL` | ||||
* :py:const:`UNSAFE` | * :py:const:`UNSAFE` | ||||
* :py:const:`DOUBLE` | * :py:const:`DOUBLE` | ||||
* :py:const:`INVALID_LINK` | |||||
""" | """ | ||||
@@ -118,10 +119,10 @@ WIKILINK_TITLE = 1 << 5 | |||||
WIKILINK_TEXT = 1 << 6 | WIKILINK_TEXT = 1 << 6 | ||||
WIKILINK = WIKILINK_TITLE + WIKILINK_TEXT | WIKILINK = WIKILINK_TITLE + WIKILINK_TEXT | ||||
EXTERNAL_LINK_URL = 1 << 7 | |||||
EXTERNAL_LINK_TITLE = 1 << 8 | |||||
EXTERNAL_LINK_BRACKETS = 1 << 9 | |||||
EXTERNAL_LINK = EXTERNAL_LINK_URL + EXTERNAL_LINK_TITLE | |||||
EXT_LINK_URL = 1 << 7 | |||||
EXT_LINK_TITLE = 1 << 8 | |||||
EXT_LINK_BRACKETS = 1 << 9 | |||||
EXT_LINK = EXT_LINK_URL + EXT_LINK_TITLE + EXT_LINK_BRACKETS | |||||
HEADING_LEVEL_1 = 1 << 10 | HEADING_LEVEL_1 = 1 << 10 | ||||
HEADING_LEVEL_2 = 1 << 11 | HEADING_LEVEL_2 = 1 << 11 | ||||
@@ -161,7 +162,8 @@ GL_HEADING = 1 << 0 | |||||
# Aggregate contexts: | # Aggregate contexts: | ||||
FAIL = TEMPLATE + ARGUMENT + WIKILINK + EXTERNAL_LINK + HEADING + TAG + STYLE | |||||
FAIL = TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK + HEADING + TAG + STYLE | |||||
UNSAFE = (TEMPLATE_NAME + WIKILINK_TITLE + TEMPLATE_PARAM_KEY + ARGUMENT_NAME + | UNSAFE = (TEMPLATE_NAME + WIKILINK_TITLE + TEMPLATE_PARAM_KEY + ARGUMENT_NAME + | ||||
TAG_CLOSE) | TAG_CLOSE) | ||||
DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE | DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE | ||||
INVALID_LINK = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK_URL |
@@ -2192,9 +2192,8 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||||
if (Tokenizer_emit_char(self, this)) | if (Tokenizer_emit_char(self, this)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (this == next && next == *"[") { | |||||
if (!(this_context & LC_WIKILINK_TITLE) && | |||||
Tokenizer_CAN_RECURSE(self)) { | |||||
else if (this == next && next == *"[" && Tokenizer_CAN_RECURSE(self)) { | |||||
if (!(this_context & AGG_INVALID_LINK)) { | |||||
if (Tokenizer_parse_wikilink(self)) | if (Tokenizer_parse_wikilink(self)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
@@ -2243,9 +2242,8 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||||
return NULL; | return NULL; | ||||
} | } | ||||
} | } | ||||
else if (this == *"<") { | |||||
if (!(this_context & LC_TAG_CLOSE) && | |||||
Tokenizer_CAN_RECURSE(self)) { | |||||
else if (this == *"<" && !(this_context & LC_TAG_CLOSE)) { | |||||
if (Tokenizer_CAN_RECURSE(self)) { | |||||
if (Tokenizer_parse_tag(self)) | if (Tokenizer_parse_tag(self)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
@@ -2389,6 +2387,11 @@ static int load_tokens(void) | |||||
WikilinkSeparator = PyObject_GetAttrString(tokens, "WikilinkSeparator"); | WikilinkSeparator = PyObject_GetAttrString(tokens, "WikilinkSeparator"); | ||||
WikilinkClose = PyObject_GetAttrString(tokens, "WikilinkClose"); | WikilinkClose = PyObject_GetAttrString(tokens, "WikilinkClose"); | ||||
ExternalLinkOpen = PyObject_GetAttrString(tokens, "ExternalLinkOpen"); | |||||
ExternalLinkSeparator = PyObject_GetAttrString(tokens, | |||||
"ExternalLinkSeparator"); | |||||
ExternalLinkClose = PyObject_GetAttrString(tokens, "ExternalLinkClose"); | |||||
HTMLEntityStart = PyObject_GetAttrString(tokens, "HTMLEntityStart"); | HTMLEntityStart = PyObject_GetAttrString(tokens, "HTMLEntityStart"); | ||||
HTMLEntityNumeric = PyObject_GetAttrString(tokens, "HTMLEntityNumeric"); | HTMLEntityNumeric = PyObject_GetAttrString(tokens, "HTMLEntityNumeric"); | ||||
HTMLEntityHex = PyObject_GetAttrString(tokens, "HTMLEntityHex"); | HTMLEntityHex = PyObject_GetAttrString(tokens, "HTMLEntityHex"); | ||||
@@ -82,6 +82,10 @@ static PyObject* WikilinkOpen; | |||||
static PyObject* WikilinkSeparator; | static PyObject* WikilinkSeparator; | ||||
static PyObject* WikilinkClose; | static PyObject* WikilinkClose; | ||||
static PyObject* ExternalLinkOpen; | |||||
static PyObject* ExternalLinkSeparator; | |||||
static PyObject* ExternalLinkClose; | |||||
static PyObject* HTMLEntityStart; | static PyObject* HTMLEntityStart; | ||||
static PyObject* HTMLEntityNumeric; | static PyObject* HTMLEntityNumeric; | ||||
static PyObject* HTMLEntityHex; | static PyObject* HTMLEntityHex; | ||||
@@ -104,48 +108,53 @@ static PyObject* TagCloseClose; | |||||
/* Local contexts: */ | /* Local contexts: */ | ||||
#define LC_TEMPLATE 0x0000007 | |||||
#define LC_TEMPLATE_NAME 0x0000001 | |||||
#define LC_TEMPLATE_PARAM_KEY 0x0000002 | |||||
#define LC_TEMPLATE_PARAM_VALUE 0x0000004 | |||||
#define LC_ARGUMENT 0x0000018 | |||||
#define LC_ARGUMENT_NAME 0x0000008 | |||||
#define LC_ARGUMENT_DEFAULT 0x0000010 | |||||
#define LC_WIKILINK 0x0000060 | |||||
#define LC_WIKILINK_TITLE 0x0000020 | |||||
#define LC_WIKILINK_TEXT 0x0000040 | |||||
#define LC_HEADING 0x0001F80 | |||||
#define LC_HEADING_LEVEL_1 0x0000080 | |||||
#define LC_HEADING_LEVEL_2 0x0000100 | |||||
#define LC_HEADING_LEVEL_3 0x0000200 | |||||
#define LC_HEADING_LEVEL_4 0x0000400 | |||||
#define LC_HEADING_LEVEL_5 0x0000800 | |||||
#define LC_HEADING_LEVEL_6 0x0001000 | |||||
#define LC_TAG 0x001E000 | |||||
#define LC_TAG_OPEN 0x0002000 | |||||
#define LC_TAG_ATTR 0x0004000 | |||||
#define LC_TAG_BODY 0x0008000 | |||||
#define LC_TAG_CLOSE 0x0010000 | |||||
#define LC_STYLE 0x01E0000 | |||||
#define LC_STYLE_ITALICS 0x0020000 | |||||
#define LC_STYLE_BOLD 0x0040000 | |||||
#define LC_STYLE_PASS_AGAIN 0x0080000 | |||||
#define LC_STYLE_SECOND_PASS 0x0100000 | |||||
#define LC_DLTERM 0x0200000 | |||||
#define LC_SAFETY_CHECK 0xFC00000 | |||||
#define LC_HAS_TEXT 0x0400000 | |||||
#define LC_FAIL_ON_TEXT 0x0800000 | |||||
#define LC_FAIL_NEXT 0x1000000 | |||||
#define LC_FAIL_ON_LBRACE 0x2000000 | |||||
#define LC_FAIL_ON_RBRACE 0x4000000 | |||||
#define LC_FAIL_ON_EQUALS 0x8000000 | |||||
#define LC_TEMPLATE 0x00000007 | |||||
#define LC_TEMPLATE_NAME 0x00000001 | |||||
#define LC_TEMPLATE_PARAM_KEY 0x00000002 | |||||
#define LC_TEMPLATE_PARAM_VALUE 0x00000004 | |||||
#define LC_ARGUMENT 0x00000018 | |||||
#define LC_ARGUMENT_NAME 0x00000008 | |||||
#define LC_ARGUMENT_DEFAULT 0x00000010 | |||||
#define LC_WIKILINK 0x00000060 | |||||
#define LC_WIKILINK_TITLE 0x00000020 | |||||
#define LC_WIKILINK_TEXT 0x00000040 | |||||
#define LC_EXT_LINK 0x00000380 | |||||
#define LC_EXT_LINK_URL 0x00000080 | |||||
#define LC_EXT_LINK_TITLE 0x00000100 | |||||
#define LC_EXT_LINK_BRACKETS 0x00000200 | |||||
#define LC_HEADING 0x0000FC00 | |||||
#define LC_HEADING_LEVEL_1 0x00000400 | |||||
#define LC_HEADING_LEVEL_2 0x00000800 | |||||
#define LC_HEADING_LEVEL_3 0x00001000 | |||||
#define LC_HEADING_LEVEL_4 0x00002000 | |||||
#define LC_HEADING_LEVEL_5 0x00004000 | |||||
#define LC_HEADING_LEVEL_6 0x00008000 | |||||
#define LC_TAG 0x000F0000 | |||||
#define LC_TAG_OPEN 0x00010000 | |||||
#define LC_TAG_ATTR 0x00020000 | |||||
#define LC_TAG_BODY 0x00040000 | |||||
#define LC_TAG_CLOSE 0x00080000 | |||||
#define LC_STYLE 0x00F00000 | |||||
#define LC_STYLE_ITALICS 0x00100000 | |||||
#define LC_STYLE_BOLD 0x00200000 | |||||
#define LC_STYLE_PASS_AGAIN 0x00400000 | |||||
#define LC_STYLE_SECOND_PASS 0x00800000 | |||||
#define LC_DLTERM 0x01000000 | |||||
#define LC_SAFETY_CHECK 0x7E000000 | |||||
#define LC_HAS_TEXT 0x02000000 | |||||
#define LC_FAIL_ON_TEXT 0x04000000 | |||||
#define LC_FAIL_NEXT 0x08000000 | |||||
#define LC_FAIL_ON_LBRACE 0x10000000 | |||||
#define LC_FAIL_ON_RBRACE 0x20000000 | |||||
#define LC_FAIL_ON_EQUALS 0x40000000 | |||||
/* Global contexts: */ | /* Global contexts: */ | ||||
@@ -153,9 +162,10 @@ static PyObject* TagCloseClose; | |||||
/* Aggregate contexts: */ | /* Aggregate contexts: */ | ||||
#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_HEADING | LC_TAG | LC_STYLE) | |||||
#define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) | |||||
#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE) | |||||
#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_HEADING | LC_TAG | LC_STYLE) | |||||
#define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) | |||||
#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE) | |||||
#define AGG_INVALID_LINK (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URL) | |||||
/* Tag contexts: */ | /* Tag contexts: */ | ||||
@@ -311,6 +311,11 @@ class Tokenizer(object): | |||||
self._head += 1 | self._head += 1 | ||||
return self._pop() | return self._pop() | ||||
def _parse_external_link(self, brackets): | |||||
"""Parse an external link at the head of the wikicode string.""" | |||||
self._emit_text(self._read()) | |||||
# raise NotImplementedError() | |||||
def _parse_heading(self): | def _parse_heading(self): | ||||
"""Parse a section heading at the head of the wikicode string.""" | """Parse a section heading at the head of the wikicode string.""" | ||||
self._global |= contexts.GL_HEADING | self._global |= contexts.GL_HEADING | ||||
@@ -898,8 +903,8 @@ class Tokenizer(object): | |||||
return self._handle_argument_end() | return self._handle_argument_end() | ||||
else: | else: | ||||
self._emit_text("}") | self._emit_text("}") | ||||
elif this == next == "[": | |||||
if not self._context & contexts.WIKILINK_TITLE and self._can_recurse(): | |||||
elif this == next == "[" and self._can_recurse(): | |||||
if not self._context & contexts.INVALID_LINK: | |||||
self._parse_wikilink() | self._parse_wikilink() | ||||
else: | else: | ||||
self._emit_text("[") | self._emit_text("[") | ||||
@@ -907,6 +912,11 @@ class Tokenizer(object): | |||||
self._handle_wikilink_separator() | self._handle_wikilink_separator() | ||||
elif this == next == "]" and self._context & contexts.WIKILINK: | elif this == next == "]" and self._context & contexts.WIKILINK: | ||||
return self._handle_wikilink_end() | return self._handle_wikilink_end() | ||||
elif this == "[" and not self._context & contexts.INVALID_LINK: ## or this == ":" | |||||
if self._can_recurse(): | |||||
self._parse_external_link(brackets=this == "[") | |||||
else: | |||||
self._emit_text("[") | |||||
elif this == "=" and not self._global & contexts.GL_HEADING: | elif this == "=" and not self._global & contexts.GL_HEADING: | ||||
if self._read(-1) in ("\n", self.START): | if self._read(-1) in ("\n", self.START): | ||||
self._parse_heading() | self._parse_heading() | ||||
@@ -928,8 +938,8 @@ class Tokenizer(object): | |||||
self._handle_tag_open_close() | self._handle_tag_open_close() | ||||
else: | else: | ||||
self._handle_invalid_tag_start() | self._handle_invalid_tag_start() | ||||
elif this == "<": | |||||
if not self._context & contexts.TAG_CLOSE and self._can_recurse(): | |||||
elif this == "<" and not self._context & contexts.TAG_CLOSE: | |||||
if self._can_recurse(): | |||||
self._parse_tag() | self._parse_tag() | ||||
else: | else: | ||||
self._emit_text("<") | self._emit_text("<") | ||||
@@ -12,6 +12,13 @@ output: [TemplateOpen(), ArgumentOpen(), ArgumentOpen(), Text(text="foo"), Argum | |||||
--- | --- | ||||
name: link_in_template_name | |||||
label: a wikilink inside a template name, which breaks the template | |||||
input: "{{foo[[bar]]}}" | |||||
output: [Text(text="{{foo"), WikilinkOpen(), Text(text="bar"), WikilinkClose(), Text(text="}}")] | |||||
--- | |||||
name: rich_heading | name: rich_heading | ||||
label: a heading with templates/wikilinks in it | label: a heading with templates/wikilinks in it | ||||
input: "== Head{{ing}} [[with]] {{{funky|{{stuf}}}}} ==" | input: "== Head{{ing}} [[with]] {{{funky|{{stuf}}}}} ==" | ||||