Browse Source

Implement improved wikilink handling.

tags/v0.3
Ben Kurtovic 10 years ago
parent
commit
d42e05a554
4 changed files with 22 additions and 17 deletions
  1. +6
    -6
      mwparserfromhell/parser/contexts.py
  2. +6
    -7
      mwparserfromhell/parser/tokenizer.c
  3. +2
    -2
      mwparserfromhell/parser/tokenizer.h
  4. +8
    -2
      mwparserfromhell/parser/tokenizer.py

+ 6
- 6
mwparserfromhell/parser/contexts.py View File

@@ -53,7 +53,7 @@ Local (stack-specific) contexts:

* :py:const:`EXT_LINK`

- * :py:const:`EXT_LINK_URL`
+ * :py:const:`EXT_LINK_URI`
* :py:const:`EXT_LINK_TITLE`
* :py:const:`EXT_LINK_BRACKETS`

@@ -119,10 +119,10 @@ WIKILINK_TITLE = 1 << 5
WIKILINK_TEXT = 1 << 6
WIKILINK = WIKILINK_TITLE + WIKILINK_TEXT

- EXT_LINK_URL = 1 << 7
+ EXT_LINK_URI = 1 << 7
EXT_LINK_TITLE = 1 << 8
EXT_LINK_BRACKETS = 1 << 9
- EXT_LINK = EXT_LINK_URL + EXT_LINK_TITLE + EXT_LINK_BRACKETS
+ EXT_LINK = EXT_LINK_URI + EXT_LINK_TITLE + EXT_LINK_BRACKETS

HEADING_LEVEL_1 = 1 << 10
HEADING_LEVEL_2 = 1 << 11
@@ -163,7 +163,7 @@ GL_HEADING = 1 << 0
# Aggregate contexts:

FAIL = TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK + HEADING + TAG + STYLE
- UNSAFE = (TEMPLATE_NAME + WIKILINK_TITLE + TEMPLATE_PARAM_KEY + ARGUMENT_NAME +
- TAG_CLOSE)
+ UNSAFE = (TEMPLATE_NAME + WIKILINK + EXT_LINK_TITLE + TEMPLATE_PARAM_KEY +
+ ARGUMENT_NAME + TAG_CLOSE)
DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE
- INVALID_LINK = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK_URL
+ INVALID_LINK = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK + EXT_LINK

+ 6
- 7
mwparserfromhell/parser/tokenizer.c View File

@@ -2050,18 +2050,17 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data)
if (context & LC_FAIL_NEXT) {
return -1;
}
- if (context & LC_WIKILINK_TITLE) {
- if (data == *"]" || data == *"{")
+ if (context & LC_WIKILINK) {
+ if (context & LC_WIKILINK_TEXT)
+ return (data == *"[" && Tokenizer_READ(self, 1) == *"[") ? -1 : 0;
+ else if (data == *"]" || data == *"{")
self->topstack->context |= LC_FAIL_NEXT;
else if (data == *"\n" || data == *"[" || data == *"}")
return -1;
return 0;
}
- if (context & LC_TAG_CLOSE) {
- if (data == *"<")
- return -1;
- return 0;
- }
+ if (context & LC_TAG_CLOSE)
+ return (data == *"<") ? -1 : 0;
if (context & LC_TEMPLATE_NAME) {
if (data == *"{" || data == *"}" || data == *"[") {
self->topstack->context |= LC_FAIL_NEXT;


+ 2
- 2
mwparserfromhell/parser/tokenizer.h View File

@@ -163,9 +163,9 @@ static PyObject* TagCloseClose;
/* Aggregate contexts: */

#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_HEADING | LC_TAG | LC_STYLE)
- #define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)
+ #define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)
#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE)
- #define AGG_INVALID_LINK (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URL)
+ #define AGG_INVALID_LINK (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK | LC_EXT_LINK_URL)

/* Tag contexts: */



+ 8
- 2
mwparserfromhell/parser/tokenizer.py View File

@@ -835,12 +835,16 @@ class Tokenizer(object):
context = self._context
if context & contexts.FAIL_NEXT:
return False
- if context & contexts.WIKILINK_TITLE:
- if this == "]" or this == "{":
+ if context & contexts.WIKILINK:
+ if context & contexts.WIKILINK_TEXT:
+ return not (this == self._read(1) == "[")
+ elif this == "]" or this == "{":
self._context |= contexts.FAIL_NEXT
elif this == "\n" or this == "[" or this == "}":
return False
return True
+ elif context & contexts.EXT_LINK_TITLE:
+ return this != "\n"
elif context & contexts.TEMPLATE_NAME:
if this == "{" or this == "}" or this == "[":
self._context |= contexts.FAIL_NEXT
@@ -936,6 +940,8 @@ class Tokenizer(object):
self._parse_external_link(True)
elif this == ":" and self._read(-1) not in self.MARKERS:
self._parse_external_link(False)
+ elif this == "]" and self._context & contexts.EXT_LINK_TITLE:
+ return self._pop()
elif this == "=" and not self._global & contexts.GL_HEADING:
if self._read(-1) in ("\n", self.START):
self._parse_heading()


Loading…
Cancel
Save