@@ -8,6 +8,9 @@ v0.4.1 (unreleased): | |||||
includes when denoting tags, but not comments. | includes when denoting tags, but not comments. | ||||
- Fixed the behavior of preserve_spacing in Template.add() and keep_field in | - Fixed the behavior of preserve_spacing in Template.add() and keep_field in | ||||
Template.remove() on parameters with hidden keys. | Template.remove() on parameters with hidden keys. | ||||
- Fixed parser bugs involving: | |||||
- templates with completely blank names; | |||||
- templates with newlines and comments. | |||||
- Fixed some bugs in the release scripts. | - Fixed some bugs in the release scripts. | ||||
v0.4 (released May 23, 2015): | v0.4 (released May 23, 2015): | ||||
@@ -15,6 +15,11 @@ Unreleased | |||||
This includes when denoting tags, but not comments. | This includes when denoting tags, but not comments. | ||||
- Fixed the behavior of *preserve_spacing* in :func:`~.Template.add` and | - Fixed the behavior of *preserve_spacing* in :func:`~.Template.add` and | ||||
*keep_field* in :func:`~.Template.remove` on parameters with hidden keys. | *keep_field* in :func:`~.Template.remove` on parameters with hidden keys. | ||||
- Fixed parser bugs involving: | |||||
- templates with completely blank names; | |||||
- templates with newlines and comments. | |||||
- Fixed some bugs in the release scripts. | - Fixed some bugs in the release scripts. | ||||
v0.4 | v0.4 | ||||
@@ -89,6 +89,7 @@ Local (stack-specific) contexts: | |||||
* :const:`FAIL_ON_LBRACE` | * :const:`FAIL_ON_LBRACE` | ||||
* :const:`FAIL_ON_RBRACE` | * :const:`FAIL_ON_RBRACE` | ||||
* :const:`FAIL_ON_EQUALS` | * :const:`FAIL_ON_EQUALS` | ||||
* :const:`HAS_TEMPLATE` | |||||
* :const:`TABLE` | * :const:`TABLE` | ||||
@@ -161,15 +162,16 @@ FAIL_NEXT = 1 << 26 | |||||
FAIL_ON_LBRACE = 1 << 27 | FAIL_ON_LBRACE = 1 << 27 | ||||
FAIL_ON_RBRACE = 1 << 28 | FAIL_ON_RBRACE = 1 << 28 | ||||
FAIL_ON_EQUALS = 1 << 29 | FAIL_ON_EQUALS = 1 << 29 | ||||
HAS_TEMPLATE = 1 << 30 | |||||
SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE + | SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE + | ||||
FAIL_ON_RBRACE + FAIL_ON_EQUALS) | |||||
TABLE_OPEN = 1 << 30 | |||||
TABLE_CELL_OPEN = 1 << 31 | |||||
TABLE_CELL_STYLE = 1 << 32 | |||||
TABLE_ROW_OPEN = 1 << 33 | |||||
TABLE_TD_LINE = 1 << 34 | |||||
TABLE_TH_LINE = 1 << 35 | |||||
FAIL_ON_RBRACE + FAIL_ON_EQUALS + HAS_TEMPLATE) | |||||
TABLE_OPEN = 1 << 31 | |||||
TABLE_CELL_OPEN = 1 << 32 | |||||
TABLE_CELL_STYLE = 1 << 33 | |||||
TABLE_ROW_OPEN = 1 << 34 | |||||
TABLE_TD_LINE = 1 << 35 | |||||
TABLE_TH_LINE = 1 << 36 | |||||
TABLE_CELL_LINE_CONTEXTS = TABLE_TD_LINE + TABLE_TH_LINE + TABLE_CELL_STYLE | TABLE_CELL_LINE_CONTEXTS = TABLE_TD_LINE + TABLE_TH_LINE + TABLE_CELL_STYLE | ||||
TABLE = (TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE + TABLE_ROW_OPEN + | TABLE = (TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE + TABLE_ROW_OPEN + | ||||
TABLE_TD_LINE + TABLE_TH_LINE) | TABLE_TD_LINE + TABLE_TH_LINE) | ||||
@@ -582,12 +582,16 @@ static PyObject* Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) | |||||
/* | /* | ||||
Parse a template at the head of the wikicode string. | Parse a template at the head of the wikicode string. | ||||
*/ | */ | ||||
static int Tokenizer_parse_template(Tokenizer* self) | |||||
static int Tokenizer_parse_template(Tokenizer* self, int has_content) | |||||
{ | { | ||||
PyObject *template; | PyObject *template; | ||||
Py_ssize_t reset = self->head; | Py_ssize_t reset = self->head; | ||||
uint64_t context = LC_TEMPLATE_NAME; | |||||
template = Tokenizer_parse(self, LC_TEMPLATE_NAME, 1); | |||||
if (has_content) | |||||
context |= LC_HAS_TEMPLATE; | |||||
template = Tokenizer_parse(self, context, 1); | |||||
if (BAD_ROUTE) { | if (BAD_ROUTE) { | ||||
self->head = reset; | self->head = reset; | ||||
return 0; | return 0; | ||||
@@ -643,6 +647,7 @@ static int Tokenizer_parse_argument(Tokenizer* self) | |||||
static int Tokenizer_parse_template_or_argument(Tokenizer* self) | static int Tokenizer_parse_template_or_argument(Tokenizer* self) | ||||
{ | { | ||||
unsigned int braces = 2, i; | unsigned int braces = 2, i; | ||||
int has_content = 0; | |||||
PyObject *tokenlist; | PyObject *tokenlist; | ||||
self->head += 2; | self->head += 2; | ||||
@@ -659,7 +664,7 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) | |||||
return 0; | return 0; | ||||
} | } | ||||
if (braces == 2) { | if (braces == 2) { | ||||
if (Tokenizer_parse_template(self)) | |||||
if (Tokenizer_parse_template(self, has_content)) | |||||
return -1; | return -1; | ||||
if (BAD_ROUTE) { | if (BAD_ROUTE) { | ||||
RESET_ROUTE(); | RESET_ROUTE(); | ||||
@@ -673,7 +678,7 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) | |||||
return -1; | return -1; | ||||
if (BAD_ROUTE) { | if (BAD_ROUTE) { | ||||
RESET_ROUTE(); | RESET_ROUTE(); | ||||
if (Tokenizer_parse_template(self)) | |||||
if (Tokenizer_parse_template(self, has_content)) | |||||
return -1; | return -1; | ||||
if (BAD_ROUTE) { | if (BAD_ROUTE) { | ||||
char text[MAX_BRACES + 1]; | char text[MAX_BRACES + 1]; | ||||
@@ -689,8 +694,10 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) | |||||
} | } | ||||
else | else | ||||
braces -= 3; | braces -= 3; | ||||
if (braces) | |||||
if (braces) { | |||||
has_content = 1; | |||||
self->head++; | self->head++; | ||||
} | |||||
} | } | ||||
tokenlist = Tokenizer_pop(self); | tokenlist = Tokenizer_pop(self); | ||||
if (!tokenlist) | if (!tokenlist) | ||||
@@ -712,8 +719,13 @@ static int Tokenizer_handle_template_param(Tokenizer* self) | |||||
{ | { | ||||
PyObject *stack; | PyObject *stack; | ||||
if (self->topstack->context & LC_TEMPLATE_NAME) | |||||
if (self->topstack->context & LC_TEMPLATE_NAME) { | |||||
if (!(self->topstack->context & (LC_HAS_TEXT | LC_HAS_TEMPLATE))) { | |||||
Tokenizer_fail_route(self); | |||||
return -1; | |||||
} | |||||
self->topstack->context ^= LC_TEMPLATE_NAME; | self->topstack->context ^= LC_TEMPLATE_NAME; | ||||
} | |||||
else if (self->topstack->context & LC_TEMPLATE_PARAM_VALUE) | else if (self->topstack->context & LC_TEMPLATE_PARAM_VALUE) | ||||
self->topstack->context ^= LC_TEMPLATE_PARAM_VALUE; | self->topstack->context ^= LC_TEMPLATE_PARAM_VALUE; | ||||
if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) { | if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) { | ||||
@@ -764,7 +776,11 @@ static PyObject* Tokenizer_handle_template_end(Tokenizer* self) | |||||
{ | { | ||||
PyObject* stack; | PyObject* stack; | ||||
if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) { | |||||
if (self->topstack->context & LC_TEMPLATE_NAME) { | |||||
if (!(self->topstack->context & (LC_HAS_TEXT | LC_HAS_TEMPLATE))) | |||||
return Tokenizer_fail_route(self); | |||||
} | |||||
else if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) { | |||||
stack = Tokenizer_pop_keeping_context(self); | stack = Tokenizer_pop_keeping_context(self); | ||||
if (!stack) | if (!stack) | ||||
return NULL; | return NULL; | ||||
@@ -2885,30 +2901,26 @@ Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UNICODE data) | |||||
if (context & LC_TAG_CLOSE) | if (context & LC_TAG_CLOSE) | ||||
return (data == '<') ? -1 : 0; | return (data == '<') ? -1 : 0; | ||||
if (context & LC_TEMPLATE_NAME) { | if (context & LC_TEMPLATE_NAME) { | ||||
if (data == '{' || data == '}' || data == '[') { | |||||
if (data == '{') { | |||||
self->topstack->context |= LC_HAS_TEMPLATE | LC_FAIL_NEXT; | |||||
return 0; | |||||
} | |||||
if (data == '}' || (data == '<' && Tokenizer_READ(self, 1) == '!')) { | |||||
self->topstack->context |= LC_FAIL_NEXT; | self->topstack->context |= LC_FAIL_NEXT; | ||||
return 0; | return 0; | ||||
} | } | ||||
if (data == ']' || data == '>' || (data == '<' && | |||||
Tokenizer_READ(self, 1) != '!')) { | |||||
if (data == '[' || data == ']' || data == '<' || data == '>') { | |||||
return -1; | return -1; | ||||
} | } | ||||
if (data == '|') | if (data == '|') | ||||
return 0; | return 0; | ||||
if (context & LC_HAS_TEXT) { | if (context & LC_HAS_TEXT) { | ||||
if (context & LC_FAIL_ON_TEXT) { | if (context & LC_FAIL_ON_TEXT) { | ||||
if (!Py_UNICODE_ISSPACE(data)) { | |||||
if (data == '<' && Tokenizer_READ(self, 1) == '!') { | |||||
self->topstack->context |= LC_FAIL_NEXT; | |||||
return 0; | |||||
} | |||||
if (!Py_UNICODE_ISSPACE(data)) | |||||
return -1; | return -1; | ||||
} | |||||
} | |||||
else { | |||||
if (data == '\n') | |||||
self->topstack->context |= LC_FAIL_ON_TEXT; | |||||
} | } | ||||
else if (data == '\n') | |||||
self->topstack->context |= LC_FAIL_ON_TEXT; | |||||
} | } | ||||
else if (!Py_UNICODE_ISSPACE(data)) | else if (!Py_UNICODE_ISSPACE(data)) | ||||
self->topstack->context |= LC_HAS_TEXT; | self->topstack->context |= LC_HAS_TEXT; | ||||
@@ -150,22 +150,23 @@ static PyObject* TagCloseClose; | |||||
#define LC_DLTERM 0x0000000000800000 | #define LC_DLTERM 0x0000000000800000 | ||||
#define LC_SAFETY_CHECK 0x000000003F000000 | |||||
#define LC_SAFETY_CHECK 0x000000007F000000 | |||||
#define LC_HAS_TEXT 0x0000000001000000 | #define LC_HAS_TEXT 0x0000000001000000 | ||||
#define LC_FAIL_ON_TEXT 0x0000000002000000 | #define LC_FAIL_ON_TEXT 0x0000000002000000 | ||||
#define LC_FAIL_NEXT 0x0000000004000000 | #define LC_FAIL_NEXT 0x0000000004000000 | ||||
#define LC_FAIL_ON_LBRACE 0x0000000008000000 | #define LC_FAIL_ON_LBRACE 0x0000000008000000 | ||||
#define LC_FAIL_ON_RBRACE 0x0000000010000000 | #define LC_FAIL_ON_RBRACE 0x0000000010000000 | ||||
#define LC_FAIL_ON_EQUALS 0x0000000020000000 | #define LC_FAIL_ON_EQUALS 0x0000000020000000 | ||||
#define LC_TABLE 0x0000000FC0000000 | |||||
#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000000D00000000 | |||||
#define LC_TABLE_OPEN 0x0000000040000000 | |||||
#define LC_TABLE_CELL_OPEN 0x0000000080000000 | |||||
#define LC_TABLE_CELL_STYLE 0x0000000100000000 | |||||
#define LC_TABLE_ROW_OPEN 0x0000000200000000 | |||||
#define LC_TABLE_TD_LINE 0x0000000400000000 | |||||
#define LC_TABLE_TH_LINE 0x0000000800000000 | |||||
#define LC_HAS_TEMPLATE 0x0000000040000000 | |||||
#define LC_TABLE 0x0000001F80000000 | |||||
#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000001A00000000 | |||||
#define LC_TABLE_OPEN 0x0000000080000000 | |||||
#define LC_TABLE_CELL_OPEN 0x0000000100000000 | |||||
#define LC_TABLE_CELL_STYLE 0x0000000200000000 | |||||
#define LC_TABLE_ROW_OPEN 0x0000000400000000 | |||||
#define LC_TABLE_TD_LINE 0x0000000800000000 | |||||
#define LC_TABLE_TH_LINE 0x0000001000000000 | |||||
/* Global contexts: */ | /* Global contexts: */ | ||||
@@ -192,11 +192,14 @@ class Tokenizer(object): | |||||
self._fail_route() | self._fail_route() | ||||
return self.END | return self.END | ||||
def _parse_template(self): | |||||
def _parse_template(self, has_content): | |||||
"""Parse a template at the head of the wikicode string.""" | """Parse a template at the head of the wikicode string.""" | ||||
reset = self._head | reset = self._head | ||||
context = contexts.TEMPLATE_NAME | |||||
if has_content: | |||||
context |= contexts.HAS_TEMPLATE | |||||
try: | try: | ||||
template = self._parse(contexts.TEMPLATE_NAME) | |||||
template = self._parse(context) | |||||
except BadRoute: | except BadRoute: | ||||
self._head = reset | self._head = reset | ||||
raise | raise | ||||
@@ -223,6 +226,7 @@ class Tokenizer(object): | |||||
while self._read() == "{": | while self._read() == "{": | ||||
self._head += 1 | self._head += 1 | ||||
braces += 1 | braces += 1 | ||||
has_content = False | |||||
self._push() | self._push() | ||||
while braces: | while braces: | ||||
@@ -230,7 +234,7 @@ class Tokenizer(object): | |||||
return self._emit_text_then_stack("{") | return self._emit_text_then_stack("{") | ||||
if braces == 2: | if braces == 2: | ||||
try: | try: | ||||
self._parse_template() | |||||
self._parse_template(has_content) | |||||
except BadRoute: | except BadRoute: | ||||
return self._emit_text_then_stack("{{") | return self._emit_text_then_stack("{{") | ||||
break | break | ||||
@@ -239,11 +243,12 @@ class Tokenizer(object): | |||||
braces -= 3 | braces -= 3 | ||||
except BadRoute: | except BadRoute: | ||||
try: | try: | ||||
self._parse_template() | |||||
self._parse_template(has_content) | |||||
braces -= 2 | braces -= 2 | ||||
except BadRoute: | except BadRoute: | ||||
return self._emit_text_then_stack("{" * braces) | return self._emit_text_then_stack("{" * braces) | ||||
if braces: | if braces: | ||||
has_content = True | |||||
self._head += 1 | self._head += 1 | ||||
self._emit_all(self._pop()) | self._emit_all(self._pop()) | ||||
@@ -253,6 +258,8 @@ class Tokenizer(object): | |||||
def _handle_template_param(self): | def _handle_template_param(self): | ||||
"""Handle a template parameter at the head of the string.""" | """Handle a template parameter at the head of the string.""" | ||||
if self._context & contexts.TEMPLATE_NAME: | if self._context & contexts.TEMPLATE_NAME: | ||||
if not self._context & (contexts.HAS_TEXT | contexts.HAS_TEMPLATE): | |||||
self._fail_route() | |||||
self._context ^= contexts.TEMPLATE_NAME | self._context ^= contexts.TEMPLATE_NAME | ||||
elif self._context & contexts.TEMPLATE_PARAM_VALUE: | elif self._context & contexts.TEMPLATE_PARAM_VALUE: | ||||
self._context ^= contexts.TEMPLATE_PARAM_VALUE | self._context ^= contexts.TEMPLATE_PARAM_VALUE | ||||
@@ -271,7 +278,10 @@ class Tokenizer(object): | |||||
def _handle_template_end(self): | def _handle_template_end(self): | ||||
"""Handle the end of a template at the head of the string.""" | """Handle the end of a template at the head of the string.""" | ||||
if self._context & contexts.TEMPLATE_PARAM_KEY: | |||||
if self._context & contexts.TEMPLATE_NAME: | |||||
if not self._context & (contexts.HAS_TEXT | contexts.HAS_TEMPLATE): | |||||
self._fail_route() | |||||
elif self._context & contexts.TEMPLATE_PARAM_KEY: | |||||
self._emit_all(self._pop(keep_context=True)) | self._emit_all(self._pop(keep_context=True)) | ||||
self._head += 1 | self._head += 1 | ||||
return self._pop() | return self._pop() | ||||
@@ -1183,23 +1193,22 @@ class Tokenizer(object): | |||||
elif context & contexts.EXT_LINK_TITLE: | elif context & contexts.EXT_LINK_TITLE: | ||||
return this != "\n" | return this != "\n" | ||||
elif context & contexts.TEMPLATE_NAME: | elif context & contexts.TEMPLATE_NAME: | ||||
if this == "{" or this == "}" or this == "[": | |||||
if this == "{": | |||||
self._context |= contexts.HAS_TEMPLATE | contexts.FAIL_NEXT | |||||
return True | |||||
if this == "}" or (this == "<" and self._read(1) == "!"): | |||||
self._context |= contexts.FAIL_NEXT | self._context |= contexts.FAIL_NEXT | ||||
return True | return True | ||||
if this == "]" or this == ">" or (this == "<" and self._read(1) != "!"): | |||||
if this == "[" or this == "]" or this == "<" or this == ">": | |||||
return False | return False | ||||
if this == "|": | if this == "|": | ||||
return True | return True | ||||
if context & contexts.HAS_TEXT: | if context & contexts.HAS_TEXT: | ||||
if context & contexts.FAIL_ON_TEXT: | if context & contexts.FAIL_ON_TEXT: | ||||
if this is self.END or not this.isspace(): | if this is self.END or not this.isspace(): | ||||
if this == "<" and self._read(1) == "!": | |||||
self._context |= contexts.FAIL_NEXT | |||||
return True | |||||
return False | return False | ||||
else: | |||||
if this == "\n": | |||||
self._context |= contexts.FAIL_ON_TEXT | |||||
elif this == "\n": | |||||
self._context |= contexts.FAIL_ON_TEXT | |||||
elif this is self.END or not this.isspace(): | elif this is self.END or not this.isspace(): | ||||
self._context |= contexts.HAS_TEXT | self._context |= contexts.HAS_TEXT | ||||
return True | return True | ||||
@@ -686,5 +686,5 @@ output: [Text(text="{{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ | |||||
name: recursion_opens_and_closes | name: recursion_opens_and_closes | ||||
label: test potentially dangerous recursion: template openings and closings | label: test potentially dangerous recursion: template openings and closings | ||||
input: "{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}" | |||||
output: [Text(text="{{|"), TemplateOpen(), TemplateClose(), Text(text="{{|"), TemplateOpen(), TemplateClose(), TemplateOpen(), TemplateParamSeparator(), TemplateOpen(), TemplateClose(), Text(text="{{"), TemplateParamSeparator(), Text(text="{{"), TemplateClose(), Text(text="{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}")] | |||||
input: "{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}" | |||||
output: [Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), TemplateOpen(), Text(text="x"), TemplateParamSeparator(), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x"), TemplateParamSeparator(), Text(text="{{x"), TemplateClose(), Text(text="{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}")] |