@@ -8,6 +8,9 @@ v0.4.1 (unreleased): | |||
includes when denoting tags, but not comments. | |||
- Fixed the behavior of preserve_spacing in Template.add() and keep_field in | |||
Template.remove() on parameters with hidden keys. | |||
- Fixed parser bugs involving: | |||
- templates with completely blank names; | |||
- templates with newlines and comments. | |||
- Fixed some bugs in the release scripts. | |||
v0.4 (released May 23, 2015): | |||
@@ -15,6 +15,11 @@ Unreleased | |||
This includes when denoting tags, but not comments. | |||
- Fixed the behavior of *preserve_spacing* in :func:`~.Template.add` and | |||
*keep_field* in :func:`~.Template.remove` on parameters with hidden keys. | |||
- Fixed parser bugs involving: | |||
- templates with completely blank names; | |||
- templates with newlines and comments. | |||
- Fixed some bugs in the release scripts. | |||
v0.4 | |||
@@ -89,6 +89,7 @@ Local (stack-specific) contexts: | |||
* :const:`FAIL_ON_LBRACE` | |||
* :const:`FAIL_ON_RBRACE` | |||
* :const:`FAIL_ON_EQUALS` | |||
* :const:`HAS_TEMPLATE` | |||
* :const:`TABLE` | |||
@@ -161,15 +162,16 @@ FAIL_NEXT = 1 << 26 | |||
FAIL_ON_LBRACE = 1 << 27 | |||
FAIL_ON_RBRACE = 1 << 28 | |||
FAIL_ON_EQUALS = 1 << 29 | |||
HAS_TEMPLATE = 1 << 30 | |||
SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE + | |||
FAIL_ON_RBRACE + FAIL_ON_EQUALS) | |||
TABLE_OPEN = 1 << 30 | |||
TABLE_CELL_OPEN = 1 << 31 | |||
TABLE_CELL_STYLE = 1 << 32 | |||
TABLE_ROW_OPEN = 1 << 33 | |||
TABLE_TD_LINE = 1 << 34 | |||
TABLE_TH_LINE = 1 << 35 | |||
FAIL_ON_RBRACE + FAIL_ON_EQUALS + HAS_TEMPLATE) | |||
TABLE_OPEN = 1 << 31 | |||
TABLE_CELL_OPEN = 1 << 32 | |||
TABLE_CELL_STYLE = 1 << 33 | |||
TABLE_ROW_OPEN = 1 << 34 | |||
TABLE_TD_LINE = 1 << 35 | |||
TABLE_TH_LINE = 1 << 36 | |||
TABLE_CELL_LINE_CONTEXTS = TABLE_TD_LINE + TABLE_TH_LINE + TABLE_CELL_STYLE | |||
TABLE = (TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE + TABLE_ROW_OPEN + | |||
TABLE_TD_LINE + TABLE_TH_LINE) | |||
@@ -582,12 +582,16 @@ static PyObject* Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) | |||
/* | |||
Parse a template at the head of the wikicode string. | |||
*/ | |||
static int Tokenizer_parse_template(Tokenizer* self) | |||
static int Tokenizer_parse_template(Tokenizer* self, int has_content) | |||
{ | |||
PyObject *template; | |||
Py_ssize_t reset = self->head; | |||
uint64_t context = LC_TEMPLATE_NAME; | |||
template = Tokenizer_parse(self, LC_TEMPLATE_NAME, 1); | |||
if (has_content) | |||
context |= LC_HAS_TEMPLATE; | |||
template = Tokenizer_parse(self, context, 1); | |||
if (BAD_ROUTE) { | |||
self->head = reset; | |||
return 0; | |||
@@ -643,6 +647,7 @@ static int Tokenizer_parse_argument(Tokenizer* self) | |||
static int Tokenizer_parse_template_or_argument(Tokenizer* self) | |||
{ | |||
unsigned int braces = 2, i; | |||
int has_content = 0; | |||
PyObject *tokenlist; | |||
self->head += 2; | |||
@@ -659,7 +664,7 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) | |||
return 0; | |||
} | |||
if (braces == 2) { | |||
if (Tokenizer_parse_template(self)) | |||
if (Tokenizer_parse_template(self, has_content)) | |||
return -1; | |||
if (BAD_ROUTE) { | |||
RESET_ROUTE(); | |||
@@ -673,7 +678,7 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) | |||
return -1; | |||
if (BAD_ROUTE) { | |||
RESET_ROUTE(); | |||
if (Tokenizer_parse_template(self)) | |||
if (Tokenizer_parse_template(self, has_content)) | |||
return -1; | |||
if (BAD_ROUTE) { | |||
char text[MAX_BRACES + 1]; | |||
@@ -689,8 +694,10 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) | |||
} | |||
else | |||
braces -= 3; | |||
if (braces) | |||
if (braces) { | |||
has_content = 1; | |||
self->head++; | |||
} | |||
} | |||
tokenlist = Tokenizer_pop(self); | |||
if (!tokenlist) | |||
@@ -712,8 +719,13 @@ static int Tokenizer_handle_template_param(Tokenizer* self) | |||
{ | |||
PyObject *stack; | |||
if (self->topstack->context & LC_TEMPLATE_NAME) | |||
if (self->topstack->context & LC_TEMPLATE_NAME) { | |||
if (!(self->topstack->context & (LC_HAS_TEXT | LC_HAS_TEMPLATE))) { | |||
Tokenizer_fail_route(self); | |||
return -1; | |||
} | |||
self->topstack->context ^= LC_TEMPLATE_NAME; | |||
} | |||
else if (self->topstack->context & LC_TEMPLATE_PARAM_VALUE) | |||
self->topstack->context ^= LC_TEMPLATE_PARAM_VALUE; | |||
if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) { | |||
@@ -764,7 +776,11 @@ static PyObject* Tokenizer_handle_template_end(Tokenizer* self) | |||
{ | |||
PyObject* stack; | |||
if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) { | |||
if (self->topstack->context & LC_TEMPLATE_NAME) { | |||
if (!(self->topstack->context & (LC_HAS_TEXT | LC_HAS_TEMPLATE))) | |||
return Tokenizer_fail_route(self); | |||
} | |||
else if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) { | |||
stack = Tokenizer_pop_keeping_context(self); | |||
if (!stack) | |||
return NULL; | |||
@@ -2885,30 +2901,26 @@ Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UNICODE data) | |||
if (context & LC_TAG_CLOSE) | |||
return (data == '<') ? -1 : 0; | |||
if (context & LC_TEMPLATE_NAME) { | |||
if (data == '{' || data == '}' || data == '[') { | |||
if (data == '{') { | |||
self->topstack->context |= LC_HAS_TEMPLATE | LC_FAIL_NEXT; | |||
return 0; | |||
} | |||
if (data == '}' || (data == '<' && Tokenizer_READ(self, 1) == '!')) { | |||
self->topstack->context |= LC_FAIL_NEXT; | |||
return 0; | |||
} | |||
if (data == ']' || data == '>' || (data == '<' && | |||
Tokenizer_READ(self, 1) != '!')) { | |||
if (data == '[' || data == ']' || data == '<' || data == '>') { | |||
return -1; | |||
} | |||
if (data == '|') | |||
return 0; | |||
if (context & LC_HAS_TEXT) { | |||
if (context & LC_FAIL_ON_TEXT) { | |||
if (!Py_UNICODE_ISSPACE(data)) { | |||
if (data == '<' && Tokenizer_READ(self, 1) == '!') { | |||
self->topstack->context |= LC_FAIL_NEXT; | |||
return 0; | |||
} | |||
if (!Py_UNICODE_ISSPACE(data)) | |||
return -1; | |||
} | |||
} | |||
else { | |||
if (data == '\n') | |||
self->topstack->context |= LC_FAIL_ON_TEXT; | |||
} | |||
else if (data == '\n') | |||
self->topstack->context |= LC_FAIL_ON_TEXT; | |||
} | |||
else if (!Py_UNICODE_ISSPACE(data)) | |||
self->topstack->context |= LC_HAS_TEXT; | |||
@@ -150,22 +150,23 @@ static PyObject* TagCloseClose; | |||
#define LC_DLTERM 0x0000000000800000 | |||
#define LC_SAFETY_CHECK 0x000000003F000000 | |||
#define LC_SAFETY_CHECK 0x000000007F000000 | |||
#define LC_HAS_TEXT 0x0000000001000000 | |||
#define LC_FAIL_ON_TEXT 0x0000000002000000 | |||
#define LC_FAIL_NEXT 0x0000000004000000 | |||
#define LC_FAIL_ON_LBRACE 0x0000000008000000 | |||
#define LC_FAIL_ON_RBRACE 0x0000000010000000 | |||
#define LC_FAIL_ON_EQUALS 0x0000000020000000 | |||
#define LC_TABLE 0x0000000FC0000000 | |||
#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000000D00000000 | |||
#define LC_TABLE_OPEN 0x0000000040000000 | |||
#define LC_TABLE_CELL_OPEN 0x0000000080000000 | |||
#define LC_TABLE_CELL_STYLE 0x0000000100000000 | |||
#define LC_TABLE_ROW_OPEN 0x0000000200000000 | |||
#define LC_TABLE_TD_LINE 0x0000000400000000 | |||
#define LC_TABLE_TH_LINE 0x0000000800000000 | |||
#define LC_HAS_TEMPLATE 0x0000000040000000 | |||
#define LC_TABLE 0x0000001F80000000 | |||
#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000001A00000000 | |||
#define LC_TABLE_OPEN 0x0000000080000000 | |||
#define LC_TABLE_CELL_OPEN 0x0000000100000000 | |||
#define LC_TABLE_CELL_STYLE 0x0000000200000000 | |||
#define LC_TABLE_ROW_OPEN 0x0000000400000000 | |||
#define LC_TABLE_TD_LINE 0x0000000800000000 | |||
#define LC_TABLE_TH_LINE 0x0000001000000000 | |||
/* Global contexts: */ | |||
@@ -192,11 +192,14 @@ class Tokenizer(object): | |||
self._fail_route() | |||
return self.END | |||
def _parse_template(self): | |||
def _parse_template(self, has_content): | |||
"""Parse a template at the head of the wikicode string.""" | |||
reset = self._head | |||
context = contexts.TEMPLATE_NAME | |||
if has_content: | |||
context |= contexts.HAS_TEMPLATE | |||
try: | |||
template = self._parse(contexts.TEMPLATE_NAME) | |||
template = self._parse(context) | |||
except BadRoute: | |||
self._head = reset | |||
raise | |||
@@ -223,6 +226,7 @@ class Tokenizer(object): | |||
while self._read() == "{": | |||
self._head += 1 | |||
braces += 1 | |||
has_content = False | |||
self._push() | |||
while braces: | |||
@@ -230,7 +234,7 @@ class Tokenizer(object): | |||
return self._emit_text_then_stack("{") | |||
if braces == 2: | |||
try: | |||
self._parse_template() | |||
self._parse_template(has_content) | |||
except BadRoute: | |||
return self._emit_text_then_stack("{{") | |||
break | |||
@@ -239,11 +243,12 @@ class Tokenizer(object): | |||
braces -= 3 | |||
except BadRoute: | |||
try: | |||
self._parse_template() | |||
self._parse_template(has_content) | |||
braces -= 2 | |||
except BadRoute: | |||
return self._emit_text_then_stack("{" * braces) | |||
if braces: | |||
has_content = True | |||
self._head += 1 | |||
self._emit_all(self._pop()) | |||
@@ -253,6 +258,8 @@ class Tokenizer(object): | |||
def _handle_template_param(self): | |||
"""Handle a template parameter at the head of the string.""" | |||
if self._context & contexts.TEMPLATE_NAME: | |||
if not self._context & (contexts.HAS_TEXT | contexts.HAS_TEMPLATE): | |||
self._fail_route() | |||
self._context ^= contexts.TEMPLATE_NAME | |||
elif self._context & contexts.TEMPLATE_PARAM_VALUE: | |||
self._context ^= contexts.TEMPLATE_PARAM_VALUE | |||
@@ -271,7 +278,10 @@ class Tokenizer(object): | |||
def _handle_template_end(self): | |||
"""Handle the end of a template at the head of the string.""" | |||
if self._context & contexts.TEMPLATE_PARAM_KEY: | |||
if self._context & contexts.TEMPLATE_NAME: | |||
if not self._context & (contexts.HAS_TEXT | contexts.HAS_TEMPLATE): | |||
self._fail_route() | |||
elif self._context & contexts.TEMPLATE_PARAM_KEY: | |||
self._emit_all(self._pop(keep_context=True)) | |||
self._head += 1 | |||
return self._pop() | |||
@@ -1183,23 +1193,22 @@ class Tokenizer(object): | |||
elif context & contexts.EXT_LINK_TITLE: | |||
return this != "\n" | |||
elif context & contexts.TEMPLATE_NAME: | |||
if this == "{" or this == "}" or this == "[": | |||
if this == "{": | |||
self._context |= contexts.HAS_TEMPLATE | contexts.FAIL_NEXT | |||
return True | |||
if this == "}" or (this == "<" and self._read(1) == "!"): | |||
self._context |= contexts.FAIL_NEXT | |||
return True | |||
if this == "]" or this == ">" or (this == "<" and self._read(1) != "!"): | |||
if this == "[" or this == "]" or this == "<" or this == ">": | |||
return False | |||
if this == "|": | |||
return True | |||
if context & contexts.HAS_TEXT: | |||
if context & contexts.FAIL_ON_TEXT: | |||
if this is self.END or not this.isspace(): | |||
if this == "<" and self._read(1) == "!": | |||
self._context |= contexts.FAIL_NEXT | |||
return True | |||
return False | |||
else: | |||
if this == "\n": | |||
self._context |= contexts.FAIL_ON_TEXT | |||
elif this == "\n": | |||
self._context |= contexts.FAIL_ON_TEXT | |||
elif this is self.END or not this.isspace(): | |||
self._context |= contexts.HAS_TEXT | |||
return True | |||
@@ -686,5 +686,5 @@ output: [Text(text="{{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ | |||
name: recursion_opens_and_closes | |||
label: test potentially dangerous recursion: template openings and closings | |||
input: "{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}" | |||
output: [Text(text="{{|"), TemplateOpen(), TemplateClose(), Text(text="{{|"), TemplateOpen(), TemplateClose(), TemplateOpen(), TemplateParamSeparator(), TemplateOpen(), TemplateClose(), Text(text="{{"), TemplateParamSeparator(), Text(text="{{"), TemplateClose(), Text(text="{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}")] | |||
input: "{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}" | |||
output: [Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), TemplateOpen(), Text(text="x"), TemplateParamSeparator(), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x"), TemplateParamSeparator(), Text(text="{{x"), TemplateClose(), Text(text="{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}")] |