@@ -1,6 +1,8 @@ | |||||
v0.5 (unreleased): | v0.5 (unreleased): | ||||
- | |||||
- Fixed parsing bugs involving: | |||||
- wikitables nested in templates; | |||||
- wikitable error recovery when unable to recurse. | |||||
v0.4.3 (released October 29, 2015): | v0.4.3 (released October 29, 2015): | ||||
@@ -7,7 +7,10 @@ v0.5 | |||||
Unreleased | Unreleased | ||||
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.4.3...develop>`__): | (`changes <https://github.com/earwig/mwparserfromhell/compare/v0.4.3...develop>`__): | ||||
- | |||||
- Fixed parsing bugs involving: | |||||
- wikitables nested in templates; | |||||
- wikitable error recovery when unable to recurse. | |||||
v0.4.3 | v0.4.3 | ||||
------ | ------ | ||||
@@ -2190,7 +2190,7 @@ static PyObject* Tokenizer_handle_table_style(Tokenizer* self, Unicode end_token | |||||
*/ | */ | ||||
static int Tokenizer_parse_table(Tokenizer* self) | static int Tokenizer_parse_table(Tokenizer* self) | ||||
{ | { | ||||
Py_ssize_t reset = self->head + 1; | |||||
Py_ssize_t reset = self->head; | |||||
PyObject *style, *padding; | PyObject *style, *padding; | ||||
PyObject *table = NULL; | PyObject *table = NULL; | ||||
self->head += 2; | self->head += 2; | ||||
@@ -2201,7 +2201,7 @@ static int Tokenizer_parse_table(Tokenizer* self) | |||||
if (BAD_ROUTE) { | if (BAD_ROUTE) { | ||||
RESET_ROUTE(); | RESET_ROUTE(); | ||||
self->head = reset; | self->head = reset; | ||||
if (Tokenizer_emit_text(self, "{|")) | |||||
if (Tokenizer_emit_char(self, '{')) | |||||
return -1; | return -1; | ||||
return 0; | return 0; | ||||
} | } | ||||
@@ -2220,7 +2220,7 @@ static int Tokenizer_parse_table(Tokenizer* self) | |||||
Py_DECREF(padding); | Py_DECREF(padding); | ||||
Py_DECREF(style); | Py_DECREF(style); | ||||
self->head = reset; | self->head = reset; | ||||
if (Tokenizer_emit_text(self, "{|")) | |||||
if (Tokenizer_emit_char(self, '{')) | |||||
return -1; | return -1; | ||||
return 0; | return 0; | ||||
} | } | ||||
@@ -2689,10 +2689,8 @@ PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) | |||||
if (Tokenizer_parse_table(self)) | if (Tokenizer_parse_table(self)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (Tokenizer_emit_char(self, this) || Tokenizer_emit_char(self, next)) | |||||
else if (Tokenizer_emit_char(self, this)) | |||||
return NULL; | return NULL; | ||||
else | |||||
self->head++; | |||||
} | } | ||||
else if (this_context & LC_TABLE_OPEN) { | else if (this_context & LC_TABLE_OPEN) { | ||||
if (this == '|' && next == '|' && this_context & LC_TABLE_TD_LINE) { | if (this == '|' && next == '|' && this_context & LC_TABLE_TD_LINE) { | ||||
@@ -1074,14 +1074,14 @@ class Tokenizer(object): | |||||
def _parse_table(self): | def _parse_table(self): | ||||
"""Parse a wikicode table by starting with the first line.""" | """Parse a wikicode table by starting with the first line.""" | ||||
reset = self._head + 1 | |||||
reset = self._head | |||||
self._head += 2 | self._head += 2 | ||||
self._push(contexts.TABLE_OPEN) | self._push(contexts.TABLE_OPEN) | ||||
try: | try: | ||||
padding = self._handle_table_style("\n") | padding = self._handle_table_style("\n") | ||||
except BadRoute: | except BadRoute: | ||||
self._head = reset | self._head = reset | ||||
self._emit_text("{|") | |||||
self._emit_text("{") | |||||
return | return | ||||
style = self._pop() | style = self._pop() | ||||
@@ -1090,7 +1090,7 @@ class Tokenizer(object): | |||||
table = self._parse(contexts.TABLE_OPEN) | table = self._parse(contexts.TABLE_OPEN) | ||||
except BadRoute: | except BadRoute: | ||||
self._head = reset | self._head = reset | ||||
self._emit_text("{|") | |||||
self._emit_text("{") | |||||
return | return | ||||
self._emit_table_tag("{|", "table", style, padding, None, table, "|}") | self._emit_table_tag("{|", "table", style, padding, None, table, "|}") | ||||
@@ -1352,7 +1352,7 @@ class Tokenizer(object): | |||||
if self._can_recurse(): | if self._can_recurse(): | ||||
self._parse_table() | self._parse_table() | ||||
else: | else: | ||||
self._emit_text("{|") | |||||
self._emit_text("{") | |||||
elif self._context & contexts.TABLE_OPEN: | elif self._context & contexts.TABLE_OPEN: | ||||
if this == next == "|" and self._context & contexts.TABLE_TD_LINE: | if this == next == "|" and self._context & contexts.TABLE_TD_LINE: | ||||
if self._context & contexts.TABLE_CELL_OPEN: | if self._context & contexts.TABLE_CELL_OPEN: | ||||
@@ -332,3 +332,17 @@ name: wikilink_to_external_link_fallback_2 | |||||
label: an external link enclosed in an extra pair of brackets (see issue #120) | label: an external link enclosed in an extra pair of brackets (see issue #120) | ||||
input: "[[http://example.com]]" | input: "[[http://example.com]]" | ||||
output: [Text(text="["), ExternalLinkOpen(brackets=True), Text(text="http://example.com"), ExternalLinkClose(), Text(text="]")] | output: [Text(text="["), ExternalLinkOpen(brackets=True), Text(text="http://example.com"), ExternalLinkClose(), Text(text="]")] | ||||
--- | |||||
name: tables_in_templates | |||||
label: catch error handling mistakes when wikitables are inside templates | |||||
input: "{{hello|test\n{|\n|} }}" | |||||
output: [TemplateOpen(), Text(text="hello"), TemplateParamSeparator(), Text(text="test\n"), TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose(), Text(text=" "), TemplateClose()] | |||||
--- | |||||
name: tables_in_templates_2 | |||||
label: catch error handling mistakes when wikitables are inside templates | |||||
input: "{{hello|test\n{|\n| }}" | |||||
output: [TemplateOpen(), Text(text="hello"), TemplateParamSeparator(), Text(text="test\n{"), TemplateParamSeparator(), Text(text="\n"), TemplateParamSeparator(), Text(text=" "), TemplateClose()] |