Browse Source

Fix two parser bugs involving wikitable error handling.

tags/v0.4.4
Ben Kurtovic 8 years ago
parent
commit
61b6b98470
5 changed files with 29 additions and 12 deletions
  1. +3
    -1
      CHANGELOG
  2. +4
    -1
      docs/changelog.rst
  3. +4
    -6
      mwparserfromhell/parser/ctokenizer/tok_parse.c
  4. +4
    -4
      mwparserfromhell/parser/tokenizer.py
  5. +14
    -0
      tests/tokenizer/integration.mwtest

+ 3
- 1
CHANGELOG View File

@@ -1,6 +1,8 @@
v0.5 (unreleased):

-
+- Fixed parsing bugs involving:
+  - wikitables nested in templates;
+  - wikitable error recovery when unable to recurse.

v0.4.3 (released October 29, 2015):



+ 4
- 1
docs/changelog.rst View File

@@ -7,7 +7,10 @@ v0.5
Unreleased
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.4.3...develop>`__):

-
+- Fixed parsing bugs involving:
+
+  - wikitables nested in templates;
+  - wikitable error recovery when unable to recurse.

v0.4.3
------


+ 4
- 6
mwparserfromhell/parser/ctokenizer/tok_parse.c View File

@@ -2190,7 +2190,7 @@ static PyObject* Tokenizer_handle_table_style(Tokenizer* self, Unicode end_token
*/
static int Tokenizer_parse_table(Tokenizer* self)
{
-Py_ssize_t reset = self->head + 1;
+Py_ssize_t reset = self->head;
PyObject *style, *padding;
PyObject *table = NULL;
self->head += 2;
@@ -2201,7 +2201,7 @@ static int Tokenizer_parse_table(Tokenizer* self)
if (BAD_ROUTE) {
RESET_ROUTE();
self->head = reset;
-if (Tokenizer_emit_text(self, "{|"))
+if (Tokenizer_emit_char(self, '{'))
return -1;
return 0;
}
@@ -2220,7 +2220,7 @@ static int Tokenizer_parse_table(Tokenizer* self)
Py_DECREF(padding);
Py_DECREF(style);
self->head = reset;
-if (Tokenizer_emit_text(self, "{|"))
+if (Tokenizer_emit_char(self, '{'))
return -1;
return 0;
}
@@ -2689,10 +2689,8 @@ PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push)
if (Tokenizer_parse_table(self))
return NULL;
}
-else if (Tokenizer_emit_char(self, this) || Tokenizer_emit_char(self, next))
+else if (Tokenizer_emit_char(self, this))
return NULL;
-else
-self->head++;
}
else if (this_context & LC_TABLE_OPEN) {
if (this == '|' && next == '|' && this_context & LC_TABLE_TD_LINE) {


+ 4
- 4
mwparserfromhell/parser/tokenizer.py View File

@@ -1074,14 +1074,14 @@ class Tokenizer(object):

def _parse_table(self):
"""Parse a wikicode table by starting with the first line."""
-reset = self._head + 1
+reset = self._head
self._head += 2
self._push(contexts.TABLE_OPEN)
try:
padding = self._handle_table_style("\n")
except BadRoute:
self._head = reset
-self._emit_text("{|")
+self._emit_text("{")
return
style = self._pop()

@@ -1090,7 +1090,7 @@ class Tokenizer(object):
table = self._parse(contexts.TABLE_OPEN)
except BadRoute:
self._head = reset
-self._emit_text("{|")
+self._emit_text("{")
return

self._emit_table_tag("{|", "table", style, padding, None, table, "|}")
@@ -1352,7 +1352,7 @@ class Tokenizer(object):
if self._can_recurse():
self._parse_table()
else:
-self._emit_text("{|")
+self._emit_text("{")
elif self._context & contexts.TABLE_OPEN:
if this == next == "|" and self._context & contexts.TABLE_TD_LINE:
if self._context & contexts.TABLE_CELL_OPEN:


+ 14
- 0
tests/tokenizer/integration.mwtest View File

@@ -332,3 +332,17 @@ name: wikilink_to_external_link_fallback_2
label: an external link enclosed in an extra pair of brackets (see issue #120)
input: "[[http://example.com]]"
output: [Text(text="["), ExternalLinkOpen(brackets=True), Text(text="http://example.com"), ExternalLinkClose(), Text(text="]")]

+---
+
+name: tables_in_templates
+label: catch error handling mistakes when wikitables are inside templates
+input: "{{hello|test\n{|\n|} }}"
+output: [TemplateOpen(), Text(text="hello"), TemplateParamSeparator(), Text(text="test\n"), TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose(), Text(text=" "), TemplateClose()]
+
+---
+
+name: tables_in_templates_2
+label: catch error handling mistakes when wikitables are inside templates
+input: "{{hello|test\n{|\n| }}"
+output: [TemplateOpen(), Text(text="hello"), TemplateParamSeparator(), Text(text="test\n{"), TemplateParamSeparator(), Text(text="\n"), TemplateParamSeparator(), Text(text=" "), TemplateClose()]

Loading…
Cancel
Save