Ver a proveniência

Correctly handle tables with no closing tag

The tests were not actually exercising the cases where a table has no
closing `|}`. Fixed the tests, then fixed both tokenizers so that the
corrected tests pass. Also refactored the Python tokenizer (pytokenizer)
to more closely match the C tokenizer (ctokenizer) by keeping only the
`_parse` calls inside the try blocks and moving all other code out of them.
tags/v0.4
David Winegar há 10 anos
ascendente
cometimento
1a4c88e11f
3 ficheiros alterados com 110 adições e 37 eliminações
  1. +23
    -5
      mwparserfromhell/parser/tokenizer.c
  2. +45
    -25
      mwparserfromhell/parser/tokenizer.py
  3. +42
    -7
      tests/tokenizer/tables.mwtest

+ 23
- 5
mwparserfromhell/parser/tokenizer.c Ver ficheiro

@@ -2636,8 +2636,9 @@ static int Tokenizer_handle_table_start(Tokenizer* self)
self->head++;
table = Tokenizer_parse(self, LC_TABLE_OPEN, 1);
if (BAD_ROUTE) {
Py_DECREF(padding);
Py_DECREF(style);
RESET_ROUTE();
// offset displacement done by parse()
self->head = reset;
if (Tokenizer_emit_text(self, "{|"))
return -1;
@@ -2676,7 +2677,7 @@ static PyObject * Tokenizer_handle_table_end(Tokenizer* self)
static int Tokenizer_handle_table_row(Tokenizer* self)
{
Py_ssize_t reset = self->head;
PyObject *padding, *style, *row;
PyObject *padding, *style, *row, *trash;
self->head += 2;

if (!Tokenizer_CAN_RECURSE(self)) {
@@ -2690,6 +2691,8 @@ static int Tokenizer_handle_table_row(Tokenizer* self)
return -1;
padding = Tokenizer_parse_as_table_style(self, '\n', 0);
if (BAD_ROUTE) {
trash = Tokenizer_pop(self);
Py_XDECREF(trash);
self->head = reset;
return 0;
}
@@ -2704,6 +2707,8 @@ static int Tokenizer_handle_table_row(Tokenizer* self)
self->head++;
row = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_ROW_OPEN, 1);
if (BAD_ROUTE) {
trash = Tokenizer_pop(self);
Py_XDECREF(trash);
Py_DECREF(padding);
Py_DECREF(style);
self->head = reset;
@@ -2712,7 +2717,6 @@ static int Tokenizer_handle_table_row(Tokenizer* self)
if (!row) {
Py_DECREF(padding);
Py_DECREF(style);
Py_DECREF(row);
return -1;
}

@@ -2741,7 +2745,7 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup,
uint64_t old_context = self->topstack->context;
uint64_t cell_context;
Py_ssize_t reset = self->head;
PyObject *padding, *cell;
PyObject *padding, *cell, *trash;
PyObject *style = NULL;
const char *close_open_markup = NULL;
self->head += strlen(markup);
@@ -2755,6 +2759,8 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup,

cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | LC_TABLE_CELL_STYLE | line_context, 1);
if (BAD_ROUTE) {
trash = Tokenizer_pop(self);
Py_XDECREF(trash);
self->head = reset;
return 0;
}
@@ -2770,6 +2776,8 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup,
return -1;
padding = Tokenizer_parse_as_table_style(self, '|', 0);
if (BAD_ROUTE) {
trash = Tokenizer_pop(self);
Py_XDECREF(trash);
self->head = reset;
return 0;
}
@@ -2784,11 +2792,18 @@ static int Tokenizer_handle_table_cell(Tokenizer* self, const char *markup,
self->head++;
cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN | line_context, 1);
if (BAD_ROUTE) {
Py_DECREF(padding);
Py_DECREF(style);
trash = Tokenizer_pop(self);
Py_XDECREF(trash);
self->head = reset;
return 0;
}
if (!cell)
if (!cell) {
Py_DECREF(padding);
Py_DECREF(style);
return -1;
}
cell_context = self->topstack->context;
self->topstack->context = old_context;
}
@@ -3148,6 +3163,9 @@ static PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push)
}
else if (Tokenizer_emit_char(self, this))
return NULL;
// Raise BadRoute to table start
if (BAD_ROUTE)
return NULL;
}
else if (Tokenizer_emit_char(self, this))
return NULL;


+ 45
- 25
mwparserfromhell/parser/tokenizer.py Ver ficheiro

@@ -1053,24 +1053,30 @@ class Tokenizer(object):
reset = self._head + 1
style, table = None, None
self._head += 2

self._push(contexts.TABLE_OPEN)
try:
self._push(contexts.TABLE_OPEN)
padding = self._parse_as_table_style("\n", break_on_table_end=True)
style = self._pop()
# continue to parse if it is NOT an inline table
if "\n" in padding:
self._head += 1
table = self._parse(contexts.TABLE_OPEN)
else:
# close tag
self._head += 2
except BadRoute:
# offset displacement done by _parse()
self._head = reset
self._emit_text("{|")
return
style = self._pop()
# continue to parse if it is NOT an inline table
if "\n" in padding:
self._head += 1
try:
table = self._parse(contexts.TABLE_OPEN)
except BadRoute:
self._head = reset
self._emit_text("{|")
return
else:
self._emit_table_tag("{|", "table", style, padding, None, table, "|}")
self._head -= 1
# close tag
self._head += 2
self._emit_table_tag("{|", "table", style, padding, None, table, "|}")
# offset displacement done by _parse()
self._head -= 1

def _handle_table_end(self):
"""Return the stack in order to handle the table end."""
@@ -1087,15 +1093,21 @@ class Tokenizer(object):
self._head -= 1
return

self._push(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN)
try:
self._push(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN)
padding = self._parse_as_table_style("\n")
style = self._pop()
# don't parse the style separator
self._head += 1
except BadRoute:
self._head = reset
self._pop()
raise
style = self._pop()
# don't parse the style separator
self._head += 1
try:
row = self._parse(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN)
except BadRoute:
self._head = reset
self._pop()
raise
self._emit_table_tag("|-", "tr", style, padding, None, row, "")
# offset displacement done by parse()
@@ -1119,26 +1131,34 @@ class Tokenizer(object):

try:
cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context | contexts.TABLE_CELL_STYLE)
cell_context = self._context
self._context = old_context
reset_for_style = cell_context & contexts.TABLE_CELL_STYLE
except BadRoute:
self._head = reset
self._pop()
raise
cell_context = self._context
self._context = old_context
reset_for_style = cell_context & contexts.TABLE_CELL_STYLE
if reset_for_style:
self._head = reset + len(markup)
self._push(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context)
try:
self._push(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context)
padding = self._parse_as_table_style("|")
style = self._pop()
# Don't parse the style separator
self._head += 1
except BadRoute:
self._head = reset
self._pop()
raise
style = self._pop()
# Don't parse the style separator
self._head += 1
try:
cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context)
cell_context = self._context
self._context = old_context
except BadRoute:
self._head = reset
ret = self._pop()
raise
cell_context = self._context
self._context = old_context

close_open_markup = "|" if reset_for_style else None
self._emit_table_tag(markup, tag, style, padding, close_open_markup, cell, "")
# keep header/cell line contexts


+ 42
- 7
tests/tokenizer/tables.mwtest Ver ficheiro

@@ -13,23 +13,51 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding
---

name: no_table_close_simple
label: Handle case when there is no table close.
label: No table close on inline table
input: "{| "
output: [Text(text="{| ")]

---

name: no_table_close_newline
label: No table close with a newline
input: "{| \n "
output: [Text(text="{| \n ")]

---

name: no_table_close_inside_cell
label: Handle case when there is no table close while inside of a cell.
input: "{| | "
output: [Text(text="{| | ")]
label: No table close while inside of a cell
input: "{| \n| "
output: [Text(text="{| \n| ")]

---

name: no_table_close_inside_cell_after_newline
label: No table close while inside of a cell after a newline
input: "{| \n| \n "
output: [Text(text="{| \n| \n ")]

---

name: no_table_close_inside_cell_with_attributes
label: No table close while inside of a cell with attributes
input: "{| \n| red | test"
output: [Text(text="{| \n| red | test")]

---

name: no_table_close_inside_row
label: Handle case when there is no table close while inside of a row.
input: "{| |- "
output: [Text(text="{| |- ")]
label: No table close while inside of a row
input: "{| \n|- "
output: [Text(text="{| \n|- ")]

---

name: no_table_close_inside_row_after_newline
label: No table close while inside of a row after a newline
input: "{| \n|- \n "
output: [Text(text="{| \n|- \n ")]

---

@@ -40,6 +68,13 @@ output: [Text(text="{| border=\"1\"")]

---

name: no_table_close_unclosed_attributes
label: Don't parse unclosed attributes if the table doesn't exist.
input: "{| border="
output: [Text(text="{| border=")]

---

name: no_table_close_row_attributes
label: Don't parse row attributes as attributes if the table doesn't exist.
input: "{| |- border="1""


Carregando…
Cancelar
Guardar