From f1664a8d67d7544d6524bd8de3ab3e554247bc2e Mon Sep 17 00:00:00 2001 From: David Winegar Date: Wed, 16 Jul 2014 10:00:58 -0700 Subject: [PATCH] Updated row and table handling Changed row recursion handling to make sure the tag is emitted even when hitting recursion limits. Need to test table recursion to make sure that works. Also fixed a bug in which tables were eating the trailing token. Added several tests for rows and trailing tokens with tables. --- mwparserfromhell/parser/tokenizer.py | 33 ++++++++++++++++----------------- tests/tokenizer/tables.mwtest | 36 +++++++++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 18 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 0829e7d..787ea0a 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1027,6 +1027,8 @@ class Tokenizer(object): self._emit(tokens.TagOpenClose(wiki_markup="|}")) self._emit_text("table") self._emit(tokens.TagCloseClose()) + # offset displacement done by _parse() + self._head -= 1 def _handle_table_end(self): """Return the stack in order to handle the table end.""" @@ -1035,25 +1037,22 @@ class Tokenizer(object): def _handle_table_row(self): """Parse as style until end of the line, then continue.""" - if not self._can_recurse(): - self._emit_text("|-") - self._head += 2 - return - reset = self._head self._head += 2 - try: - self._push(contexts.TABLE_OPEN) - (style, padding) = self._parse_as_table_style("\n") - except BadRoute: - self._head = reset - raise - else: - self._emit(tokens.TagOpenOpen(wiki_markup="|-")) - self._emit_text("tr") - if style: - self._emit_all(style) - self._emit(tokens.TagCloseSelfclose(padding=padding)) + style, padding = None, "" + # If we can't recurse, still tokenize tag but parse style attrs as text + if self._can_recurse(): + try: + self._push(contexts.TABLE_OPEN) + (style, padding) = self._parse_as_table_style("\n") + except BadRoute: + self._head = 
reset + raise + self._emit(tokens.TagOpenOpen(wiki_markup="|-")) + self._emit_text("tr") + if style: + self._emit_all(style) + self._emit(tokens.TagCloseSelfclose(padding=padding)) def _handle_table_cell(self, markup, tag, line_context): """Parse as normal syntax unless we hit a style marker, then parse style diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index 7cf826c..2770227 100644 --- a/tests/tokenizer/tables.mwtest +++ b/tests/tokenizer/tables.mwtest @@ -26,6 +26,13 @@ output: [Text(text="{| | ")] --- +name: no_table_close_inside_row +label: Handle case when there is no table close while inside of a row. +input: "{| |- " +output: [Text(text="{| |- ")] + +--- + name: leading_whitespace_table label: Handle leading whitespace for a table. input: "foo \n \t {|\n|}" @@ -33,6 +40,27 @@ output: [Text(text="foo \n \t "), TagOpenOpen(wiki_markup="{|"), Text(text="t --- +name: whitespace_after_table +label: Handle whitespace after a table close. +input: "{|\n|}\n \t " +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose(), Text(text="\n \t ")] + +--- + +name: different_whitespace_after_table +label: Handle spaces after a table close. +input: "{|\n|} \n " +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose(), Text(text=" \n ")] + +--- + +name: characters_after_table +label: Handle characters after a table close. +input: "{|\n|} tsta" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose(), Text(text=" tsta")] + +--- + name: leading_characters_table label: Don't parse as a table when leading characters are not newline or whitespace. 
input: "foo \n foo \t {|\n|}" @@ -47,6 +75,13 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding --- +name: table_row_multiple +label: Multiple table rows. +input: "{|\n |- \n|- \n |-\n |}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseSelfclose(padding=" \n"), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseSelfclose(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseSelfclose(padding="\n"), Text(text=" "), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] + +--- + name: table_cell_simple label: Simple table cell. input: "{|\n | foo \n|}" @@ -171,7 +206,6 @@ label: Parse table row style attributes with different whitespace. input: "{| \t \n |- \t name="foo bar" \t \n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(" \t \n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" \t ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(padding=" \t \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] - --- name: table_attributes