diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 70e2d5d..80cb501 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1020,17 +1020,34 @@ class Tokenizer(object): return self._pop() def _handle_table_row(self): - self._head += 2 - self._emit(tokens.TagOpenOpen(wiki_markup="{-")) + self._head += 1 + self._emit(tokens.TagOpenOpen(wiki_markup="|-")) self._emit_text("tr") self._emit(tokens.TagCloseSelfclose()) - self._context &= ~contexts.TABLE_CELL_OPEN - def _handle_table_cell(self): - pass + def _handle_table_cell(self, markup, tag, line_context): + """Parse as normal syntax unless we hit a style marker, then parse as HTML attributes""" + if not self._can_recurse(): + self._emit_text(markup) + self._head += len(markup) - 1 + return - def _handle_header_cell(self): - pass + reset = self._head + self._head += len(markup) + try: + cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | contexts.TABLE_CELL_STYLE_POSSIBLE | line_context) + except BadRoute: + self._head = reset + raise + else: + self._emit(tokens.TagOpenOpen(wiki_markup=markup)) + self._emit_text(tag) + self._emit(tokens.TagCloseSelfclose()) + self._emit_all(cell) + self._head -= 1 + + def _handle_table_cell_end(self): + return self._pop() def _handle_cell_style(self): pass @@ -1184,36 +1201,51 @@ class Tokenizer(object): elif this in ("\n", ":") and self._context & contexts.DL_TERM: self._handle_dl_term() - elif (this == "{" and next == "|" and (self._read(-1) in ("\n", self.START)) or - (self._read(-2) in ("\n", self.START) and self._read(-1).strip() == "")): + elif this == "{" and next == "|" and (self._read(-1) in ("\n", self.START) or + (self._read(-2) in ("\n", self.START) and self._read(-1).isspace())): if self._can_recurse(): self._handle_table_start() else: self._emit_text("{|") elif self._context & contexts.TABLE_OPEN: if this == "|" and next == "}": + if self._context & 
contexts.TABLE_CELL_OPEN: + return self._handle_table_cell_end() return self._handle_table_end() elif this == "|" and next == "|" and self._context & contexts.TABLE_CELL_LINE: - self._handle_table_cell() + if self._context & contexts.TABLE_CELL_OPEN: + return self._handle_table_cell_end() + self._handle_table_cell("||", "td", contexts.TABLE_CELL_LINE) elif this == "|" and next == "|" and self._context & contexts.TABLE_HEADER_LINE: - self._handle_header_cell() + if self._context & contexts.TABLE_CELL_OPEN: + return self._handle_table_cell_end() + self._handle_table_cell("||", "th", contexts.TABLE_HEADER_LINE) elif this == "!" and next == "!" and self._context & contexts.TABLE_HEADER_LINE: - self._handle_header_cell() + if self._context & contexts.TABLE_CELL_OPEN: + return self._handle_table_cell_end() + self._handle_table_cell("!!", "th", contexts.TABLE_HEADER_LINE) elif this == "|" and self._context & contexts.TABLE_CELL_STYLE_POSSIBLE: self._handle_cell_style() # on newline, clear out cell line contexts elif this == "\n" and self._context & (contexts.TABLE_CELL_LINE | contexts.TABLE_HEADER_LINE | contexts.TABLE_CELL_STYLE_POSSIBLE): + # TODO might not be handled due to DL_TERM code above + # TODO does this even work? self._context &= (~contexts.TABLE_CELL_LINE & ~contexts.TABLE_HEADER_LINE & ~contexts.TABLE_CELL_STYLE_POSSIBLE) self._emit_text(this) - # newline or whitespace/newline elif (self._read(-1) in ("\n", self.START) or - (self._read(-2) in ("\n", self.START) and self._read(-1).strip() == "")): + (self._read(-2) in ("\n", self.START) and self._read(-1).isspace())): if this == "|" and next == "-": + if self._context & contexts.TABLE_CELL_OPEN: + return self._handle_table_cell_end() self._handle_table_row() - elif this == "|" and self._can_recurse(): - self._handle_table_cell() - elif this == "!" 
and self._can_recurse(): - self._handle_header_cell() + elif this == "|": + if self._context & contexts.TABLE_CELL_OPEN: + return self._handle_table_cell_end() + self._handle_table_cell("|", "td", contexts.TABLE_CELL_LINE) + elif this == "!": + if self._context & contexts.TABLE_CELL_OPEN: + return self._handle_table_cell_end() + self._handle_table_cell("!", "th", contexts.TABLE_HEADER_LINE) else: self._emit_text(this) else: diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index 399f7fd..f818f65 100644 --- a/tests/tokenizer/tables.mwtest +++ b/tests/tokenizer/tables.mwtest @@ -19,6 +19,13 @@ output: [Text(text="{| ")] --- +name: no_table_close_inside_cell +label: Handle case when there is no table close while inside of a cell. +input: "{| | " +output: [Text(text="{| | ")] + +--- + name: leading_whitespace_table label: Handle leading whitespace for a table. input: "foo \n \t {|\n|}" @@ -30,3 +37,52 @@ name: leading_characters_table label: Don't parse as a table when leading characters are not newline or whitespace. input: "foo \n foo \t {|\n|}" output: [Text(text="foo \n foo \t {|\n|}")] + +--- + +name: table_row_simple +label: Simple table row. +input: "{|\n |- \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseSelfclose(), Text(text=" \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: table_cell_simple +label: Simple table cell. +input: "{|\n | foo \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: nowiki_inside_table +label: Nowiki handles pipe characters in tables. +input: "{|\n | foo <nowiki>| |- {| |} || ! !!</nowiki> 
bar \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo "), TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="| |- {| |} || ! !!"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), Text(text=" bar \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: table_text_outside_cell +label: Parse text inside table but outside of a cell. +input: "{|\n bar \n | foo \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n bar \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), Text(text=" foo \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: no_table_cell_with_leading_characters +label: Fail to create a table cell when there are leading non-whitespace characters. +input: "{|\n bar | foo \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n bar | foo \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: no_table_row_with_leading_characters +label: Fail to create a table row when there are leading non-whitespace characters. +input: "{|\n bar |- foo \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n bar |- foo \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: template_inside_table_cell +label: Template within table cell. +input: "{|\n |{{foo\n|bar=baz}} \n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text="\n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseSelfclose(), TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), Text(text=" \n"), TagOpenClose(), Text(text="table"), TagCloseClose()]