diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index b899e75..c2d5240 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1007,23 +1007,53 @@ class Tokenizer(object): # TODO - fail all other contexts on start? self._head += 2 reset = self._head - 1 + style = None try: + self._push(contexts.TABLE_OPEN) + style = self._parse_as_table_style("\n", break_on_table_end=True) + if len(style) == 0: + self._head = reset + 1 table = self._parse(contexts.TABLE_OPEN) except BadRoute: self._head = reset self._emit_text("{|") else: - self._emit_style_tag("table", "{|", table) + self._emit(tokens.TagOpenOpen(wiki_markup="{|")) + self._emit_text("table") + if style: + self._emit_all(style) + self._emit(tokens.TagCloseOpen()) + self._emit_all(table) + self._emit(tokens.TagOpenClose()) + self._emit_text("table") + self._emit(tokens.TagCloseClose()) + # self._emit_style_tag("table", "{|", table) def _handle_table_end(self): self._head += 2 return self._pop() def _handle_table_row(self): - self._head += 1 - self._emit(tokens.TagOpenOpen(wiki_markup="|-")) - self._emit_text("tr") - self._emit(tokens.TagCloseSelfclose()) + reset = self._head + self._head += 2 + try: + self._push(contexts.TABLE_OPEN) + style = self._parse_as_table_style("\n") + if len(style) == 0: + self._head = reset + 2 + except BadRoute: + self._head = reset + raise + else: + self._emit(tokens.TagOpenOpen(wiki_markup="|-")) + self._emit_text("tr") + if style: + # this looks highly suspicious + # if type(style[0] == tokens.Text): + # style.pop(0) + self._emit_all(style) + self._emit(tokens.TagCloseSelfclose()) + self._head -= 1 def _handle_table_cell(self, markup, tag, line_context): """Parse as normal syntax unless we hit a style marker, then parse as HTML attributes""" @@ -1047,9 +1077,10 @@ class Tokenizer(object): self._head = reset + len(markup) try: style = self._parse_as_table_style("|") + # Don't parse the style separator + self._head += 1 (cell_context, cell) = self._parse(table_context) except BadRoute: - assert False self._head = reset raise self._emit(tokens.TagOpenOpen(wiki_markup=markup)) @@ -1066,7 +1097,7 @@ class Tokenizer(object): # offset displacement done by _parse() self._head -= 1 - def _parse_as_table_style(self, end_token): + def _parse_as_table_style(self, end_token, break_on_table_end=False): data = _TagOpenData() data.context = _TagOpenData.CX_ATTR_READY while True: @@ -1086,7 +1117,9 @@ class Tokenizer(object): elif this == end_token and can_exit: if data.context & (data.CX_ATTR_NAME | data.CX_ATTR_VALUE): self._push_tag_buffer(data) - self._head += 1 + # self._head += 1 + return self._pop() + elif break_on_table_end and this == "|" and next == "}": return self._pop() else: self._handle_tag_data(data, this) diff --git a/tests/tokenizer/tables.mwtest b/tests/tokenizer/tables.mwtest index 1087381..fa068fd 100644 --- a/tests/tokenizer/tables.mwtest +++ b/tests/tokenizer/tables.mwtest @@ -127,7 +127,7 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text name: table_cell_attributes_name_with_pipe label: Pipe inside an attribute name should still be used as a style separator. input: "{| \n | name|="foo bar"| test \n|}" -output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseSelfclose(), Text(text="=\"foo bar"| test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseSelfclose(), Text(text="=\"foo bar\"| test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] --- @@ -135,3 +135,25 @@ name: table_cell_attributes_pipe_after_equals label: Pipe inside an attribute should still be used as a style separator after an equals. input: "{| \n | name=|"foo|bar"| test \n|}" output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagCloseSelfclose(), Text(text="\"foo|bar\"| test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: table_row_attributes +label: Parse table row style attributes. +input: "{| \n |- name="foo bar"\n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(), Text(text="\n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + +--- + +name: table_row_attributes_crazy_whitespace +label: Parse table row style attributes with different whitespace. +input: "{| \t \n |- \t name="foo bar"\n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \t \n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" \t ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(), Text(text="\n"), TagOpenClose(), Text(text="table"), TagCloseClose()] + + +--- + +name: table_attributes +label: Parse table style attributes. +input: "{| name="foo bar"\n|}" +output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"),TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(), Text(text="\n"), TagOpenClose(), Text(text="table"), TagCloseClose()]