Procházet zdrojové kódy

Add table start/row start style attribute support

Started styling attributes for table rows and table starts. Still not entirely
sure about this; the padding handling definitely needs changes.
tags/v0.4
David Winegar před 10 roky
rodič
revize
9f159ecfa2
2 změnil soubory, kde provedl 64 přidání a 9 odebrání
  1. +41
    -8
      mwparserfromhell/parser/tokenizer.py
  2. +23
    -1
      tests/tokenizer/tables.mwtest

+ 41
- 8
mwparserfromhell/parser/tokenizer.py Zobrazit soubor

@@ -1007,23 +1007,53 @@ class Tokenizer(object):
# TODO - fail all other contexts on start?
self._head += 2
reset = self._head - 1
style = None
try:
self._push(contexts.TABLE_OPEN)
style = self._parse_as_table_style("\n", break_on_table_end=True)
if len(style) == 0:
self._head = reset + 1
table = self._parse(contexts.TABLE_OPEN)
except BadRoute:
self._head = reset
self._emit_text("{|")
else:
self._emit_style_tag("table", "{|", table)
self._emit(tokens.TagOpenOpen(wiki_markup="{|"))
self._emit_text("table")
if style:
self._emit_all(style)
self._emit(tokens.TagCloseOpen())
self._emit_all(table)
self._emit(tokens.TagOpenClose())
self._emit_text("table")
self._emit(tokens.TagCloseClose())
# self._emit_style_tag("table", "{|", table)

def _handle_table_end(self):
self._head += 2
return self._pop()

def _handle_table_row(self):
self._head += 1
self._emit(tokens.TagOpenOpen(wiki_markup="|-"))
self._emit_text("tr")
self._emit(tokens.TagCloseSelfclose())
reset = self._head
self._head += 2
try:
self._push(contexts.TABLE_OPEN)
style = self._parse_as_table_style("\n")
if len(style) == 0:
self._head = reset + 2
except BadRoute:
self._head = reset
raise
else:
self._emit(tokens.TagOpenOpen(wiki_markup="|-"))
self._emit_text("tr")
if style:
# this looks highly suspicious
# if type(style[0] == tokens.Text):
# style.pop(0)
self._emit_all(style)
self._emit(tokens.TagCloseSelfclose())
self._head -= 1

def _handle_table_cell(self, markup, tag, line_context):
"""Parse as normal syntax unless we hit a style marker, then parse as HTML attributes"""
@@ -1047,9 +1077,10 @@ class Tokenizer(object):
self._head = reset + len(markup)
try:
style = self._parse_as_table_style("|")
# Don't parse the style separator
self._head += 1
(cell_context, cell) = self._parse(table_context)
except BadRoute:
assert False
self._head = reset
raise
self._emit(tokens.TagOpenOpen(wiki_markup=markup))
@@ -1066,7 +1097,7 @@ class Tokenizer(object):
# offset displacement done by _parse()
self._head -= 1

def _parse_as_table_style(self, end_token):
def _parse_as_table_style(self, end_token, break_on_table_end=False):
data = _TagOpenData()
data.context = _TagOpenData.CX_ATTR_READY
while True:
@@ -1086,7 +1117,9 @@ class Tokenizer(object):
elif this == end_token and can_exit:
if data.context & (data.CX_ATTR_NAME | data.CX_ATTR_VALUE):
self._push_tag_buffer(data)
self._head += 1
# self._head += 1
return self._pop()
elif break_on_table_end and this == "|" and next == "}":
return self._pop()
else:
self._handle_tag_data(data, this)


+ 23
- 1
tests/tokenizer/tables.mwtest Zobrazit soubor

@@ -127,7 +127,7 @@ output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text
name: table_cell_attributes_name_with_pipe
label: Pipe inside an attribute name should still be used as a style separator.
input: "{| \n | name|="foo bar"| test \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseSelfclose(), Text(text="=\"foo bar"| test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()]
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseSelfclose(), Text(text="=\"foo bar\"| test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()]

---

@@ -135,3 +135,25 @@ name: table_cell_attributes_pipe_after_equals
label: Pipe inside an attribute should still be used as a style separator after an equals.
input: "{| \n | name=|"foo|bar"| test \n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagCloseSelfclose(), Text(text="\"foo|bar\"| test \n"), TagOpenClose(), Text(text="table"), TagCloseClose()]

---

name: table_row_attributes
label: Parse table row style attributes.
input: "{| \n |- name="foo bar"\n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(), Text(text="\n"), TagOpenClose(), Text(text="table"), TagCloseClose()]

---

name: table_row_attributes_crazy_whitespace
label: Parse table row style attributes with different whitespace.
input: "{| \t \n |- \t name="foo bar"\n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(), Text(text=" \t \n "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" \t ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseSelfclose(), Text(text="\n"), TagOpenClose(), Text(text="table"), TagCloseClose()]


---

name: table_attributes
label: Parse table style attributes.
input: "{| name="foo bar"\n|}"
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"),TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(), Text(text="\n"), TagOpenClose(), Text(text="table"), TagCloseClose()]

Načítá se…
Zrušit
Uložit