瀏覽代碼

Seems to be working for quoted attributes now.

tags/v0.3
Ben Kurtovic 11 年之前
父節點
當前提交
26d30f3d1a
共有 1 個文件被更改,包括 31 次插入9 次删除
  1. +31
    -9
      mwparserfromhell/parser/tokenizer.py

+ 31
- 9
mwparserfromhell/parser/tokenizer.py 查看文件

@@ -461,7 +461,7 @@ class Tokenizer(object):
return padding return padding


def _actually_handle_chunk(self, chunks, is_new): def _actually_handle_chunk(self, chunks, is_new):
if is_new:
if is_new and not self._context & contexts.TAG_OPEN_ATTR_BODY_QUOTED:
padding = 0 padding = 0
while chunks: while chunks:
if chunks[0] == "": if chunks[0] == "":
@@ -472,6 +472,15 @@ class Tokenizer(object):
self._write(tokens.TagAttrStart(padding=" " * padding)) self._write(tokens.TagAttrStart(padding=" " * padding))
if chunks: if chunks:
chunk = chunks.pop(0) chunk = chunks.pop(0)
if self._context & contexts.TAG_OPEN_ATTR_BODY:
self._context ^= contexts.TAG_OPEN_ATTR_BODY
self._context |= contexts.TAG_OPEN_ATTR_NAME
if self._context & contexts.TAG_OPEN_ATTR_BODY_QUOTED:
if re.search(r'[^\\]"', chunk[:-1]):
self._fail_route()
if re.search(r'[^\\]"$', chunk):
self._write_text(chunk[:-1])
return self._pop() # Back to _handle_tag_attribute_body()
self._write_text(chunk) self._write_text(chunk)


def _handle_tag_chunk(self, text): def _handle_tag_chunk(self, text):
@@ -490,26 +499,35 @@ class Tokenizer(object):
self._actually_handle_chunk(chunks, True) self._actually_handle_chunk(chunks, True)
is_new = False is_new = False
while chunks: while chunks:
self._actually_handle_chunk(chunks, is_new)
should_exit = self._actually_handle_chunk(chunks, is_new)
if should_exit:
return should_exit
is_new = True is_new = True


def _handle_tag_attribute_body(self): def _handle_tag_attribute_body(self):
self._context ^= contexts.TAG_OPEN_ATTR_NAME self._context ^= contexts.TAG_OPEN_ATTR_NAME
self._context |= contexts.TAG_OPEN_ATTR_BODY self._context |= contexts.TAG_OPEN_ATTR_BODY
self._write(TagAttrEquals())
self._write(tokens.TagAttrEquals())
next = self._read(1) next = self._read(1)
if next not in self.MARKERS and next.startswith('"'): if next not in self.MARKERS and next.startswith('"'):
if re.search(r'[^\\]"$', next[1:]): if re.search(r'[^\\]"$', next[1:]):
if not re.search(r'[^\\]"', next[1:-1]): if not re.search(r'[^\\]"', next[1:-1]):
self._write(TagAttrQuote())
self._write(tokens.TagAttrQuote())
self._write_text(next[1:-1]) self._write_text(next[1:-1])
self._head += 1 self._head += 1
else: else:
if not re.search(r'[^\\]"', next[1:]): if not re.search(r'[^\\]"', next[1:]):
self._push(contexts.TAG_OPEN_ATTR_BODY_QUOTED)
self._write(TagAttrQuote())
self._write_text(next[1:])
self._head += 1 self._head += 1
reset = self._head
try:
attr = self._parse(contexts.TAG_OPEN_ATTR_BODY_QUOTED)
except BadRoute:
self._head = reset
self._write_text(next)
else:
self._write(tokens.TagAttrQuote())
self._write_text(next[1:])
self._write_all(attr)


def _handle_tag_close_open(self): def _handle_tag_close_open(self):
padding = self._actually_close_tag_opening() padding = self._actually_close_tag_opening()
@@ -543,7 +561,9 @@ class Tokenizer(object):
this = self._read() this = self._read()
if this not in self.MARKERS: if this not in self.MARKERS:
if self._context & contexts.TAG_OPEN: if self._context & contexts.TAG_OPEN:
self._handle_tag_chunk(this)
should_exit = self._handle_tag_chunk(this)
if should_exit:
return should_exit
else: else:
self._write_text(this) self._write_text(this)
self._head += 1 self._head += 1
@@ -593,6 +613,8 @@ class Tokenizer(object):
elif this == "=" and not self._global & contexts.GL_HEADING: elif this == "=" and not self._global & contexts.GL_HEADING:
if self._read(-1) in ("\n", self.START): if self._read(-1) in ("\n", self.START):
self._parse_heading() self._parse_heading()
elif self._context & contexts.TAG_OPEN_ATTR_NAME:
self._handle_tag_attribute_body()
else: else:
self._write_text("=") self._write_text("=")
elif this == "=" and self._context & contexts.HEADING: elif this == "=" and self._context & contexts.HEADING:
@@ -618,7 +640,7 @@ class Tokenizer(object):
self._handle_tag_close_open() self._handle_tag_close_open()
elif this == "/" and next == ">": elif this == "/" and next == ">":
return self._handle_tag_selfclose() return self._handle_tag_selfclose()
elif this == "=":
elif this == "=" and self._context & contexts.TAG_OPEN_ATTR_NAME:
self._handle_tag_attribute_body() self._handle_tag_attribute_body()
elif this == "<" and next == "/" and ( elif this == "<" and next == "/" and (
self._context & contexts.TAG_BODY): self._context & contexts.TAG_BODY):


Loading…
取消
儲存