Browse Source

Replace data.literal and data.quoted with a data.CX_QUOTED context

tags/v0.3
Ben Kurtovic 11 years ago
parent
commit 9693b6d5e6
1 changed file with 15 additions and 19 deletions
  1. +15 -19   mwparserfromhell/parser/tokenizer.py
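The commit collapses two ad-hoc booleans on _TagOpenData (data.literal and data.quoted) into a single CX_QUOTED bit in the class's existing context bitfield, renumbering the CX_NEED_* flags to make room. A minimal standalone sketch of the bitfield idiom (illustration only, not part of the commit):

    # Each CX_* flag is a distinct power of two, so one integer can
    # carry several boolean facts at once.
    CX_QUOTED     = 1 << 4
    CX_NEED_SPACE = 1 << 5

    context = 0
    context |= CX_QUOTED                  # set: an opening quote was seen
    assert context & CX_QUOTED            # test: inside a quoted value
    context ^= CX_QUOTED                  # clear a flag known to be set
    context |= CX_QUOTED | CX_NEED_SPACE  # set two flags in one step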

mwparserfromhell/parser/tokenizer.py

@@ -42,16 +42,15 @@ class _TagOpenData(object):
     CX_ATTR_READY = 1 << 1
     CX_ATTR_NAME = 1 << 2
     CX_ATTR_VALUE = 1 << 3
-    CX_NEED_SPACE = 1 << 4
-    CX_NEED_EQUALS = 1 << 5
-    CX_NEED_QUOTE = 1 << 6
+    CX_QUOTED = 1 << 4
+    CX_NEED_SPACE = 1 << 5
+    CX_NEED_EQUALS = 1 << 6
+    CX_NEED_QUOTE = 1 << 7
     CX_ATTR = CX_ATTR_NAME | CX_ATTR_VALUE
 
     def __init__(self):
         self.context = self.CX_NAME
-        self.literal = True
         self.padding_buffer = []
-        self.quoted = False
         self.reset = 0
         self.ignore_quote = False


@@ -448,17 +447,18 @@ class Tokenizer(object):
         self._write(tokens.TagOpenOpen(showtag=True))
         while True:
             this, next = self._read(), self._read(1)
+            can_exit = not data.context & data.CX_QUOTED or data.context & data.CX_NEED_SPACE
             if this not in self.MARKERS:
                 for chunk in self.tag_splitter.split(this):
                     if self._handle_tag_chunk(data, chunk):
                         continue
             elif this is self.END:
                 if self._context & contexts.TAG_ATTR:
-                    if data.quoted:
+                    if data.context & data.CX_QUOTED:
                         self._pop()
                     self._pop()
                 self._fail_route()
-            elif this == ">" and data.literal:
+            elif this == ">" and can_exit:
                 if data.context & data.CX_ATTR:
                     self._push_tag_buffer(data)
                 padding = data.padding_buffer[0] if data.padding_buffer else ""
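The new can_exit flag makes the exit condition explicit: since & binds tighter than not in Python, it reads as "not inside a quoted value, or the closing quote was just seen (CX_NEED_SPACE pending)". A truth-table sketch, assuming the flag values defined in the first hunk:

    CX_QUOTED, CX_NEED_SPACE = 1 << 4, 1 << 5

    def can_exit(context):
        # ">" (or "/>") may close the tag unless the tokenizer is inside
        # a quoted attribute value whose closing quote is still pending.
        return bool(not context & CX_QUOTED or context & CX_NEED_SPACE)

    assert can_exit(0)                          # e.g. plain <span ...>
    assert not can_exit(CX_QUOTED)              # ">" inside id="a > b"
    assert can_exit(CX_QUOTED | CX_NEED_SPACE)  # closing quote just seen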
@@ -466,7 +466,7 @@ class Tokenizer(object):
                 self._context = contexts.TAG_BODY
                 self._head += 1
                 return self._parse(push=False)
-            elif this == "/" and next == ">" and data.literal:
+            elif this == "/" and next == ">" and can_exit:
                 if data.context & data.CX_ATTR:
                     self._push_tag_buffer(data)
                 padding = data.padding_buffer[0] if data.padding_buffer else ""
@@ -499,9 +499,8 @@ class Tokenizer(object):
                 data.padding_buffer.append(chunk)
                 data.context = data.CX_ATTR_READY
             else:
-                if data.context & data.CX_ATTR_VALUE:
-                    data.context ^= data.CX_NEED_SPACE
-                    data.quoted = False
+                if data.context & data.CX_QUOTED:
+                    data.context ^= data.CX_NEED_SPACE | data.CX_QUOTED
                     data.ignore_quote = True
                     self._pop()
                     self._head = data.reset
@@ -536,8 +535,7 @@ class Tokenizer(object):
         if data.context & data.CX_NEED_QUOTE:
             if chunk == '"' and not data.ignore_quote:
                 data.context ^= data.CX_NEED_QUOTE
-                data.literal = False
-                data.quoted = True
+                data.context |= data.CX_QUOTED
                 self._push(self._context)
                 data.reset = self._head
             elif chunk.isspace():
@@ -545,10 +543,9 @@ class Tokenizer(object):
             else:
                 data.context ^= data.CX_NEED_QUOTE
                 self._parse_tag_chunk(chunk)
-        elif not data.literal:
+        elif data.context & data.CX_QUOTED:
             if chunk == '"':
                 data.context |= data.CX_NEED_SPACE
-                data.literal = True
             else:
                 self._parse_tag_chunk(chunk)
         elif chunk.isspace():
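Taken together, the last three hunks are the quoted-value state machine: an opening quote sets CX_QUOTED, pushes a fresh token stack, and records a reset position; a closing quote adds CX_NEED_SPACE; and when a quoted value cannot be parsed, one XOR clears both flags, the speculative stack is discarded, and the head rewinds so the quote is re-read as plain text. The same save/try/rewind shape, reduced to a toy scanner (hypothetical code, not the library's API):

    def scan_quoted(text, head):
        # Remember where the opening quote sits, then look for its
        # partner; on failure, rewind to the quote so the caller can
        # treat it as literal text (mirrors data.reset / self._head).
        reset = head
        head += 1
        while head < len(text):
            if text[head] == '"':
                return text[reset + 1:head], head + 1   # parsed value
            head += 1
        return None, reset                              # rewind

    assert scan_quoted('id="foo" x', 3) == ("foo", 8)
    assert scan_quoted('id="foo', 3) == (None, 3)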
@@ -574,13 +571,12 @@ class Tokenizer(object):
 
         *data* is a :py:class:`_TagOpenData` object.
         """
+        if data.context & data.CX_QUOTED:
+            self._write_first(tokens.TagAttrQuote())
+            self._write_all(self._pop())
         buf = data.padding_buffer
         while len(buf) < 3:
             buf.append("")
-        if data.quoted:
-            data.quoted = False
-            self._write_first(tokens.TagAttrQuote())
-            self._write_all(self._pop())
         self._write_first(tokens.TagAttrStart(
             pad_after_eq=buf.pop(), pad_before_eq=buf.pop(),
             pad_first=buf.pop()))
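In _push_tag_buffer the quote handling now runs before the padding buffer is drained and keys off CX_QUOTED rather than the deleted data.quoted, so there is no flag left to reset. For a quoted value, a TagAttrQuote token is prepended to the popped value stack before TagAttrStart is prepended to the whole attribute. Roughly, with lists as hypothetical stand-ins for the token stacks (TagAttrEquals is assumed from the library's token set; it does not appear in this diff):

    value = ["Text:foo"]                    # built while CX_QUOTED was set
    attr = ["Text:name", "TagAttrEquals"]   # the attribute so far
    value = ["TagAttrQuote"] + value        # _write_first(TagAttrQuote())
    attr += value                           # _write_all(self._pop())
    attr = ["TagAttrStart"] + attr          # _write_first(TagAttrStart(...))
    assert attr == ["TagAttrStart", "Text:name", "TagAttrEquals",
                    "TagAttrQuote", "Text:foo"]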

