Kaynağa Gözat

More attribute stuff.

tags/v0.3
Ben Kurtovic 12 yıl önce
ebeveyn
işleme
d459899649
2 değiştirilmiş dosya ile 50 ekleme ve 25 silme
  1. +5
    -5
      mwparserfromhell/parser/builder.py
  2. +45
    -20
      mwparserfromhell/parser/tokenizer.py

+ 5
- 5
mwparserfromhell/parser/builder.py Dosyayı Görüntüle

@@ -180,9 +180,9 @@ class Builder(object):
else:
self._write(self._handle_token(token))

def _handle_attribute(self):
def _handle_attribute(self, token):
"""Handle a case where a tag attribute is at the head of the tokens."""
name, quoted = None, False
name, quoted, padding = None, False, token.padding
self._push()
while self._tokens:
token = self._tokens.pop()
@@ -195,8 +195,8 @@ class Builder(object):
tokens.TagCloseOpen)):
self._tokens.append(token)
if name is not None:
return Attribute(name, self._pop(), quoted)
return Attribute(self._pop(), quoted=quoted)
return Attribute(name, self._pop(), quoted, padding)
return Attribute(self._pop(), quoted=quoted, padding=padding)
else:
self._write(self._handle_token(token))

@@ -208,7 +208,7 @@ class Builder(object):
while self._tokens:
token = self._tokens.pop()
if isinstance(token, tokens.TagAttrStart):
attrs.append(self._handle_attribute())
attrs.append(self._handle_attribute(token))
elif isinstance(token, tokens.TagCloseOpen):
open_pad = token.padding
tag = self._pop()


+ 45
- 20
mwparserfromhell/parser/tokenizer.py Dosyayı Görüntüle

@@ -450,8 +450,6 @@ class Tokenizer(object):
self._context ^= contexts.TAG_OPEN_ATTR_NAME
if self._context & contexts.TAG_OPEN_ATTR_BODY:
self._context ^= contexts.TAG_OPEN_ATTR_BODY
if self._context & contexts.TAG_OPEN_ATTR_BODY_QUOTED:
self._context ^= contexts.TAG_OPEN_ATTR_BODY_QUOTED
else:
tag = self._get_tag_type_from_stack()
if not tag:
@@ -462,6 +460,20 @@ class Tokenizer(object):
padding = "" # TODO
return padding

def _actually_handle_chunk(self, chunks, is_new):
# Consume entries from the front of ``chunks`` (the list is mutated in place)
# and emit the corresponding token and text.
# NOTE(review): indentation is not preserved in this view, so the nesting
# described in the comments below is inferred - confirm against the real file.
if is_new:
# Count and discard the run of leading empty-string entries. Each one
# contributes a single space to the ``padding`` of the TagAttrStart token
# written once the run ends. Presumably the empty entries come from splitting
# the tag text on consecutive spaces - the split itself is not visible here.
padding = 0
while chunks:
if chunks[0] == "":
padding += 1
chunks.pop(0)
else:
break
self._write(tokens.TagAttrStart(padding=" " * padding))
# Presumably at function level rather than inside ``if is_new``: the caller's
# ``while chunks`` loop passes is_new=False on its first pass and can only
# terminate if a chunk is consumed on that pass.
if chunks:
chunk = chunks.pop(0)
self._write_text(chunk)

def _handle_tag_chunk(self, text):
if " " not in text:
self._write_text(text)
@@ -475,18 +487,29 @@ class Tokenizer(object):
self._write_first(tokens.TagOpenOpen(type=tag, showtag=True))
self._context ^= contexts.TAG_OPEN_NAME
self._context |= contexts.TAG_OPEN_ATTR_NAME
self._write(tokens.TagAttrStart())
for i, chunk in enumerate(chunks):
if i > 0:
self._write(tokens.TagAttrStart())
self._write_text(chunk)

# def _handle_attribute_name(self):
# ## check if next character is a ", if so, set TAG_ATTR_BODY_QUOTED
# pass

# def _handle_quoted_attribute_close(self):
# pass
self._actually_handle_chunk(chunks, True)
is_new = False
while chunks:
self._actually_handle_chunk(chunks, is_new)
is_new = True

def _handle_tag_attribute_body(self):
# Switch the context from attribute-name to attribute-body, write a
# TagAttrEquals token, then inspect the next item returned by ``self._read(1)``
# to see whether it begins a double-quoted value.
# NOTE(review): indentation is not preserved in this view; the pairing of the
# ``else`` below with the outer ``re.search`` test is inferred - confirm.
# NOTE(review): ``TagAttrEquals`` / ``TagAttrQuote`` are used without the
# ``tokens.`` prefix seen on ``tokens.TagAttrStart`` elsewhere in this file -
# verify these names are actually in scope.
self._context ^= contexts.TAG_OPEN_ATTR_NAME
self._context |= contexts.TAG_OPEN_ATTR_BODY
self._write(TagAttrEquals())
# NOTE(review): ``next`` shadows the builtin of the same name.
next = self._read(1)
if next not in self.MARKERS and next.startswith('"'):
# Does the text after the opening quote end in a quote that is not directly
# preceded by a backslash?
if re.search(r'[^\\]"$', next[1:]):
# Only accept it when no other non-backslash-preceded quote appears between
# the opening and closing quotes; then write the quote token and the text
# with both quotes stripped.
if not re.search(r'[^\\]"', next[1:-1]):
self._write(TagAttrQuote())
self._write_text(next[1:-1])
# Presumably steps past the item just consumed via ``_read(1)`` - confirm.
self._head += 1
else:
# No closing quote at the end of this item: if there is no
# non-backslash-preceded quote anywhere after the opening one, push a
# TAG_OPEN_ATTR_BODY_QUOTED context and write the quote token plus the text
# that follows the opening quote.
if not re.search(r'[^\\]"', next[1:]):
self._push(contexts.TAG_OPEN_ATTR_BODY_QUOTED)
self._write(TagAttrQuote())
self._write_text(next[1:])
self._head += 1

def _handle_tag_close_open(self):
padding = self._actually_close_tag_opening()
@@ -526,10 +549,12 @@ class Tokenizer(object):
self._head += 1
continue
if this is self.END:
fail = (contexts.TEMPLATE | contexts.ARGUMENT |
contexts.WIKILINK | contexts.HEADING |
contexts.COMMENT | contexts.TAG)
double_fail = contexts.TEMPLATE_PARAM_KEY | contexts.TAG_CLOSE
fail = (
contexts.TEMPLATE | contexts.ARGUMENT | contexts.WIKILINK |
contexts.HEADING | contexts.COMMENT | contexts.TAG)
double_fail = (
contexts.TEMPLATE_PARAM_KEY | contexts.TAG_CLOSE |
contexts.TAG_OPEN_ATTR_BODY_QUOTED)
if self._context & double_fail:
self._pop()
if self._context & fail:
@@ -593,8 +618,8 @@ class Tokenizer(object):
self._handle_tag_close_open()
elif this == "/" and next == ">":
return self._handle_tag_selfclose()
# elif this == "=":
# self._handle_tag_attr_body()
elif this == "=":
self._handle_tag_attribute_body()
elif this == "<" and next == "/" and (
self._context & contexts.TAG_BODY):
self._handle_tag_open_close()


Yükleniyor…
İptal
Kaydet