Browse Source

Tokenizer now supports a very, very limited template syntax.

tags/v0.1
Ben Kurtovic 12 years ago
parent
commit
32d99c3c75
2 changed files with 24 additions and 9 deletions
  1. +8
    -8
      mwparserfromhell/parser/builder.py
  2. +16
    -1
      mwparserfromhell/parser/tokenizer.py

+ 8
- 8
mwparserfromhell/parser/builder.py View File

@@ -59,7 +59,7 @@ class Builder(object):
                 value = self._pop()
                 return Parameter(key, value, showkey)
             else:
-                self._write(self._handle_token())
+                self._write(self._handle_token(token))

     def _handle_template(self):
         params = []
@@ -81,7 +81,7 @@ class Builder(object):
                 name = self._pop()
                 return Template(name, params)
             else:
-                self._write(self._handle_token())
+                self._write(self._handle_token(token))

     def _handle_entity(self):
         token = self._tokens.pop(0)
@@ -103,7 +103,7 @@ class Builder(object):
                 title = self._pop()
                 return Heading(title, level)
             else:
-                self._write(self._handle_token())
+                self._write(self._handle_token(token))

     def _handle_attribute(self):
         name, quoted = None, False
@@ -122,7 +122,7 @@ class Builder(object):
                     return Attribute(name, self._pop(), quoted)
                 return Attribute(self._pop(), quoted=quoted)
             else:
-                self._write(self._handle_token())
+                self._write(self._handle_token(token))

     def _handle_tag(self, token):
         type_, showtag = token.type, token.showtag
@@ -146,10 +146,9 @@ class Builder(object):
                 return Tag(type_, tag, contents, attrs, showtag, False,
                            open_pad, token.padding)
             else:
-                self._write(self._handle_token())
+                self._write(self._handle_token(token))

-    def _handle_token(self):
-        token = self._tokens.pop(0)
+    def _handle_token(self, token):
         if isinstance(token, tokens.Text):
             return Text(token.text)
         elif isinstance(token, tokens.TemplateOpen):
@@ -165,5 +164,6 @@ class Builder(object):
         self._tokens = tokenlist
         self._push()
         while self._tokens:
-            self._write(self._handle_token())
+            node = self._handle_token(self._tokens.pop(0))
+            self._write(node)
         return self._pop()

+ 16
- 1
mwparserfromhell/parser/tokenizer.py View File

@@ -24,6 +24,9 @@ from . import tokens
 __all__ = ["Tokenizer"]


+class BadRoute(Exception):
+    pass
+
 class Tokenizer(object):
     START = object()
     END = object()
@@ -33,7 +36,7 @@ class Tokenizer(object):
         self._head = 0
         self._stacks = []

-        self._modifiers = []
+        self._context = []

     def _push(self):
         self._stacks.append([])
@@ -66,6 +69,18 @@ class Tokenizer(object):
         while True:
             if self._read() in (stop, self.END):
                 return self._pop()
+            elif self._read(0) == "{" and self._read(1) == "{":
+                reset = self._head
+                self._head += 2
+                try:
+                    template = self._parse_until("}")
+                except BadRoute:
+                    self._head = reset
+                    self._write(tokens.Text(text=self._read()))
+                else:
+                    self._write(tokens.TemplateOpen())
+                    self._stacks[-1] += template
+                    self._write(tokens.TemplateClose())
             else:
                 self._write(tokens.Text(text=self._read()))
             self._head += 1


Loading…
Cancel
Save