
Starting work on token contexts.

Ben Kurtovic, 11 years ago (tags/v0.1)
parent commit d3ea962d27
2 changed files with 56 additions and 13 deletions
  1. mwparserfromhell/parser/contexts.py: +26 -0
  2. mwparserfromhell/parser/tokenizer.py: +30 -13

mwparserfromhell/parser/contexts.py (+26 -0)

@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+TEMPLATE = 0b111
+TEMPLATE_NAME = 0b001
+TEMPLATE_PARAM_KEY = 0b010
+TEMPLATE_PARAM_VALUE = 0b100

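These constants form a bitmask: TEMPLATE (0b111) is the union of the three sub-contexts, so a single AND answers "is the tokenizer anywhere inside a template?". Below is a quick sketch (mine, not part of the commit) of the flag arithmetic the tokenizer diff relies on: |= to enter a context, & to test membership, and ^ to clear a bit that is known to be set.

    TEMPLATE = 0b111                 # union of the three sub-contexts
    TEMPLATE_NAME = 0b001
    TEMPLATE_PARAM_KEY = 0b010
    TEMPLATE_PARAM_VALUE = 0b100

    context = 0
    context |= TEMPLATE_NAME         # entering a template name, as _parse_template does
    assert context & TEMPLATE        # any sub-context bit makes the mask test truthy
    context ^= TEMPLATE_NAME         # XOR clears a bit that is currently set
    assert not context & TEMPLATE
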
mwparserfromhell/parser/tokenizer.py (+30 -13)

@@ -20,6 +20,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+from . import contexts
 from . import tokens
 
 __all__ = ["Tokenizer"]
@@ -35,8 +36,7 @@ class Tokenizer(object):
         self._text = None
         self._head = 0
         self._stacks = []
-
-        self._context = []
+        self._context = 0
 
     def _push(self):
         self._stacks.append([])
@@ -64,9 +64,29 @@ class Tokenizer(object):
             return self.END
         return self._text[index]
 
+    def _verify_context(self):
+        if self._read() is self.END:
+            if self._context & contexts.TEMPLATE:
+                raise BadRoute()
+
+    def _catch_stop(self, stop):
+        if self._read() is self.END:
+            return True
+        try:
+            iter(stop)
+        except TypeError:
+            if self._read() is stop:
+                return True
+        else:
+            if all([self._read(i) == stop[i] for i in xrange(len(stop))]):
+                self._head += len(stop) - 1
+                return True
+        return False
+
     def _parse_template(self):
         reset = self._head
         self._head += 2
+        self._context |= contexts.TEMPLATE_NAME
         try:
             template = self._parse_until("}}")
         except BadRoute:
@@ -77,20 +97,17 @@ class Tokenizer(object):
         self._stacks[-1] += template
         self._write(tokens.TemplateClose())
+        ending = (contexts.TEMPLATE_NAME, contexts.TEMPLATE_PARAM_KEY,
+                  contexts.TEMPLATE_PARAM_VALUE)
+        for context in ending:
+            self._context ^= context if self._context & context else 0
 
-    def _parse_until(self, stop=None):
+    def _parse_until(self, stop):
         self._push()
         while True:
-            if self._read() is self.END:
+            self._verify_context()
+            if self._catch_stop(stop):
                 return self._pop()
-            try:
-                iter(stop)
-            except TypeError:
-                if self._read() is stop:
-                    return self._pop()
-            else:
-                if all([self._read(i) == stop[i] for i in xrange(len(stop))]):
-                    self._head += len(stop) - 1
-                    return self._pop()
             if self._read(0) == "{" and self._read(1) == "{":
                 self._parse_template()
             else:

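The rewritten loop in _parse_until delegates its exit conditions to the two new helpers: _verify_context raises BadRoute if the input ends while any template bit is still set (letting _parse_template fall back via its "except BadRoute:" branch, whose body the hunk elides), and _catch_stop accepts either a single stop token, compared with "is", or an iterable such as the string "}}", matched element by element and then skipped. Here is a standalone sketch of that matching logic, ported to Python 3 (the diff targets Python 2, hence xrange); the names are illustrative, not the library's API.

    END = object()                             # stand-in for the tokenizer's END sentinel

    def catch_stop(text, head, stop):
        """Return (matched, new_head): does `stop` occur at position `head`?"""
        def read(delta=0):
            index = head + delta
            return text[index] if index < len(text) else END
        if read() is END:                      # end of input always ends the route
            return True, head
        try:
            iter(stop)
        except TypeError:                      # a single, non-iterable stop token
            return read() is stop, head
        # an iterable stop such as "}}": compare element by element
        if all(read(i) == stop[i] for i in range(len(stop))):
            return True, head + len(stop) - 1  # jump past all but the last element
        return False, head

    print(catch_stop("abc}}", 3, "}}"))        # (True, 4)  -- matched the closing braces
    print(catch_stop("abc}}", 2, "}}"))        # (False, 2) -- 'c' is not '}'
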
