Browse source

Starting work on token contexts.

tags/v0.1
Ben Kurtovic, 12 years ago
parent
commit d3ea962d27
2 changed files with 56 additions and 13 deletions
1. +26 -0 mwparserfromhell/parser/contexts.py
2. +30 -13 mwparserfromhell/parser/tokenizer.py

+26 -0 mwparserfromhell/parser/contexts.py

@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+TEMPLATE = 0b111
+TEMPLATE_NAME = 0b001
+TEMPLATE_PARAM_KEY = 0b010
+TEMPLATE_PARAM_VALUE = 0b100
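The new module defines the tokenizer's contexts as bit flags: each sub-context gets its own bit, and TEMPLATE is their union (0b001 | 0b010 | 0b100 == 0b111), so a single bitwise AND answers "is the tokenizer anywhere inside a template?". A minimal sketch of the intended usage (illustrative only, not part of the commit):

    TEMPLATE = 0b111              # union of the three sub-contexts
    TEMPLATE_NAME = 0b001
    TEMPLATE_PARAM_KEY = 0b010
    TEMPLATE_PARAM_VALUE = 0b100

    context = 0                   # integer bitfield, as in the new Tokenizer
    context |= TEMPLATE_NAME      # enter a template's name
    assert context & TEMPLATE     # any sub-flag matches the union
    context &= ~TEMPLATE_NAME     # leave the context by clearing its bit
    assert not context & TEMPLATE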

+30 -13 mwparserfromhell/parser/tokenizer.py

@@ -20,6 +20,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+from . import contexts
 from . import tokens
 
 __all__ = ["Tokenizer"]
@@ -35,8 +36,7 @@ class Tokenizer(object):
         self._text = None
         self._head = 0
         self._stacks = []
-
-        self._context = []
+        self._context = 0
 
     def _push(self):
         self._stacks.append([])
@@ -64,9 +64,29 @@ class Tokenizer(object):
             return self.END
         return self._text[index]
 
+    def _verify_context(self):
+        if self._read() is self.END:
+            if self._context & contexts.INSIDE_TEMPLATE:
+                raise BadRoute()
+
+    def _catch_stop(self, stop):
+        if self._read() is self.END:
+            return True
+        try:
+            iter(stop)
+        except TypeError:
+            if self._read() is stop:
+                return True
+        else:
+            if all([self._read(i) == stop[i] for i in xrange(len(stop))]):
+                self._head += len(stop) - 1
+                return True
+        return False
+
     def _parse_template(self):
         reset = self._head
         self._head += 2
+        self._context |= contexts.TEMPLATE_NAME
         try:
             template = self._parse_until("}}")
         except BadRoute:
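The two new helpers factor the exit logic out of the parsing loop. _verify_context raises BadRoute when the text runs out while a template context is still set, which _parse_template catches to backtrack to the saved reset position; _catch_stop reports whether parsing should stop here, treating a non-iterable stop as an identity-checked sentinel and an iterable one as a literal marker matched character by character. Note that contexts.INSIDE_TEMPLATE does not appear among the constants added in contexts.py above, so at this snapshot the check presumably means the union flag contexts.TEMPLATE, or relies on an alias still to come. The diff targets Python 2 (xrange); the sketch below is version-neutral, and StubTokenizer plus its sample text are illustrative assumptions, with only the _catch_stop body mirroring the diff:

    class StubTokenizer(object):
        END = object()  # sentinel returned once the read head passes the input

        def __init__(self, text):
            self._text = text
            self._head = 0

        def _read(self, delta=0):
            index = self._head + delta
            if index >= len(self._text):
                return self.END
            return self._text[index]

        def _catch_stop(self, stop):
            if self._read() is self.END:      # out of input: always stop
                return True
            try:
                iter(stop)
            except TypeError:                 # non-iterable stop: identity check
                if self._read() is stop:
                    return True
            else:                             # iterable stop: match it literally
                if all(self._read(i) == stop[i] for i in range(len(stop))):
                    self._head += len(stop) - 1  # skip to the marker's last char
                    return True
            return False

    tok = StubTokenizer("name}}rest")
    tok._head = 4                             # read head sits on the first "}"
    print(tok._catch_stop("}}"))              # -> True; head advances to 5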
@@ -77,20 +97,17 @@ class Tokenizer(object):
         self._stacks[-1] += template
         self._write(tokens.TemplateClose())
 
-    def _parse_until(self, stop):
+    def _parse_until(self, stop=None):
+        ending = (contexts.TEMPLATE_NAME, contexts.TEMPLATE_PARAM_KEY,
+                  contexts.TEMPLATE_PARAM_VALUE)
+        for context in ending:
+            self._context ^= context if self._context & context else 0
+
         self._push()
         while True:
-            if self._read() is self.END:
+            self._verify_context()
+            if self._catch_stop(stop):
                 return self._pop()
-            try:
-                iter(stop)
-            except TypeError:
-                if self._read() is stop:
-                    return self._pop()
-            else:
-                if all([self._read(i) == stop[i] for i in xrange(len(stop))]):
-                    self._head += len(stop) - 1
-                    return self._pop()
             if self._read(0) == "{" and self._read(1) == "{":
                 self._parse_template()
             else:
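One note on the rewritten _parse_until prologue: the line `self._context ^= context if self._context & context else 0` clears each template sub-flag, since XOR-ing a set bit unsets it and XOR with 0 is a no-op. The conventional spelling is an AND with the flag's complement, and because TEMPLATE is the union of all three sub-flags, a single mask would suffice. A small sketch of the equivalence (editorial illustration, not part of the commit):

    # The three sub-flags and their union, as defined in contexts.py.
    TEMPLATE = 0b111
    ENDING = (0b001, 0b010, 0b100)  # NAME, PARAM_KEY, PARAM_VALUE

    def clear_xor(context):
        # The diff's approach: flip each flag only when it is currently set.
        for flag in ENDING:
            context ^= flag if context & flag else 0
        return context

    def clear_mask(context):
        # Equivalent single-step form: AND with the union's complement.
        return context & ~TEMPLATE

    for value in range(0b1000):  # every combination of the three bits
        assert clear_xor(value) == clear_mask(value) == 0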

