From d3ea962d271b2079ba840de33253962f8e0c4433 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Tue, 14 Aug 2012 18:27:29 -0400
Subject: [PATCH] Starting work on token contexts.

---
 mwparserfromhell/parser/contexts.py  | 26 ++++++++++++++++++++++++++
 mwparserfromhell/parser/tokenizer.py | 43 ++++++++++++++++++++++++++++++---------------
 2 files changed, 56 insertions(+), 13 deletions(-)
 create mode 100644 mwparserfromhell/parser/contexts.py

diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py
new file mode 100644
index 0000000..f966a1b
--- /dev/null
+++ b/mwparserfromhell/parser/contexts.py
@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2012 Ben Kurtovic
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+TEMPLATE = 0b111
+TEMPLATE_NAME = 0b001
+TEMPLATE_PARAM_KEY = 0b010
+TEMPLATE_PARAM_VALUE = 0b100
diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py
index 6318337..260a5b1 100644
--- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py
@@ -20,6 +20,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+from . import contexts
 from . import tokens
 
 __all__ = ["Tokenizer"]
@@ -35,8 +36,7 @@ class Tokenizer(object):
         self._text = None
         self._head = 0
         self._stacks = []
-
-        self._context = []
+        self._context = 0
 
     def _push(self):
         self._stacks.append([])
@@ -64,9 +64,29 @@ class Tokenizer(object):
             return self.END
         return self._text[index]
 
+    def _verify_context(self):
+        if self._read() is self.END:
+            if self._context & contexts.INSIDE_TEMPLATE:
+                raise BadRoute()
+
+    def _catch_stop(self, stop):
+        if self._read() is self.END:
+            return True
+        try:
+            iter(stop)
+        except TypeError:
+            if self._read() is stop:
+                return True
+        else:
+            if all([self._read(i) == stop[i] for i in xrange(len(stop))]):
+                self._head += len(stop) - 1
+                return True
+        return False
+
     def _parse_template(self):
         reset = self._head
         self._head += 2
+        self._context |= contexts.TEMPLATE_NAME
         try:
             template = self._parse_until("}}")
         except BadRoute:
@@ -77,20 +97,17 @@ class Tokenizer(object):
         self._stacks[-1] += template
         self._write(tokens.TemplateClose())
 
-    def _parse_until(self, stop=None):
+        ending = (contexts.TEMPLATE_NAME, contexts.TEMPLATE_PARAM_KEY,
+                  contexts.TEMPLATE_PARAM_VALUE)
+        for context in ending:
+            self._context ^= context if self._context & context else 0
+
+    def _parse_until(self, stop):
         self._push()
         while True:
-            if self._read() is self.END:
+            self._verify_context()
+            if self._catch_stop(stop):
                 return self._pop()
-            try:
-                iter(stop)
-            except TypeError:
-                if self._read() is stop:
-                    return self._pop()
-            else:
-                if all([self._read(i) == stop[i] for i in xrange(len(stop))]):
-                    self._head += len(stop) - 1
-                    return self._pop()
             if self._read(0) == "{" and self._read(1) == "{":
                 self._parse_template()
             else:
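
The constants added in contexts.py are bit flags, which is why the tokenizer sets, tests, and clears them with |=, &, and ^ in the hunks above. The following standalone snippet is a minimal sketch of how such flags combine; the constant values are copied from the patch, but the snippet itself is illustrative only and not part of mwparserfromhell:

    # Values copied from contexts.py; everything else here is a sketch.
    TEMPLATE_NAME = 0b001
    TEMPLATE_PARAM_KEY = 0b010
    TEMPLATE_PARAM_VALUE = 0b100
    TEMPLATE = 0b111  # union of the three template sub-contexts

    context = 0                    # Tokenizer.__init__ starts with no context
    context |= TEMPLATE_NAME       # entering a template name, as in _parse_template
    assert context & TEMPLATE      # any of the three sub-flags satisfies TEMPLATE
    assert not context & TEMPLATE_PARAM_KEY

    # Leaving the template: clear whichever sub-flags are set, mirroring the
    # XOR loop over `ending` at the end of _parse_template.
    for flag in (TEMPLATE_NAME, TEMPLATE_PARAM_KEY, TEMPLATE_PARAM_VALUE):
        context ^= flag if context & flag else 0
    assert context == 0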