
Starting work on token contexts.

Ben Kurtovic, 11 years ago (tags/v0.1)
parent commit d3ea962d27
2 changed files with 56 additions and 13 deletions
  1. mwparserfromhell/parser/contexts.py: +26 -0
  2. mwparserfromhell/parser/tokenizer.py: +30 -13

mwparserfromhell/parser/contexts.py (+26 -0)

@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+TEMPLATE = 0b111
+TEMPLATE_NAME = 0b001
+TEMPLATE_PARAM_KEY = 0b010
+TEMPLATE_PARAM_VALUE = 0b100

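These constants form a bitmask: TEMPLATE (0b111) is the union of the three sub-contexts, so a single AND answers "is the tokenizer anywhere inside a template?". Below is a quick sketch (mine, not part of the commit) of the flag arithmetic the tokenizer diff relies on: |= to enter a context, & to test membership, and ^ to clear a bit that is known to be set.

    TEMPLATE = 0b111                 # union of the three sub-contexts
    TEMPLATE_NAME = 0b001
    TEMPLATE_PARAM_KEY = 0b010
    TEMPLATE_PARAM_VALUE = 0b100

    context = 0
    context |= TEMPLATE_NAME         # entering a template name, as _parse_template does
    assert context & TEMPLATE        # any sub-context bit makes the mask test truthy
    context ^= TEMPLATE_NAME         # XOR clears a bit that is currently set
    assert not context & TEMPLATE
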
mwparserfromhell/parser/tokenizer.py (+30 -13)

@@ -20,6 +20,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+from . import contexts
 from . import tokens
 
 __all__ = ["Tokenizer"]
@@ -35,8 +36,7 @@ class Tokenizer(object):
         self._text = None
         self._head = 0
         self._stacks = []
-
-        self._context = []
+        self._context = 0
 
     def _push(self):
         self._stacks.append([])
@@ -64,9 +64,29 @@ class Tokenizer(object):
             return self.END
         return self._text[index]
 
+    def _verify_context(self):
+        if self._read() is self.END:
+            if self._context & contexts.TEMPLATE:
+                raise BadRoute()
+
+    def _catch_stop(self, stop):
+        if self._read() is self.END:
+            return True
+        try:
+            iter(stop)
+        except TypeError:
+            if self._read() is stop:
+                return True
+        else:
+            if all([self._read(i) == stop[i] for i in xrange(len(stop))]):
+                self._head += len(stop) - 1
+                return True
+        return False
+
     def _parse_template(self):
         reset = self._head
         self._head += 2
+        self._context |= contexts.TEMPLATE_NAME
         try:
             template = self._parse_until("}}")
         except BadRoute:
@@ -77,20 +97,17 @@ class Tokenizer(object):
         self._stacks[-1] += template
         self._write(tokens.TemplateClose())
+        ending = (contexts.TEMPLATE_NAME, contexts.TEMPLATE_PARAM_KEY,
+                  contexts.TEMPLATE_PARAM_VALUE)
+        for context in ending:
+            self._context ^= context if self._context & context else 0
 
-    def _parse_until(self, stop=None):
+    def _parse_until(self, stop):
         self._push()
         while True:
-            if self._read() is self.END:
+            self._verify_context()
+            if self._catch_stop(stop):
                 return self._pop()
-            try:
-                iter(stop)
-            except TypeError:
-                if self._read() is stop:
-                    return self._pop()
-            else:
-                if all([self._read(i) == stop[i] for i in xrange(len(stop))]):
-                    self._head += len(stop) - 1
-                    return self._pop()
             if self._read(0) == "{" and self._read(1) == "{":
                 self._parse_template()
             else:

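The rewritten loop in _parse_until delegates its exit conditions to the two new helpers: _verify_context raises BadRoute if the input ends while any template bit is still set (letting _parse_template fall back via its "except BadRoute:" branch, whose body the hunk elides), and _catch_stop accepts either a single stop token, compared with "is", or an iterable such as the string "}}", matched element by element and then skipped. Here is a standalone sketch of that matching logic, ported to Python 3 (the diff targets Python 2, hence xrange); the names are illustrative, not the library's API.

    END = object()                             # stand-in for the tokenizer's END sentinel

    def catch_stop(text, head, stop):
        """Return (matched, new_head): does `stop` occur at position `head`?"""
        def read(delta=0):
            index = head + delta
            return text[index] if index < len(text) else END
        if read() is END:                      # end of input always ends the route
            return True, head
        try:
            iter(stop)
        except TypeError:                      # a single, non-iterable stop token
            return read() is stop, head
        # an iterable stop such as "}}": compare element by element
        if all(read(i) == stop[i] for i in range(len(stop))):
            return True, head + len(stop) - 1  # jump past all but the last element
        return False, head

    print(catch_stop("abc}}", 3, "}}"))        # (True, 4)  -- matched the closing braces
    print(catch_stop("abc}}", 2, "}}"))        # (False, 2) -- 'c' is not '}'
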
