From 4539859c55aa6a1058900d4a02234085f18f3726 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 7 Aug 2012 01:56:16 -0400 Subject: [PATCH] Merge BuildStack into Builder. --- mwparserfromhell/parser/build_stack.py | 39 ----------------------- mwparserfromhell/parser/builder.py | 56 ++++++++++++++++++++-------------- mwparserfromhell/parser/tokenizer.py | 10 ++++-- 3 files changed, 41 insertions(+), 64 deletions(-) delete mode 100644 mwparserfromhell/parser/build_stack.py diff --git a/mwparserfromhell/parser/build_stack.py b/mwparserfromhell/parser/build_stack.py deleted file mode 100644 index 23061d0..0000000 --- a/mwparserfromhell/parser/build_stack.py +++ /dev/null @@ -1,39 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -from ..smart_list import SmartList -from ..wikicode import Wikicode - -__all__ = ["BuildStack"] - -class BuildStack(object): - def __init__(self): - self._stacks = [] - - def write(self, item): - self._stacks[-1].append(item) - - def push(self): - self._stacks.append([]) - - def pop(self): - return Wikicode(SmartList(self._stacks.pop())) diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index 5e8aaf5..3b6a643 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -23,30 +23,40 @@ import re from . import tokens -from .build_stack import BuildStack from ..nodes import Heading, HTMLEntity, Tag, Template, Text from ..nodes.extras import Attribute, Parameter +from ..smart_list import SmartList +from ..wikicode import Wikicode __all__ = ["Builder"] class Builder(object): def __init__(self): self._tokens = [] - self._stack = BuildStack() + self._stacks = [] + + def _push(self): + self._stacks.append([]) + + def _pop(self): + return Wikicode(SmartList(self._stacks.pop())) + + def _write(self, item): + self._stacks[-1].append(item) def _handle_parameter(self, key): showkey = False - self._stack.push() + self._push() while self._tokens: token = self._tokens.pop(0) if isinstance(token, tokens.TEMPLATE_PARAM_EQUALS): - key = self._stack.pop() + key = self._pop() showkey = True - self._stack.push() + self._push() elif isinstance(token, (tokens.TEMPLATE_PARAM_SEPARATOR, tokens.TEMPLATE_CLOSE)): self._tokens.insert(0, token) - value = self._stack.pop() + value = self._pop() return Parameter(key, value, showkey) else: self._stack.write(self._handle_token()) @@ -55,12 +65,12 @@ class Builder(object): params = [] int_keys = set() int_key_range = {1} - self._stack.push() + self._push() while self._tokens: token = self._tokens.pop(0) if isinstance(token, tokens.TEMPLATE_PARAM_SEPARATOR): if not params: - name = self._stack.pop() + name = self._pop() param = self._handle_parameter(min(int_key_range - int_keys)) if re.match(r"[1-9][0-9]*$", param.name.strip()): int_keys.add(int(param.name)) @@ -68,7 +78,7 @@ class Builder(object): params.append(param) elif isinstance(token, tokens.TEMPLATE_CLOSE): if not params: - name = self._stack.pop() + name = self._pop() return Template(name, params) else: self._stack.write(self._handle_token()) @@ -85,52 +95,52 @@ class Builder(object): def _handle_heading(self, token): level = token.level - self._stack.push() + self._push() while self._tokens: token = self._tokens.pop(0) if isinstance(token, tokens.HEADING_BLOCK): - title = self._stack.pop() + title = self._pop() return Heading(title, level) else: self._stack.write(self._handle_token()) def _handle_attribute(self): name, quoted = None, False - self._stack.push() + self._push() while self._tokens: token = self._tokens.pop(0) if isinstance(token, tokens.TAG_ATTR_EQUALS): - name = self._stack.pop() - self._stack.push() + name = self._pop() + self._push() elif isinstance(token, tokens.TAG_ATTR_QUOTE): quoted = True elif isinstance(token, (tokens.TAG_ATTR_START, tokens.TAG_CLOSE_OPEN)): self._tokens.insert(0, token) if name is not None: - return Attribute(name, self._stack.pop(), quoted) - return Attribute(self._stack.pop(), quoted=quoted) + return Attribute(name, self._pop(), quoted) + return Attribute(self._pop(), quoted=quoted) else: self._stack.write(self._handle_token()) def _handle_tag(self, token): type_, showtag = token.type, token.showtag attrs = [] - self._stack.push() + self._push() while self._tokens: token = self._tokens.pop(0) if isinstance(token, tokens.TAG_ATTR_START): attrs.append(self._handle_attribute()) elif isinstance(token, tokens.TAG_CLOSE_OPEN): open_pad = token.padding - tag = self._stack.pop() - self._stack.push() + tag = self._pop() + self._push() elif isinstance(token, tokens.TAG_CLOSE_SELFCLOSE): - tag = self._stack.pop() + tag = self._pop() return Tag(type_, tag, attrs=attrs, showtag=showtag, self_closing=True, open_padding=token.padding) elif isinstance(token, tokens.TAG_OPEN_CLOSE): - contents = self._stack.pop() + contents = self._pop() elif isinstance(token, tokens.TAG_CLOSE_CLOSE): return Tag(type_, tag, contents, attrs, showtag, False, open_pad, token.padding) @@ -152,7 +162,7 @@ class Builder(object): def build(self, tokenlist): self._tokens = tokenlist - self._stack.push() + self._push() while self._tokens: self._stack.write(self._handle_token()) - return self._stack.pop() + return self._pop() diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index dbdc48b..10b4d8a 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -25,6 +25,12 @@ from . import tokens __all__ = ["Tokenizer"] class Tokenizer(object): + def __init__(self): + self._text = None + self._head = 0 + self._tokens = [] + def tokenize(self, text): - tokenized = [tokens.TEXT(text=text)] - return tokenized + self._text = text + self._tokens.append(tokens.TEXT(text=text)) + return self._tokens