瀏覽代碼

Merge BuildStack into Builder.

tags/v0.1
Ben Kurtovic 11 年之前
父節點
當前提交
4539859c55
共有 3 個文件被更改，包括 41 次插入和 64 次刪除
  1. +0
    -39
      mwparserfromhell/parser/build_stack.py
  2. +33
    -23
      mwparserfromhell/parser/builder.py
  3. +8
    -2
      mwparserfromhell/parser/tokenizer.py

+ 0
- 39
mwparserfromhell/parser/build_stack.py 查看文件

@@ -1,39 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from ..smart_list import SmartList
from ..wikicode import Wikicode

__all__ = ["BuildStack"]

class BuildStack(object):
    """A stack of node lists used while building a parse tree.

    Each level of the stack is a plain list. ``push`` opens a new level,
    ``write`` appends to the level currently on top, and ``pop`` removes the
    top level and returns it wrapped as ``Wikicode(SmartList(...))``.
    """

    def __init__(self):
        # Innermost (most recently pushed) level is at the end of the list.
        self._stacks = []

    def write(self, item):
        """Append *item* to the level on top of the stack.

        Raises IndexError if no level has been pushed yet.
        """
        self._stacks[-1].append(item)

    def push(self):
        """Open a new, empty level on top of the stack."""
        self._stacks.append([])

    def pop(self):
        """Remove the top level and return its items as a Wikicode object.

        Raises IndexError if the stack is empty.
        """
        return Wikicode(SmartList(self._stacks.pop()))

+ 33
- 23
mwparserfromhell/parser/builder.py 查看文件

@@ -23,30 +23,40 @@
import re import re


from . import tokens from . import tokens
from .build_stack import BuildStack
from ..nodes import Heading, HTMLEntity, Tag, Template, Text from ..nodes import Heading, HTMLEntity, Tag, Template, Text
from ..nodes.extras import Attribute, Parameter from ..nodes.extras import Attribute, Parameter
from ..smart_list import SmartList
from ..wikicode import Wikicode


__all__ = ["Builder"] __all__ = ["Builder"]


class Builder(object): class Builder(object):
def __init__(self): def __init__(self):
self._tokens = [] self._tokens = []
self._stack = BuildStack()
self._stacks = []

def _push(self):
self._stacks.append([])

def _pop(self):
    """Remove the top stack level and return its items as a Wikicode object.

    Raises IndexError if the stack is empty.
    """
    return Wikicode(SmartList(self._stacks.pop()))

def _write(self, item):
self._stacks[-1].append(item)


def _handle_parameter(self, key):
    """Consume tokens for one template parameter and return a Parameter.

    *key* is the name used unless a TEMPLATE_PARAM_EQUALS token is seen, in
    which case the nodes collected so far become the key and ``showkey`` is
    set. The TEMPLATE_PARAM_SEPARATOR or TEMPLATE_CLOSE token that ends the
    parameter is put back on the queue for the caller.

    Returns None implicitly if the tokens run out before a terminator.
    """
    showkey = False
    self._push()
    while self._tokens:
        token = self._tokens.pop(0)
        if isinstance(token, tokens.TEMPLATE_PARAM_EQUALS):
            # Everything written so far was the key; start collecting the
            # value on a fresh stack level.
            key = self._pop()
            showkey = True
            self._push()
        elif isinstance(token, (tokens.TEMPLATE_PARAM_SEPARATOR,
                                tokens.TEMPLATE_CLOSE)):
            # Leave the terminator for the caller to handle.
            self._tokens.insert(0, token)
            value = self._pop()
            return Parameter(key, value, showkey)
        else:
            # The stack lives on the Builder itself (self._stacks); there is
            # no self._stack attribute any more, so use the _write helper.
            # NOTE(review): `token` was already popped above and is not
            # passed on -- confirm _handle_token() does not need it
            # re-queued.
            self._write(self._handle_token())
@@ -55,12 +65,12 @@ class Builder(object):
params = [] params = []
int_keys = set() int_keys = set()
int_key_range = {1} int_key_range = {1}
self._stack.push()
self._push()
while self._tokens: while self._tokens:
token = self._tokens.pop(0) token = self._tokens.pop(0)
if isinstance(token, tokens.TEMPLATE_PARAM_SEPARATOR): if isinstance(token, tokens.TEMPLATE_PARAM_SEPARATOR):
if not params: if not params:
name = self._stack.pop()
name = self._pop()
param = self._handle_parameter(min(int_key_range - int_keys)) param = self._handle_parameter(min(int_key_range - int_keys))
if re.match(r"[1-9][0-9]*$", param.name.strip()): if re.match(r"[1-9][0-9]*$", param.name.strip()):
int_keys.add(int(param.name)) int_keys.add(int(param.name))
@@ -68,7 +78,7 @@ class Builder(object):
params.append(param) params.append(param)
elif isinstance(token, tokens.TEMPLATE_CLOSE): elif isinstance(token, tokens.TEMPLATE_CLOSE):
if not params: if not params:
name = self._stack.pop()
name = self._pop()
return Template(name, params) return Template(name, params)
else: else:
self._stack.write(self._handle_token()) self._stack.write(self._handle_token())
@@ -85,52 +95,52 @@ class Builder(object):


def _handle_heading(self, token):
    """Consume tokens up to the next HEADING_BLOCK and return a Heading.

    *token* is the token that opened the heading; its ``level`` is used for
    the resulting node. The nodes collected before the closing
    HEADING_BLOCK become the title.

    Returns None implicitly if the tokens run out before a HEADING_BLOCK.
    """
    level = token.level
    self._push()
    while self._tokens:
        token = self._tokens.pop(0)
        if isinstance(token, tokens.HEADING_BLOCK):
            title = self._pop()
            return Heading(title, level)
        else:
            # The stack lives on the Builder itself (self._stacks); there is
            # no self._stack attribute any more, so use the _write helper.
            # NOTE(review): `token` was already popped above and is not
            # passed on -- confirm _handle_token() does not need it
            # re-queued.
            self._write(self._handle_token())


def _handle_attribute(self):
    """Consume tokens for one tag attribute and return an Attribute.

    A TAG_ATTR_EQUALS token turns the nodes collected so far into the
    attribute name; a TAG_ATTR_QUOTE token marks the value as quoted. The
    attribute ends at the next TAG_ATTR_START or TAG_CLOSE_OPEN token, which
    is put back on the queue for the caller.

    Returns None implicitly if the tokens run out before a terminator.
    """
    name, quoted = None, False
    self._push()
    while self._tokens:
        token = self._tokens.pop(0)
        if isinstance(token, tokens.TAG_ATTR_EQUALS):
            # Everything written so far was the name; collect the value on
            # a fresh stack level.
            name = self._pop()
            self._push()
        elif isinstance(token, tokens.TAG_ATTR_QUOTE):
            quoted = True
        elif isinstance(token, (tokens.TAG_ATTR_START,
                                tokens.TAG_CLOSE_OPEN)):
            # Leave the terminator for the caller to handle.
            self._tokens.insert(0, token)
            if name is not None:
                return Attribute(name, self._pop(), quoted)
            # No "=" was seen: the collected nodes are the attribute itself.
            return Attribute(self._pop(), quoted=quoted)
        else:
            # The stack lives on the Builder itself (self._stacks); there is
            # no self._stack attribute any more, so use the _write helper.
            # NOTE(review): `token` was already popped above and is not
            # passed on -- confirm _handle_token() does not need it
            # re-queued.
            self._write(self._handle_token())


def _handle_tag(self, token): def _handle_tag(self, token):
type_, showtag = token.type, token.showtag type_, showtag = token.type, token.showtag
attrs = [] attrs = []
self._stack.push()
self._push()
while self._tokens: while self._tokens:
token = self._tokens.pop(0) token = self._tokens.pop(0)
if isinstance(token, tokens.TAG_ATTR_START): if isinstance(token, tokens.TAG_ATTR_START):
attrs.append(self._handle_attribute()) attrs.append(self._handle_attribute())
elif isinstance(token, tokens.TAG_CLOSE_OPEN): elif isinstance(token, tokens.TAG_CLOSE_OPEN):
open_pad = token.padding open_pad = token.padding
tag = self._stack.pop()
self._stack.push()
tag = self._pop()
self._push()
elif isinstance(token, tokens.TAG_CLOSE_SELFCLOSE): elif isinstance(token, tokens.TAG_CLOSE_SELFCLOSE):
tag = self._stack.pop()
tag = self._pop()
return Tag(type_, tag, attrs=attrs, showtag=showtag, return Tag(type_, tag, attrs=attrs, showtag=showtag,
self_closing=True, open_padding=token.padding) self_closing=True, open_padding=token.padding)
elif isinstance(token, tokens.TAG_OPEN_CLOSE): elif isinstance(token, tokens.TAG_OPEN_CLOSE):
contents = self._stack.pop()
contents = self._pop()
elif isinstance(token, tokens.TAG_CLOSE_CLOSE): elif isinstance(token, tokens.TAG_CLOSE_CLOSE):
return Tag(type_, tag, contents, attrs, showtag, False, return Tag(type_, tag, contents, attrs, showtag, False,
open_pad, token.padding) open_pad, token.padding)
@@ -152,7 +162,7 @@ class Builder(object):


def build(self, tokenlist):
    """Build and return a Wikicode tree from *tokenlist*.

    *tokenlist* is stored as the pending-token queue as-is (not copied).
    Each call to ``_handle_token()`` produces one node, which is written to
    the outermost stack level; that level is returned once the queue is
    empty.
    """
    self._tokens = tokenlist
    self._push()
    while self._tokens:
        # The stack lives on the Builder itself (self._stacks); there is no
        # self._stack attribute any more, so use the _write helper.
        self._write(self._handle_token())
    return self._pop()

+ 8
- 2
mwparserfromhell/parser/tokenizer.py 查看文件

@@ -25,6 +25,12 @@ from . import tokens
__all__ = ["Tokenizer"] __all__ = ["Tokenizer"]


class Tokenizer(object):
    """Turn wikicode text into a list of tokens.

    As written, the whole input is emitted as a single TEXT token.
    """

    def __init__(self):
        self._text = None   # text currently being tokenized
        self._head = 0      # position within self._text (unused so far)
        self._tokens = []   # tokens produced for the current text

    def tokenize(self, text):
        """Tokenize *text* and return the resulting list of tokens.

        State is reset on every call, so reusing a Tokenizer does not
        accumulate tokens from earlier inputs or mutate a list that was
        returned previously.
        """
        self._text = text
        self._head = 0
        self._tokens = []
        self._tokens.append(tokens.TEXT(text=text))
        return self._tokens

Loading…
取消
儲存