Browse Source

Starting main parser: Parser, Tokens, Tokenizer, Builder, BuildStack.

tags/v0.1
Ben Kurtovic 11 years ago
parent
commit
33acb3eea3
7 changed files with 240 additions and 55 deletions
  1. +1
    -1
      mwparserfromhell/nodes/__init__.py
  2. +15
    -1
      mwparserfromhell/parser/__init__.py
  3. +36
    -0
      mwparserfromhell/parser/build_stack.py
  4. +93
    -0
      mwparserfromhell/parser/builder.py
  5. +0
    -53
      mwparserfromhell/parser/demo.py
  6. +30
    -0
      mwparserfromhell/parser/tokenizer.py
  7. +65
    -0
      mwparserfromhell/parser/tokens.py

+ 1
- 1
mwparserfromhell/nodes/__init__.py View File

@@ -22,7 +22,7 @@

from ..string_mixin import StringMixIn

__all__ = ["Node"]
__all__ = ["Node", "Text", "Heading", "HTMLEntity", "Tag", "Template"]

class Node(StringMixIn):
def __iternodes__(self, getter):


+ 15
- 1
mwparserfromhell/parser/__init__.py View File

@@ -20,4 +20,18 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from .demo import DemoParser as Parser
from .builder import Builder
from .tokenizer import Tokenizer

__all__ = ["Parser"]

class Parser(object):
    """Front end that turns wikitext into a built code object.

    The text is first handed to a Tokenizer and the resulting tokens are
    then handed to a Builder.
    """

    def __init__(self, text):
        """Remember *text* and create the tokenizer/builder pair."""
        self.text = text
        self._tokenizer = Tokenizer()
        self._builder = Builder()

    def parse(self):
        """Tokenize ``self.text`` and return whatever the builder produces."""
        return self._builder.build(self._tokenizer.tokenize(self.text))

+ 36
- 0
mwparserfromhell/parser/build_stack.py View File

@@ -0,0 +1,36 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

__all__ = ["BuildStack"]

class BuildStack(object):
    """A stack of item lists ("frames") used while building nested nodes.

    ``push()`` opens a new, empty frame; ``write()`` appends to the frame on
    top; ``pop()`` removes the top frame and returns its items. The previous
    placeholder methods did nothing, so ``pop()`` returned None.
    """

    def __init__(self):
        # Each entry is the list of items written to one open frame; the
        # last entry is the frame currently being written to.
        self._frames = []

    def write(self, item):
        """Append *item* to the frame on top of the stack.

        Raises IndexError if no frame has been pushed yet.
        """
        self._frames[-1].append(item)

    def push(self):
        """Open a new, empty frame on top of the stack."""
        self._frames.append([])

    def pop(self):
        """Remove the top frame and return its items as a list.

        Raises IndexError if the stack is empty.
        """
        return self._frames.pop()

+ 93
- 0
mwparserfromhell/parser/builder.py View File

@@ -0,0 +1,93 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import re

from . import tokens
from .build_stack import BuildStack
from ..nodes import Template, Text
from ..nodes.extras import Parameter
from ..smart_list import SmartList
from ..wikicode import Wikicode

__all__ = ["Builder"]

class Builder(object):
    """Assemble a flat list of tokens into nodes wrapped in Wikicode.

    Tokens are consumed from the front of ``self._tokens``; nodes under
    construction are collected in frames on ``self._stack``.
    """

    def __init__(self):
        self._tokens = []
        self._stack = BuildStack()

    def _pop(self):
        """Pop the current stack frame and return its contents as Wikicode."""
        # Must go through self._stack; a bare ``stack`` name is undefined.
        return Wikicode(SmartList(self._stack.pop()))

    def _handle_parameter(self, key):
        """Build a single template parameter from the upcoming tokens.

        *key* is the positional index used as the parameter's name when no
        TEMPLATE_PARAM_EQUALS token is encountered. Consumption stops in
        front of the next TEMPLATE_PARAM_SEPARATOR or TEMPLATE_CLOSE, which
        is left in the queue for the caller. Returns a Parameter, or None if
        the tokens run out before the parameter is terminated.
        """
        showkey = False
        self._stack.push()
        while self._tokens:
            # Peek rather than pop: _handle_token() pops for itself, so
            # popping here first would silently drop a token.
            token = self._tokens[0]
            if isinstance(token, tokens.TEMPLATE_PARAM_EQUALS):
                self._tokens.pop(0)
                # Everything written so far was the key; start the value.
                key = self._pop()
                showkey = True
                self._stack.push()
            elif isinstance(token, (tokens.TEMPLATE_PARAM_SEPARATOR,
                                    tokens.TEMPLATE_CLOSE)):
                value = self._pop()
                if not showkey:
                    # Positional parameter: wrap the index like an explicit
                    # key so callers can treat every key the same way.
                    key = Wikicode(SmartList([Text(u"{0}".format(key))]))
                return Parameter(key, value, showkey)
            else:
                self._stack.write(self._handle_token())

    def _handle_template(self):
        """Build a Template from the tokens following a TEMPLATE_OPEN.

        Returns the Template once TEMPLATE_CLOSE is consumed, or None if the
        tokens run out first.
        """
        params = []
        int_keys = set()       # integer keys already taken by parameters
        int_key_range = {1}    # candidate indices for positional parameters
        self._stack.push()
        while self._tokens:
            # Peek rather than pop; see _handle_parameter().
            token = self._tokens[0]
            if isinstance(token, tokens.TEMPLATE_PARAM_SEPARATOR):
                self._tokens.pop(0)
                if not params:
                    # First separator: what was written so far is the name.
                    name = self._pop()
                # Offer the smallest index not yet used as the default key.
                param = self._handle_parameter(min(int_key_range - int_keys))
                # NOTE(review): assumes the key object exposes str-like
                # strip() returning text (e.g. via StringMixIn) -- confirm.
                key_text = param.key.strip()
                if re.match(r"[1-9][0-9]*$", key_text):
                    int_keys.add(int(key_text))
                    int_key_range.add(len(int_keys) + 1)
                params.append(param)
            elif isinstance(token, tokens.TEMPLATE_CLOSE):
                self._tokens.pop(0)
                if not params:
                    name = self._pop()
                return Template(name, params)
            else:
                self._stack.write(self._handle_token())

    def _handle_token(self):
        """Consume the next token and return the node it produces.

        TEXT yields a Text node and TEMPLATE_OPEN yields a Template; any
        other token type falls through and yields None.
        """
        token = self._tokens.pop(0)
        if isinstance(token, tokens.TEXT):
            return Text(token.text)
        elif isinstance(token, tokens.TEMPLATE_OPEN):
            return self._handle_template()

    def build(self, tokens):
        """Build and return Wikicode from the iterable *tokens*.

        The parameter shadows the ``tokens`` module inside this method only.
        """
        # Work on a copy so the caller's list is not consumed.
        self._tokens = list(tokens)
        self._stack.push()
        while self._tokens:
            self._stack.write(self._handle_token())
        return self._pop()

+ 0
- 53
mwparserfromhell/parser/demo.py View File

@@ -1,53 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from ..nodes import Template, Text
from ..nodes.extras import Parameter
from ..smart_list import SmartList
from ..wikicode import Wikicode

__all__ = ["DemoParser"]

class DemoParser(object):
    """Stand-in parser that returns a hand-built tree for a fixed sample."""

    def __init__(self, text):
        self.text = text

    def _tokenize(self):
        """Placeholder tokenizer; always returns an empty list."""
        return list()

    def parse(self):
        """Return the hard-coded Wikicode for the sample wikitext below.

        The result does not depend on ``self.text``.
        """
        # Ensure text is unicode!
        # (Unused at runtime; kept to show the wikitext the tree represents.)
        text = u"This is a {{test}} message with a {{template|with|foo={{params}}}}."

        def wrap(node):
            # Every Wikicode in the sample holds exactly one node.
            return Wikicode([node])

        leading = Text(u"This is a ")
        bare_template = Template(wrap(Text(u"test")))
        joining = Text(u" message with a ")
        positional = Parameter(
            wrap(Text(u"1")),
            wrap(Text(u"with")),
            showkey=False)
        named = Parameter(
            wrap(Text(u"foo")),
            wrap(Template(wrap(Text(u"params")))),
            showkey=True)
        full_template = Template(wrap(Text(u"template")), [positional, named])
        trailing = Text(u".")

        nodes = [leading, bare_template, joining, full_template, trailing]
        return Wikicode(SmartList(nodes))

+ 30
- 0
mwparserfromhell/parser/tokenizer.py View File

@@ -0,0 +1,30 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from . import tokens

__all__ = ["Tokenizer"]

class Tokenizer(object):
    """Convert wikitext into a list of tokens for the builder."""

    def tokenize(self, text):
        """Return *text* as a list holding a single TEXT token.

        No markup is recognised yet: the whole input becomes one token.
        """
        # No local may be named ``tokens`` here: that would shadow the
        # imported module and raise UnboundLocalError on this lookup. The
        # token class is ``TEXT``; the module defines no ``Text``.
        return [tokens.TEXT(text=text)]

+ 65
- 0
mwparserfromhell/parser/tokens.py View File

@@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

__all__ = ["Token"]

class Token(object):
    """Base token type whose keyword data is exposed as attributes.

    ``Token(text="x").text`` returns ``"x"``; setting or deleting an
    attribute updates the same underlying dict.
    """

    def __init__(self, **kwargs):
        # Store the dict through object.__setattr__: a plain assignment
        # would go through our own __setattr__, which needs the dict to
        # exist already and would recurse endlessly via __getattr__.
        object.__setattr__(self, "_kwargs", kwargs)

    def __getattr__(self, key):
        """Return the stored value for *key*.

        Only invoked when normal attribute lookup fails. Raises
        AttributeError (not KeyError) for unknown names so that ``hasattr``
        and ``getattr`` with a default behave as expected.
        """
        try:
            # Go through __dict__ so that a missing storage dict (instance
            # created without __init__) cannot re-enter __getattr__.
            return self.__dict__["_kwargs"][key]
        except KeyError:
            raise AttributeError(key)

    def __setattr__(self, key, value):
        """Store *value* under *key* in the token's data."""
        self._kwargs[key] = value

    def __delattr__(self, key):
        """Remove *key* from the token's data; AttributeError if absent."""
        try:
            del self._kwargs[key]
        except KeyError:
            raise AttributeError(key)


def make(name):
    """Create a Token subclass called *name* and list it in ``__all__``."""
    __all__.append(name)
    token_type = type(name, (Token,), {})
    return token_type

# Plain text
TEXT = make("TEXT")

# Templates:  {{  |  =  }}
TEMPLATE_OPEN = make("TEMPLATE_OPEN")
TEMPLATE_PARAM_SEPARATOR = make("TEMPLATE_PARAM_SEPARATOR")
TEMPLATE_PARAM_EQUALS = make("TEMPLATE_PARAM_EQUALS")
TEMPLATE_CLOSE = make("TEMPLATE_CLOSE")

# HTML entities:  &  #  x  ;
HTML_ENTITY_START = make("HTML_ENTITY_START")
HTML_ENTITY_NUMERIC = make("HTML_ENTITY_NUMERIC")
HTML_ENTITY_HEX = make("HTML_ENTITY_HEX")
HTML_ENTITY_END = make("HTML_ENTITY_END")

# Headings:  =...
HEADING_BLOCK = make("HEADING_BLOCK")

# Tags:  <  =  "  >  />  </  >
TAG_OPEN_OPEN = make("TAG_OPEN_OPEN")
TAG_ATTR_EQUALS = make("TAG_ATTR_EQUALS")
TAG_ATTR_QUOTE = make("TAG_ATTR_QUOTE")
TAG_CLOSE_OPEN = make("TAG_CLOSE_OPEN")
TAG_CLOSE_SELFCLOSE = make("TAG_CLOSE_SELFCLOSE")
TAG_OPEN_CLOSE = make("TAG_OPEN_CLOSE")
TAG_CLOSE_CLOSE = make("TAG_CLOSE_CLOSE")

# The factory is only needed while this module is being defined.
del make

Loading…
Cancel
Save