@@ -22,7 +22,7 @@ | |||||
from ..string_mixin import StringMixIn | from ..string_mixin import StringMixIn | ||||
__all__ = ["Node"] | |||||
__all__ = ["Node", "Text", "Heading", "HTMLEntity", "Tag", "Template"] | |||||
class Node(StringMixIn): | class Node(StringMixIn): | ||||
def __unicode__(self): | def __unicode__(self): | ||||
@@ -26,7 +26,7 @@ __all__ = ["Attribute"] | |||||
class Attribute(StringMixIn): | class Attribute(StringMixIn): | ||||
def __init__(self, name, value=None, quoted=True): | def __init__(self, name, value=None, quoted=True): | ||||
super(Attribute, self).__init__(self) | |||||
super(Attribute, self).__init__() | |||||
self._name = name | self._name = name | ||||
self._value = value | self._value = value | ||||
self._quoted = quoted | self._quoted = quoted | ||||
@@ -27,7 +27,7 @@ __all__ = ["Parameter"] | |||||
class Parameter(StringMixIn): | class Parameter(StringMixIn): | ||||
def __init__(self, name, value, showkey=True): | def __init__(self, name, value, showkey=True): | ||||
super(Parameter, self).__init__(self) | |||||
super(Parameter, self).__init__() | |||||
self._name = name | self._name = name | ||||
self._value = value | self._value = value | ||||
self._showkey = showkey | self._showkey = showkey | ||||
@@ -26,12 +26,12 @@ __all__ = ["Heading"] | |||||
class Heading(Node): | class Heading(Node): | ||||
def __init__(self, title, level):
    """Store the heading's *title* and *level*."""
    # Initialise the base class exactly once. ``super()`` already binds the
    # instance, so passing ``self`` again would hand the base initialiser a
    # stray positional argument.
    super(Heading, self).__init__()
    self._title = title
    self._level = level
def __unicode__(self):
    """Render the title with ``self.level`` equals signs on each side."""
    marker = "=" * self.level
    # Coerce the title to text before concatenating; it is not required to
    # be a plain string.
    return marker + unicode(self.title) + marker
def __iternodes__(self, getter): | def __iternodes__(self, getter): | ||||
yield None, self | yield None, self | ||||
@@ -26,9 +26,9 @@ from . import Node | |||||
__all__ = ["HTMLEntity"] | __all__ = ["HTMLEntity"] | ||||
class HTMLEntity(Node): | |||||
def __init__(self, value, named=None, hexadecimal=False): | |||||
super(HTMLEntity, self).__init__(self) | |||||
<<<<<<< HEAD | |||||
def __init__(self, value, named=None, hexadecimal=False, hex_char="x"): | |||||
super(HTMLEntity, self).__init__() | |||||
self._value = value | self._value = value | ||||
if named is None: # Try to guess whether or not the entity is named | if named is None: # Try to guess whether or not the entity is named | ||||
try: | try: | ||||
@@ -46,12 +46,13 @@ class HTMLEntity(Node): | |||||
else: | else: | ||||
self._named = named | self._named = named | ||||
self._hexadecimal = hexadecimal | self._hexadecimal = hexadecimal | ||||
self._hex_char = hex_char | |||||
def __unicode__(self):
    """Render the entity as ``&value;``, ``&#<hex_char>value;`` or ``&#value;``."""
    if self.named:
        return u"&{0};".format(self.value)
    if self.hexadecimal:
        # Use the stored marker character instead of a hard-coded "x" so
        # the output reflects whatever marker this entity was built with.
        return u"&#{0}{1};".format(self.hex_char, self.value)
    return u"&#{0};".format(self.value)
def __strip__(self, normalize, collapse): | def __strip__(self, normalize, collapse): | ||||
@@ -94,6 +95,10 @@ class HTMLEntity(Node): | |||||
def hexadecimal(self): | def hexadecimal(self): | ||||
return self._hexadecimal | return self._hexadecimal | ||||
@property
def hex_char(self):
    """Read-only access to the ``hex_char`` value stored on this entity."""
    stored = self._hex_char
    return stored
def normalize(self): | def normalize(self): | ||||
if self.named: | if self.named: | ||||
return unichr(htmlentitydefs.name2codepoint[self.value]) | return unichr(htmlentitydefs.name2codepoint[self.value]) | ||||
@@ -67,9 +67,9 @@ class Tag(Node): | |||||
TAGS_INVISIBLE = set((TAG_REF, TAG_GALLERY, TAG_MATH, TAG_NOINCLUDE)) | TAGS_INVISIBLE = set((TAG_REF, TAG_GALLERY, TAG_MATH, TAG_NOINCLUDE)) | ||||
TAGS_VISIBLE = set(range(300)) - TAGS_INVISIBLE | TAGS_VISIBLE = set(range(300)) - TAGS_INVISIBLE | ||||
def __init__(self, type_, tag, contents, attrs=None, showtag=True, | |||||
def __init__(self, type_, tag, contents=None, attrs=None, showtag=True, | |||||
self_closing=False, open_padding=0, close_padding=0): | self_closing=False, open_padding=0, close_padding=0): | ||||
super(Tag, self).__init__(self) | |||||
super(Tag, self).__init__() | |||||
self._type = type_ | self._type = type_ | ||||
self._tag = tag | self._tag = tag | ||||
self._contents = contents | self._contents = contents | ||||
@@ -33,7 +33,7 @@ FLAGS = re.DOTALL | re.UNICODE | |||||
class Template(Node): | class Template(Node): | ||||
def __init__(self, name, params=None): | def __init__(self, name, params=None): | ||||
super(Template, self).__init__(self) | |||||
super(Template, self).__init__() | |||||
self._name = name | self._name = name | ||||
if params: | if params: | ||||
self._params = params | self._params = params | ||||
@@ -77,7 +77,7 @@ class Template(Node): | |||||
code.replace(node, node.replace(char, replacement)) | code.replace(node, node.replace(char, replacement)) | ||||
def _blank_param_value(self, value):
    """Empty *value* in place, keeping only its leading and trailing whitespace."""
    # Raw string so that ``\s`` is unambiguously a regex escape rather than
    # a (deprecated) unknown string escape.
    match = re.search(r"^(\s*).*?(\s*)$", unicode(value), FLAGS)
    value.nodes = [Text(match.group(1)), Text(match.group(2))]
def _select_theory(self, theories): | def _select_theory(self, theories): | ||||
@@ -91,7 +91,7 @@ class Template(Node): | |||||
before_theories = defaultdict(lambda: 0) | before_theories = defaultdict(lambda: 0) | ||||
after_theories = defaultdict(lambda: 0) | after_theories = defaultdict(lambda: 0) | ||||
for param in self.params: | for param in self.params: | ||||
match = re.search("^(\s*).*?(\s*)$", unicode(param.value), FLAGS) | |||||
match = re.search(r"^(\s*).*?(\s*)$", unicode(param.value), FLAGS) | |||||
before, after = match.group(1), match.group(2) | before, after = match.group(1), match.group(2) | ||||
before_theories[before] += 1 | before_theories[before] += 1 | ||||
after_theories[after] += 1 | after_theories[after] += 1 | ||||
@@ -100,6 +100,21 @@ class Template(Node): | |||||
after = self._select_theory(after_theories) | after = self._select_theory(after_theories) | ||||
return before, after | return before, after | ||||
def _remove_with_field(self, param, i, name):
    """Return whether *param* (at index *i*) should be kept as a blanked field.

    The answer is False only when *param* shows its key and some later
    parameter has the same stripped *name* without showing its key.
    """
    if not param.showkey:
        return True
    for later in self.params[i + 1:]:
        if not later.showkey and later.name.strip() == name:
            return False
    return True
def _remove_without_field(self, param, i, force_no_field):
    """Return whether *param* (at index *i*) may be dropped entirely.

    A parameter that hides its key is only dropped when *force_no_field* is
    set or when every parameter after it shows its key.
    """
    if param.showkey or force_no_field:
        return True
    return all(later.showkey for later in self.params[i + 1:])
@property | @property | ||||
def name(self): | def name(self): | ||||
return self._name | return self._name | ||||
@@ -119,7 +134,7 @@ class Template(Node): | |||||
def get(self, name):
    """Return the parameter whose stripped name equals *name*.

    Raises ValueError (carrying the normalised name) when nothing matches.
    """
    name = name.strip() if isinstance(name, basestring) else unicode(name)
    # Walk the parameters back to front so that, when a name occurs more
    # than once, the last occurrence is the one returned.
    for param in reversed(self.params):
        if param.name.strip() == name:
            return param
    raise ValueError(name)
@@ -131,10 +146,10 @@ class Template(Node): | |||||
if self.has_param(name): | if self.has_param(name): | ||||
self.remove(name, keep_field=True) | self.remove(name, keep_field=True) | ||||
existing = self.get(name) | existing = self.get(name) | ||||
if showkey is None: # Infer showkey from current value | |||||
showkey = existing.showkey | |||||
if not showkey: | |||||
self._surface_escape(value, "=") | |||||
if showkey is not None: | |||||
if not showkey: | |||||
self._surface_escape(value, "=") | |||||
existing.showkey = showkey | |||||
nodes = existing.value.nodes | nodes = existing.value.nodes | ||||
if force_nonconformity: | if force_nonconformity: | ||||
existing.value = value | existing.value = value | ||||
@@ -144,10 +159,20 @@ class Template(Node): | |||||
if showkey is None: | if showkey is None: | ||||
try: | try: | ||||
int(name) | |||||
showkey = True | |||||
int_name = int(unicode(name)) | |||||
except ValueError: | except ValueError: | ||||
showkey = False | |||||
showkey = True | |||||
else: | |||||
int_keys = set() | |||||
for param in self.params: | |||||
if not param.showkey: | |||||
if re.match(r"[1-9][0-9]*$", param.name.strip()): | |||||
int_keys.add(int(unicode(param.name))) | |||||
expected = min(set(range(1, len(int_keys) + 2)) - int_keys) | |||||
if expected == int_name: | |||||
showkey = False | |||||
else: | |||||
showkey = True | |||||
if not showkey: | if not showkey: | ||||
self._surface_escape(value, "=") | self._surface_escape(value, "=") | ||||
if not force_nonconformity: | if not force_nonconformity: | ||||
@@ -164,12 +189,21 @@ class Template(Node): | |||||
def remove(self, name, keep_field=False, force_no_field=False):
    """Remove every parameter whose stripped name equals *name*.

    With *keep_field*, one matching parameter may be kept with a blanked
    value (as decided by ``_remove_with_field``); the remaining matches are
    handled as if *keep_field* were false. Without it, a match is dropped
    when ``_remove_without_field`` allows and blanked otherwise;
    *force_no_field* is forwarded to that helper.

    Raises ValueError (carrying the normalised name) when nothing matches.
    """
    name = name.strip() if isinstance(name, basestring) else unicode(name)
    removed = False
    # Deletions are deferred until the scan is finished: removing from
    # self.params while enumerating it would skip the element after each
    # removal and make ``i`` point at the wrong position for the helpers.
    to_remove = []
    for i, param in enumerate(self.params):
        if param.name.strip() != name:
            continue
        if keep_field:
            if self._remove_with_field(param, i, name):
                self._blank_param_value(param.value)
                # Only one blanked field is kept.
                keep_field = False
            else:
                to_remove.append(param)
        elif self._remove_without_field(param, i, force_no_field):
            to_remove.append(param)
        else:
            self._blank_param_value(param.value)
        removed = True
    if not removed:
        raise ValueError(name)
    for param in to_remove:
        self.params.remove(param)
@@ -26,7 +26,7 @@ __all__ = ["Text"] | |||||
class Text(Node): | class Text(Node): | ||||
def __init__(self, value):
    """Store *value* as this node's text."""
    # Initialise the base class exactly once; ``super()`` already binds the
    # instance, so it must not be passed again as an argument.
    super(Text, self).__init__()
    self._value = value
def __unicode__(self): | def __unicode__(self): | ||||
@@ -20,4 +20,22 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
from .demo import DemoParser as Parser | |||||
try: | |||||
from ._builder import CBuilder as Builder | |||||
from ._tokenizer import CTokenizer as Tokenizer | |||||
except ImportError: | |||||
from .builder import Builder | |||||
from .tokenizer import Tokenizer | |||||
__all__ = ["Parser"] | |||||
class Parser(object):
    """Parse text by running it through a Tokenizer and then a Builder."""

    def __init__(self, text):
        """Remember *text* and create the tokenizer and builder used by parse()."""
        self.text = text
        self._tokenizer = Tokenizer()
        self._builder = Builder()

    def parse(self):
        """Tokenize ``self.text`` and return what the builder makes of the tokens."""
        return self._builder.build(self._tokenizer.tokenize(self.text))
@@ -0,0 +1,177 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from . import tokens | |||||
from ..nodes import Heading, HTMLEntity, Tag, Template, Text | |||||
from ..nodes.extras import Attribute, Parameter | |||||
from ..smart_list import SmartList | |||||
from ..wikicode import Wikicode | |||||
__all__ = ["Builder"] | |||||
class Builder(object):
    """Assemble a list of tokens into nodes wrapped in a Wikicode object.

    Tokens are consumed from the end of ``self._tokens`` (the list is stored
    reversed), and nodes are collected on a stack of lists so that nested
    constructs can gather their children separately.
    """

    def __init__(self):
        self._tokens = []
        self._stacks = []

    def _wrap(self, nodes):
        """Wrap a list of nodes in a Wikicode backed by a SmartList."""
        return Wikicode(SmartList(nodes))

    def _push(self):
        """Start collecting nodes on a fresh stack."""
        self._stacks.append([])

    def _pop(self, wrap=True):
        """Remove the current stack and return it, wrapped unless *wrap* is false."""
        if wrap:
            return self._wrap(self._stacks.pop())
        return self._stacks.pop()

    def _write(self, item):
        """Append *item* to the current stack."""
        self._stacks[-1].append(item)

    def _handle_parameter(self, default):
        """Build one template Parameter from the upcoming tokens.

        *default* is used (as text) for the key when the parameter has no
        ``TemplateParamEquals`` token, i.e. no key was written at all.
        """
        key = None
        showkey = False
        self._push()
        while self._tokens:
            token = self._tokens.pop()
            if isinstance(token, tokens.TemplateParamEquals):
                key = self._pop()
                showkey = True
                self._push()
            elif isinstance(token, (tokens.TemplateParamSeparator,
                                    tokens.TemplateClose)):
                # Leave the terminator for _handle_template to see.
                self._tokens.append(token)
                value = self._pop()
                # Test for the sentinel, not truthiness: a key that was
                # written explicitly must be kept even if it is empty.
                if key is None:
                    key = self._wrap([Text(unicode(default))])
                return Parameter(key, value, showkey)
            else:
                self._write(self._handle_token(token))

    def _handle_template(self):
        """Build a Template from the tokens following a TemplateOpen."""
        params = []
        default = 1
        self._push()
        while self._tokens:
            token = self._tokens.pop()
            if isinstance(token, tokens.TemplateParamSeparator):
                if not params:
                    # The first separator ends the template name.
                    name = self._pop()
                param = self._handle_parameter(default)
                params.append(param)
                if not param.showkey:
                    default += 1
            elif isinstance(token, tokens.TemplateClose):
                if not params:
                    name = self._pop()
                return Template(name, params)
            else:
                self._write(self._handle_token(token))

    def _handle_entity(self):
        """Build an HTMLEntity from the tokens following an HTMLEntityStart."""
        token = self._tokens.pop()
        if isinstance(token, tokens.HTMLEntityNumeric):
            token = self._tokens.pop()
            if isinstance(token, tokens.HTMLEntityHex):
                text = self._tokens.pop()
                self._tokens.pop()  # Remove HTMLEntityEnd
                return HTMLEntity(text.text, named=False, hexadecimal=True,
                                  hex_char=token.char)
            self._tokens.pop()  # Remove HTMLEntityEnd
            return HTMLEntity(token.text, named=False, hexadecimal=False)
        self._tokens.pop()  # Remove HTMLEntityEnd
        return HTMLEntity(token.text, named=True, hexadecimal=False)

    def _handle_heading(self, token):
        """Build a Heading; *token* is the HeadingStart carrying the level."""
        level = token.level
        self._push()
        while self._tokens:
            token = self._tokens.pop()
            if isinstance(token, tokens.HeadingEnd):
                title = self._pop()
                return Heading(title, level)
            else:
                self._write(self._handle_token(token))

    def _handle_attribute(self):
        """Build one tag Attribute from the upcoming tokens."""
        name, quoted = None, False
        self._push()
        while self._tokens:
            token = self._tokens.pop()
            if isinstance(token, tokens.TagAttrEquals):
                name = self._pop()
                self._push()
            elif isinstance(token, tokens.TagAttrQuote):
                quoted = True
            elif isinstance(token, (tokens.TagAttrStart,
                                    tokens.TagCloseOpen)):
                # Leave the terminator for _handle_tag to see.
                self._tokens.append(token)
                if name is not None:
                    return Attribute(name, self._pop(), quoted)
                return Attribute(self._pop(), quoted=quoted)
            else:
                self._write(self._handle_token(token))

    def _handle_tag(self, token):
        """Build a Tag; *token* is the TagOpenOpen carrying type and showtag."""
        type_, showtag = token.type, token.showtag
        attrs = []
        self._push()
        while self._tokens:
            token = self._tokens.pop()
            if isinstance(token, tokens.TagAttrStart):
                attrs.append(self._handle_attribute())
            elif isinstance(token, tokens.TagCloseOpen):
                open_pad = token.padding
                tag = self._pop()
                self._push()
            elif isinstance(token, tokens.TagCloseSelfclose):
                tag = self._pop()
                return Tag(type_, tag, attrs=attrs, showtag=showtag,
                           self_closing=True, open_padding=token.padding)
            elif isinstance(token, tokens.TagOpenClose):
                contents = self._pop()
            elif isinstance(token, tokens.TagCloseClose):
                return Tag(type_, tag, contents, attrs, showtag, False,
                           open_pad, token.padding)
            else:
                self._write(self._handle_token(token))

    def _handle_token(self, token):
        """Turn *token* (and any tokens it opens) into a node.

        Returns None for token types that are not listed here.
        """
        if isinstance(token, tokens.Text):
            return Text(token.text)
        elif isinstance(token, tokens.TemplateOpen):
            return self._handle_template()
        elif isinstance(token, tokens.HTMLEntityStart):
            return self._handle_entity()
        elif isinstance(token, tokens.HeadingStart):
            return self._handle_heading(token)
        elif isinstance(token, tokens.TagOpenOpen):
            return self._handle_tag(token)

    def build(self, tokenlist):
        """Build and return the wrapped node list for *tokenlist*.

        The tokens are copied before being reversed and consumed, so the
        caller's sequence is left untouched.
        """
        self._tokens = list(tokenlist)
        self._tokens.reverse()
        self._push()
        while self._tokens:
            node = self._handle_token(self._tokens.pop())
            self._write(node)
        return self._pop()
@@ -0,0 +1,41 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
# Local (stack-specific) contexts. Every individual flag occupies one bit;
# TEMPLATE and HEADING are the masks covering all flags of their group.
TEMPLATE_NAME = 1 << 0
TEMPLATE_PARAM_KEY = 1 << 1
TEMPLATE_PARAM_VALUE = 1 << 2
TEMPLATE = TEMPLATE_NAME | TEMPLATE_PARAM_KEY | TEMPLATE_PARAM_VALUE

HEADING_LEVEL_1 = 1 << 3
HEADING_LEVEL_2 = 1 << 4
HEADING_LEVEL_3 = 1 << 5
HEADING_LEVEL_4 = 1 << 6
HEADING_LEVEL_5 = 1 << 7
HEADING_LEVEL_6 = 1 << 8
HEADING = (HEADING_LEVEL_1 | HEADING_LEVEL_2 | HEADING_LEVEL_3 |
           HEADING_LEVEL_4 | HEADING_LEVEL_5 | HEADING_LEVEL_6)

# Global contexts:
GL_HEADING = 1 << 0
@@ -1,53 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from ..nodes import Template, Text | |||||
from ..nodes.extras import Parameter | |||||
from ..smart_list import SmartList | |||||
from ..wikicode import Wikicode | |||||
__all__ = ["DemoParser"] | |||||
class DemoParser(object):
    """Placeholder parser that ignores its input and returns a fixed tree.

    The tree returned by parse() corresponds to the text
    ``This is a {{test}} message with a {{template|with|foo={{params}}}}.``
    """

    def __init__(self, text):
        self.text = text

    def _tokenize(self):
        """Placeholder; always returns an empty list."""
        return []

    def parse(self):
        """Return the hard-coded Wikicode tree; ``self.text`` is not consulted."""
        first_template = Template(Wikicode([Text(u"test")]))

        # {{template|with|foo={{params}}}}: one positional parameter whose
        # key "1" is hidden, and one named parameter holding a template.
        positional = Parameter(Wikicode([Text(u"1")]),
                               Wikicode([Text(u"with")]), showkey=False)
        nested = Template(Wikicode([Text(u"params")]))
        named = Parameter(Wikicode([Text(u"foo")]), Wikicode([nested]),
                          showkey=True)
        second_template = Template(Wikicode([Text(u"template")]),
                                   [positional, named])

        return Wikicode(SmartList([
            Text(u"This is a "),
            first_template,
            Text(u" message with a "),
            second_template,
            Text(u"."),
        ]))
@@ -0,0 +1,285 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
import htmlentitydefs | |||||
from math import log | |||||
import re | |||||
import string | |||||
from . import contexts | |||||
from . import tokens | |||||
__all__ = ["Tokenizer"] | |||||
class BadRoute(Exception):
    """Raised by the tokenizer to abandon the route it is currently parsing."""
class Tokenizer(object):
    """Split wikicode text into a flat list of tokens.

    The text is cut into segments around the marker characters matched by
    ``regex``, and ``_head`` is the index of the current segment. Nested
    constructs are parsed on a stack of ``[tokens, context, textbuffer]``
    frames; a route that turns out to be invalid is abandoned by raising
    BadRoute, after which the caller rewinds and emits plain text instead.
    """

    START = object()  # returned by _read() for positions before the text
    END = object()  # returned by _read() for positions past the text
    MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":",
               "/", "-", "\n", END]
    regex = re.compile(r"([{}\[\]<>|=&#*;:/\-\n])", flags=re.IGNORECASE)

    def __init__(self):
        self._text = None
        self._head = 0
        self._stacks = []
        self._global = 0

    @property
    def _stack(self):
        """Token list of the current frame."""
        return self._stacks[-1][0]

    @property
    def _context(self):
        """Context flags of the current frame."""
        return self._stacks[-1][1]

    @_context.setter
    def _context(self, value):
        self._stacks[-1][1] = value

    @property
    def _textbuffer(self):
        """Pending text pieces of the current frame."""
        return self._stacks[-1][2]

    @_textbuffer.setter
    def _textbuffer(self, value):
        self._stacks[-1][2] = value

    def _push(self, context=0):
        """Open a new frame with an empty token list and text buffer."""
        self._stacks.append([[], context, []])

    def _push_textbuffer(self):
        """Flush any pending text into the token list as a single Text token."""
        if self._textbuffer:
            self._stack.append(tokens.Text(text="".join(self._textbuffer)))
            self._textbuffer = []

    def _pop(self):
        """Flush pending text, drop the current frame and return its tokens."""
        self._push_textbuffer()
        return self._stacks.pop()[0]

    def _fail_route(self):
        """Discard the current frame and raise BadRoute."""
        self._pop()
        raise BadRoute()

    def _write(self, token):
        """Append *token* to the current frame, flushing pending text first."""
        self._push_textbuffer()
        self._stack.append(token)

    def _write_text(self, text):
        """Queue *text* in the current frame's text buffer."""
        self._textbuffer.append(text)

    def _write_all(self, tokenlist):
        """Append every token of *tokenlist* to the current frame.

        A leading Text token is removed from *tokenlist* and merged into the
        pending text buffer before the rest is appended.
        """
        if tokenlist and isinstance(tokenlist[0], tokens.Text):
            self._write_text(tokenlist.pop(0).text)
        self._push_textbuffer()
        self._stack.extend(tokenlist)

    def _read(self, delta=0, wrap=False, strict=False):
        """Return the segment *delta* positions away from the head.

        Returns START for a negative index (unless *wrap* is set and the
        index is within range) and END for an index past the text. With
        *strict*, running past the text fails the current route instead.
        """
        index = self._head + delta
        if index < 0 and (not wrap or abs(index) > len(self._text)):
            return self.START
        try:
            return self._text[index]
        except IndexError:
            if strict:
                self._fail_route()
            return self.END

    def _parse_template(self):
        """Parse a template at the head, or emit the head segment as text."""
        reset = self._head
        self._head += 2
        try:
            template = self._parse(contexts.TEMPLATE_NAME)
        except BadRoute:
            self._head = reset
            self._write_text(self._read())
        else:
            self._write(tokens.TemplateOpen())
            self._write_all(template)
            self._write(tokens.TemplateClose())

    def _verify_template_name(self):
        """Fail the route if the name's text has a newline inside it."""
        self._push_textbuffer()
        if self._stack:
            text = [tok for tok in self._stack if isinstance(tok, tokens.Text)]
            text = "".join([token.text for token in text])
            if text.strip() and "\n" in text.strip():
                self._fail_route()

    def _handle_template_param(self):
        """Handle a parameter separator inside a template."""
        if self._context & contexts.TEMPLATE_NAME:
            self._verify_template_name()
            self._context ^= contexts.TEMPLATE_NAME
        if self._context & contexts.TEMPLATE_PARAM_VALUE:
            self._context ^= contexts.TEMPLATE_PARAM_VALUE
        self._context |= contexts.TEMPLATE_PARAM_KEY
        self._write(tokens.TemplateParamSeparator())

    def _handle_template_param_value(self):
        """Switch from parameter-key to parameter-value context."""
        self._context ^= contexts.TEMPLATE_PARAM_KEY
        self._context |= contexts.TEMPLATE_PARAM_VALUE
        self._write(tokens.TemplateParamEquals())

    def _handle_template_end(self):
        """Finish the current template and return its tokens."""
        if self._context & contexts.TEMPLATE_NAME:
            self._verify_template_name()
        self._head += 1
        return self._pop()

    def _parse_heading(self):
        """Parse a heading at the head, or emit its equals signs as text."""
        self._global |= contexts.GL_HEADING
        reset = self._head
        self._head += 1
        best = 1
        while self._read() == "=":
            best += 1
            self._head += 1
        # Runs longer than six equals signs use the level-6 context.
        context = contexts.HEADING_LEVEL_1 << min(best - 1, 5)

        try:
            title, level = self._parse(context)
        except BadRoute:
            self._head = reset + best - 1
            self._write_text("=" * best)
        else:
            self._write(tokens.HeadingStart(level=level))
            if level < best:
                self._write_text("=" * (best - level))
            self._write_all(title)
            self._write(tokens.HeadingEnd())
        finally:
            self._global ^= contexts.GL_HEADING

    def _handle_heading_end(self):
        """Handle a run of equals signs inside a heading.

        Returns a ``(tokens, level)`` pair for the enclosing heading.
        """
        reset = self._head
        self._head += 1
        best = 1
        while self._read() == "=":
            best += 1
            self._head += 1
        current = int(log(self._context / contexts.HEADING_LEVEL_1, 2)) + 1
        level = min(current, min(best, 6))

        try:
            # Check whether more heading text follows this run.
            after, after_level = self._parse(self._context)
        except BadRoute:
            if level < best:
                self._write_text("=" * (best - level))
            self._head = reset + best - 1
            return self._pop(), level
        else:
            self._write_text("=" * best)
            self._write_all(after)
            return self._pop(), after_level

    def _really_parse_entity(self):
        """Emit the tokens of an HTML entity at the head, or fail the route."""
        self._write(tokens.HTMLEntityStart())
        self._head += 1

        this = self._read(strict=True)
        if this == "#":
            numeric = True
            self._write(tokens.HTMLEntityNumeric())
            self._head += 1
            this = self._read(strict=True)
            if this[0].lower() == "x":
                hexadecimal = True
                self._write(tokens.HTMLEntityHex(char=this[0]))
                this = this[1:]
                if not this:
                    self._fail_route()
            else:
                hexadecimal = False
        else:
            numeric = hexadecimal = False

        valid = string.hexdigits if hexadecimal else string.digits
        if not numeric and not hexadecimal:
            valid += string.ascii_letters
        if not all(char in valid for char in this):
            self._fail_route()

        self._head += 1
        if self._read() != ";":
            self._fail_route()
        if numeric:
            test = int(this, 16) if hexadecimal else int(this)
            if test < 1 or test > 0x10FFFF:
                self._fail_route()
        else:
            if this not in htmlentitydefs.entitydefs:
                self._fail_route()

        self._write(tokens.Text(text=this))
        self._write(tokens.HTMLEntityEnd())

    def _parse_entity(self):
        """Parse an HTML entity at the head, or emit the head segment as text."""
        reset = self._head
        self._push()
        try:
            self._really_parse_entity()
        except BadRoute:
            self._head = reset
            self._write_text(self._read())
        else:
            self._write_all(self._pop())

    def _parse(self, context=0):
        """Parse from the head in a new frame opened with *context*.

        Returns the frame's tokens when the text or the enclosing construct
        ends; raises BadRoute if the text ends inside a template or heading.
        """
        self._push(context)
        while True:
            this = self._read()
            if this not in self.MARKERS:
                self._write_text(this)
                self._head += 1
                continue
            if this is self.END:
                if self._context & (contexts.TEMPLATE | contexts.HEADING):
                    self._fail_route()
                return self._pop()
            prev, following = self._read(-1), self._read(1)
            if this == following == "{":
                self._parse_template()
            elif this == "|" and self._context & contexts.TEMPLATE:
                self._handle_template_param()
            elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY:
                self._handle_template_param_value()
            elif this == following == "}" and self._context & contexts.TEMPLATE:
                return self._handle_template_end()
            elif ((prev == "\n" or prev == self.START) and this == "=" and
                  not self._global & contexts.GL_HEADING):
                self._parse_heading()
            elif this == "=" and self._context & contexts.HEADING:
                return self._handle_heading_end()
            elif this == "\n" and self._context & contexts.HEADING:
                self._fail_route()
            elif this == "&":
                self._parse_entity()
            else:
                self._write_text(this)
            self._head += 1

    def tokenize(self, text):
        """Tokenize *text* and return the resulting list of tokens."""
        split = self.regex.split(text)
        self._text = [segment for segment in split if segment]
        # Start from a clean state so that one instance can tokenize several
        # texts; otherwise the head would still point into the previous run.
        self._head = 0
        self._global = 0
        self._stacks = []
        return self._parse()
@@ -0,0 +1,81 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
__all__ = ["Token"] | |||||
class Token(object):
    """A token that stores arbitrary keyword data as attributes.

    The keyword arguments passed to the constructor are kept in an
    internal dict (``_kwargs``); attribute reads, writes, and deletions on
    the instance are redirected to that dict.  Two tokens are equal when
    *other* is an instance of this token's type and both hold equal data.
    """

    def __init__(self, **kwargs):
        # Bypass our own __setattr__, which would try to store "_kwargs"
        # inside the (not yet existing) _kwargs dict.
        super(Token, self).__setattr__("_kwargs", kwargs)

    def __repr__(self):
        """Return ``Name(key=value, ...)``, truncating strings over 100 chars."""
        args = []
        for key, value in self._kwargs.iteritems():
            if isinstance(value, basestring) and len(value) > 100:
                args.append(key + "=" + repr(value[:97] + "..."))
            else:
                args.append(key + "=" + repr(value))
        return u"{0}({1})".format(type(self).__name__, u", ".join(args))

    def __eq__(self, other):
        if isinstance(other, type(self)):
            return self._kwargs == other._kwargs
        return False

    def __ne__(self, other):
        # Python 2 does not derive != from ==; without this, two equal
        # tokens would also compare as "not equal".
        return not self.__eq__(other)

    def __getattr__(self, key):
        """Return the stored value for *key*.

        Raises AttributeError (not KeyError) when *key* is missing, so
        that ``getattr(token, key, default)``, ``hasattr()``, and the
        ``copy`` module behave as they do for ordinary objects.
        """
        if key == "_kwargs":
            # Only reached when the instance was created without running
            # __init__ (e.g. while being copied or unpickled); looking it
            # up again here would recurse forever.
            raise AttributeError(key)
        try:
            return self._kwargs[key]
        except KeyError:
            raise AttributeError("{0!r} object has no attribute {1!r}".format(
                type(self).__name__, key))

    def __setattr__(self, key, value):
        self._kwargs[key] = value

    def __delattr__(self, key):
        """Remove *key* from the token; raises AttributeError if missing."""
        try:
            del self._kwargs[key]
        except KeyError:
            raise AttributeError("{0!r} object has no attribute {1!r}".format(
                type(self).__name__, key))
def make(name):
    """Create a new Token subclass called *name* and export it via __all__."""
    __all__.append(name)
    return type(name, (Token,), {})


# Plain text
Text = make("Text")

# Templates
TemplateOpen = make("TemplateOpen")                                 # {{
TemplateParamSeparator = make("TemplateParamSeparator")             # |
TemplateParamEquals = make("TemplateParamEquals")                   # =
TemplateClose = make("TemplateClose")                               # }}

# HTML entities
HTMLEntityStart = make("HTMLEntityStart")                           # &
HTMLEntityNumeric = make("HTMLEntityNumeric")                       # #
HTMLEntityHex = make("HTMLEntityHex")                               # x
HTMLEntityEnd = make("HTMLEntityEnd")                               # ;

# Headings
HeadingStart = make("HeadingStart")                                 # =...
HeadingEnd = make("HeadingEnd")                                     # =...

# Tags
TagOpenOpen = make("TagOpenOpen")                                   # <
TagAttrStart = make("TagAttrStart")
TagAttrEquals = make("TagAttrEquals")                               # =
TagAttrQuote = make("TagAttrQuote")                                 # "
TagCloseOpen = make("TagCloseOpen")                                 # >
TagCloseSelfclose = make("TagCloseSelfclose")                       # />
TagOpenClose = make("TagOpenClose")                                 # </
TagCloseClose = make("TagCloseClose")                               # >

# The factory is only needed while the module is being defined.
del make
@@ -81,6 +81,7 @@ class SmartList(list): | |||||
def __iadd__(self, other): | def __iadd__(self, other): | ||||
self.extend(other) | self.extend(other) | ||||
return self | |||||
def append(self, item): | def append(self, item): | ||||
head = len(self) | head = len(self) | ||||
@@ -221,6 +222,7 @@ class _ListProxy(list): | |||||
def __iadd__(self, other): | def __iadd__(self, other): | ||||
self.extend(other) | self.extend(other) | ||||
return self | |||||
@property | @property | ||||
def _start(self): | def _start(self): | ||||
@@ -22,24 +22,25 @@ | |||||
import mwparserfromhell | import mwparserfromhell | ||||
from .nodes import Node | from .nodes import Node | ||||
from .smart_list import SmartList | |||||
def parse_anything(value):
    """Return a ``Wikicode`` object built from *value*.

    *value* may be a ``Wikicode`` (returned unchanged), a ``Node`` (wrapped
    in a one-element ``SmartList``), a string (parsed), an int (converted
    to unicode and parsed), ``None`` (an empty ``Wikicode``), or an
    iterable of any of these, whose nodes are concatenated in order.

    Raises ValueError if a TypeError occurs while treating *value* as an
    iterable of the accepted types.
    """
    wikicode_cls = mwparserfromhell.wikicode.Wikicode
    if isinstance(value, wikicode_cls):
        result = value
    elif isinstance(value, Node):
        result = wikicode_cls(SmartList([value]))
    elif isinstance(value, basestring):
        result = mwparserfromhell.parse(value)
    elif isinstance(value, int):
        result = mwparserfromhell.parse(unicode(value))
    elif value is None:
        result = wikicode_cls(SmartList())
    else:
        try:
            collected = SmartList()
            for element in value:
                collected += parse_anything(element).nodes
        except TypeError:
            error = "Needs string, Node, Wikicode, int, None, or iterable of these, but got {0}: {1}"
            raise ValueError(error.format(type(value).__name__, value))
        result = wikicode_cls(collected)
    return result
@@ -105,6 +105,10 @@ class Wikicode(StringMixIn): | |||||
def nodes(self):
    """Return the underlying list of nodes held by this object."""
    return self._nodes
@nodes.setter
def nodes(self, value):
    """Replace the underlying list of nodes with *value*."""
    self._nodes = value
def get(self, index):
    """Return ``self.nodes[index]``."""
    return self.nodes[index]