@@ -22,7 +22,7 @@ | |||
from ..string_mixin import StringMixIn | |||
__all__ = ["Node"] | |||
__all__ = ["Node", "Text", "Heading", "HTMLEntity", "Tag", "Template"] | |||
class Node(StringMixIn): | |||
def __unicode__(self): | |||
@@ -26,7 +26,7 @@ __all__ = ["Attribute"] | |||
class Attribute(StringMixIn): | |||
def __init__(self, name, value=None, quoted=True): | |||
super(Attribute, self).__init__(self) | |||
super(Attribute, self).__init__() | |||
self._name = name | |||
self._value = value | |||
self._quoted = quoted | |||
@@ -27,7 +27,7 @@ __all__ = ["Parameter"] | |||
class Parameter(StringMixIn): | |||
def __init__(self, name, value, showkey=True): | |||
super(Parameter, self).__init__(self) | |||
super(Parameter, self).__init__() | |||
self._name = name | |||
self._value = value | |||
self._showkey = showkey | |||
@@ -26,12 +26,12 @@ __all__ = ["Heading"] | |||
class Heading(Node): | |||
def __init__(self, title, level): | |||
super(Heading, self).__init__(self) | |||
super(Heading, self).__init__() | |||
self._title = title | |||
self._level = level | |||
def __unicode__(self): | |||
return ("=" * self.level) + self.title + ("=" * self.level) | |||
return ("=" * self.level) + unicode(self.title) + ("=" * self.level) | |||
def __iternodes__(self, getter): | |||
yield None, self | |||
@@ -26,9 +26,9 @@ from . import Node | |||
__all__ = ["HTMLEntity"] | |||
class HTMLEntity(Node): | |||
def __init__(self, value, named=None, hexadecimal=False): | |||
super(HTMLEntity, self).__init__(self) | |||
<<<<<<< HEAD | |||
def __init__(self, value, named=None, hexadecimal=False, hex_char="x"): | |||
super(HTMLEntity, self).__init__() | |||
self._value = value | |||
if named is None: # Try to guess whether or not the entity is named | |||
try: | |||
@@ -46,12 +46,13 @@ class HTMLEntity(Node): | |||
else: | |||
self._named = named | |||
self._hexadecimal = hexadecimal | |||
self._hex_char = hex_char | |||
def __unicode__(self): | |||
if self.named: | |||
return u"&{0};".format(self.value) | |||
if self.hexadecimal: | |||
return u"&#x{0};".format(self.value) | |||
return u"&#{0}{1};".format(self.hex_char, self.value) | |||
return u"&#{0};".format(self.value) | |||
def __strip__(self, normalize, collapse): | |||
@@ -94,6 +95,10 @@ class HTMLEntity(Node): | |||
def hexadecimal(self): | |||
return self._hexadecimal | |||
@property | |||
def hex_char(self): | |||
return self._hex_char | |||
def normalize(self): | |||
if self.named: | |||
return unichr(htmlentitydefs.name2codepoint[self.value]) | |||
@@ -67,9 +67,9 @@ class Tag(Node): | |||
TAGS_INVISIBLE = set((TAG_REF, TAG_GALLERY, TAG_MATH, TAG_NOINCLUDE)) | |||
TAGS_VISIBLE = set(range(300)) - TAGS_INVISIBLE | |||
def __init__(self, type_, tag, contents, attrs=None, showtag=True, | |||
def __init__(self, type_, tag, contents=None, attrs=None, showtag=True, | |||
self_closing=False, open_padding=0, close_padding=0): | |||
super(Tag, self).__init__(self) | |||
super(Tag, self).__init__() | |||
self._type = type_ | |||
self._tag = tag | |||
self._contents = contents | |||
@@ -33,7 +33,7 @@ FLAGS = re.DOTALL | re.UNICODE | |||
class Template(Node): | |||
def __init__(self, name, params=None): | |||
super(Template, self).__init__(self) | |||
super(Template, self).__init__() | |||
self._name = name | |||
if params: | |||
self._params = params | |||
@@ -77,7 +77,7 @@ class Template(Node): | |||
code.replace(node, node.replace(char, replacement)) | |||
def _blank_param_value(self, value): | |||
match = re.search("^(\s*).*?(\s*)$", unicode(value), FLAGS) | |||
match = re.search(r"^(\s*).*?(\s*)$", unicode(value), FLAGS) | |||
value.nodes = [Text(match.group(1)), Text(match.group(2))] | |||
def _select_theory(self, theories): | |||
@@ -91,7 +91,7 @@ class Template(Node): | |||
before_theories = defaultdict(lambda: 0) | |||
after_theories = defaultdict(lambda: 0) | |||
for param in self.params: | |||
match = re.search("^(\s*).*?(\s*)$", unicode(param.value), FLAGS) | |||
match = re.search(r"^(\s*).*?(\s*)$", unicode(param.value), FLAGS) | |||
before, after = match.group(1), match.group(2) | |||
before_theories[before] += 1 | |||
after_theories[after] += 1 | |||
@@ -100,6 +100,21 @@ class Template(Node): | |||
after = self._select_theory(after_theories) | |||
return before, after | |||
def _remove_with_field(self, param, i, name): | |||
if param.showkey: | |||
following = self.params[i+1:] | |||
better_matches = [after.name.strip() == name and not after.showkey for after in following] | |||
if any(better_matches): | |||
return False | |||
return True | |||
def _remove_without_field(self, param, i, force_no_field): | |||
if not param.showkey and not force_no_field: | |||
dependents = [not after.showkey for after in self.params[i+1:]] | |||
if any(dependents): | |||
return False | |||
return True | |||
@property | |||
def name(self): | |||
return self._name | |||
@@ -119,7 +134,7 @@ class Template(Node): | |||
def get(self, name): | |||
name = name.strip() if isinstance(name, basestring) else unicode(name) | |||
for param in self.params: | |||
for param in reversed(self.params): | |||
if param.name.strip() == name: | |||
return param | |||
raise ValueError(name) | |||
@@ -131,10 +146,10 @@ class Template(Node): | |||
if self.has_param(name): | |||
self.remove(name, keep_field=True) | |||
existing = self.get(name) | |||
if showkey is None: # Infer showkey from current value | |||
showkey = existing.showkey | |||
if not showkey: | |||
self._surface_escape(value, "=") | |||
if showkey is not None: | |||
if not showkey: | |||
self._surface_escape(value, "=") | |||
existing.showkey = showkey | |||
nodes = existing.value.nodes | |||
if force_nonconformity: | |||
existing.value = value | |||
@@ -144,10 +159,20 @@ class Template(Node): | |||
if showkey is None: | |||
try: | |||
int(name) | |||
showkey = True | |||
int_name = int(unicode(name)) | |||
except ValueError: | |||
showkey = False | |||
showkey = True | |||
else: | |||
int_keys = set() | |||
for param in self.params: | |||
if not param.showkey: | |||
if re.match(r"[1-9][0-9]*$", param.name.strip()): | |||
int_keys.add(int(unicode(param.name))) | |||
expected = min(set(range(1, len(int_keys) + 2)) - int_keys) | |||
if expected == int_name: | |||
showkey = False | |||
else: | |||
showkey = True | |||
if not showkey: | |||
self._surface_escape(value, "=") | |||
if not force_nonconformity: | |||
@@ -164,12 +189,21 @@ class Template(Node): | |||
def remove(self, name, keep_field=False, force_no_field=False): | |||
name = name.strip() if isinstance(name, basestring) else unicode(name) | |||
removed = False | |||
for i, param in enumerate(self.params): | |||
if param.name.strip() == name: | |||
if keep_field: | |||
return self._blank_param_value(param.value) | |||
dependent = [not after.showkey for after in self.params[i+1:]] | |||
if any(dependent) and not param.showkey and not force_no_field: | |||
return self._blank_param_value(param.value) | |||
return self.params.remove(param) | |||
raise ValueError(name) | |||
if self._remove_with_field(param, i, name): | |||
self._blank_param_value(param.value) | |||
keep_field = False | |||
else: | |||
self.params.remove(param) | |||
else: | |||
if self._remove_without_field(param, i, force_no_field): | |||
self.params.remove(param) | |||
else: | |||
self._blank_param_value(param.value) | |||
if not removed: | |||
removed = True | |||
if not removed: | |||
raise ValueError(name) |
@@ -26,7 +26,7 @@ __all__ = ["Text"] | |||
class Text(Node): | |||
def __init__(self, value): | |||
super(Text, self).__init__(self) | |||
super(Text, self).__init__() | |||
self._value = value | |||
def __unicode__(self): | |||
@@ -20,4 +20,22 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from .demo import DemoParser as Parser | |||
try: | |||
from ._builder import CBuilder as Builder | |||
from ._tokenizer import CTokenizer as Tokenizer | |||
except ImportError: | |||
from .builder import Builder | |||
from .tokenizer import Tokenizer | |||
__all__ = ["Parser"] | |||
class Parser(object):
    """Parse a piece of text by chaining a tokenizer and a builder.

    The text is stored on the instance; :meth:`parse` feeds it through
    ``Tokenizer.tokenize`` and hands the resulting tokens to
    ``Builder.build``.
    """

    def __init__(self, text):
        """Remember *text* and create the tokenizer and builder to use."""
        self.text = text
        self._tokenizer = Tokenizer()
        self._builder = Builder()

    def parse(self):
        """Tokenize the stored text and return what the builder makes of it."""
        return self._builder.build(self._tokenizer.tokenize(self.text))
@@ -0,0 +1,177 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from . import tokens | |||
from ..nodes import Heading, HTMLEntity, Tag, Template, Text | |||
from ..nodes.extras import Attribute, Parameter | |||
from ..smart_list import SmartList | |||
from ..wikicode import Wikicode | |||
__all__ = ["Builder"] | |||
class Builder(object):
    """Combine a flat sequence of tokens into a tree of nodes.

    Tokens are consumed from the end of an internal, reversed list
    (``self._tokens``), so ``pop()`` always yields the next token in source
    order. Nodes under construction are collected on a stack of plain lists
    (``self._stacks``); the innermost list receives newly written nodes.
    """

    def __init__(self):
        self._tokens = []
        self._stacks = []

    def _wrap(self, nodes):
        """Return *nodes* wrapped as ``Wikicode(SmartList(nodes))``."""
        return Wikicode(SmartList(nodes))

    def _push(self):
        """Start collecting nodes on a new, empty stack."""
        self._stacks.append([])

    def _pop(self, wrap=True):
        """Remove the innermost stack and return it.

        The nodes are wrapped with :meth:`_wrap` unless *wrap* is false, in
        which case the raw list is returned.
        """
        nodes = self._stacks.pop()
        if wrap:
            return self._wrap(nodes)
        return nodes

    def _write(self, item):
        """Append *item* to the innermost stack."""
        self._stacks[-1].append(item)

    def _handle_parameter(self, default):
        """Consume the tokens of one template parameter and return it.

        *default* is the positional name given to the parameter when no
        ``TemplateParamEquals`` token is seen. The terminating separator or
        close token is put back so that the caller can process it.
        """
        key = None
        showkey = False
        self._push()
        while self._tokens:
            token = self._tokens.pop()
            if isinstance(token, tokens.TemplateParamEquals):
                key = self._pop()
                showkey = True
                self._push()
            elif isinstance(token, (tokens.TemplateParamSeparator,
                                    tokens.TemplateClose)):
                self._tokens.append(token)
                value = self._pop()
                # Only fall back to the positional name when no "=" was seen;
                # an explicit but empty key must be kept as written.
                if key is None:
                    key = self._wrap([Text(unicode(default))])
                return Parameter(key, value, showkey)
            else:
                self._write(self._handle_token(token))

    def _handle_template(self):
        """Consume tokens up to ``TemplateClose`` and return a Template."""
        params = []
        default = 1
        self._push()
        while self._tokens:
            token = self._tokens.pop()
            if isinstance(token, tokens.TemplateParamSeparator):
                if not params:
                    # First separator: everything collected so far is the name.
                    name = self._pop()
                param = self._handle_parameter(default)
                params.append(param)
                if not param.showkey:
                    default += 1
            elif isinstance(token, tokens.TemplateClose):
                if not params:
                    name = self._pop()
                return Template(name, params)
            else:
                self._write(self._handle_token(token))

    def _handle_entity(self):
        """Consume the tokens following ``HTMLEntityStart``; return the entity.

        Handles the three layouts distinguished below: hexadecimal numeric,
        decimal numeric and named. The closing ``HTMLEntityEnd`` token is
        discarded in each case.
        """
        token = self._tokens.pop()
        if isinstance(token, tokens.HTMLEntityNumeric):
            token = self._tokens.pop()
            if isinstance(token, tokens.HTMLEntityHex):
                text = self._tokens.pop()
                self._tokens.pop()  # Remove HTMLEntityEnd
                return HTMLEntity(text.text, named=False, hexadecimal=True,
                                  hex_char=token.char)
            self._tokens.pop()  # Remove HTMLEntityEnd
            return HTMLEntity(token.text, named=False, hexadecimal=False)
        self._tokens.pop()  # Remove HTMLEntityEnd
        return HTMLEntity(token.text, named=True, hexadecimal=False)

    def _handle_heading(self, token):
        """Consume tokens up to ``HeadingEnd`` and return a Heading.

        *token* is the ``HeadingStart`` token; its ``level`` is used for the
        resulting node.
        """
        level = token.level
        self._push()
        while self._tokens:
            token = self._tokens.pop()
            if isinstance(token, tokens.HeadingEnd):
                title = self._pop()
                return Heading(title, level)
            else:
                self._write(self._handle_token(token))

    def _handle_attribute(self):
        """Consume the tokens of one tag attribute and return an Attribute.

        The token that ends the attribute (the start of the next attribute or
        either kind of tag close) is put back for the caller.
        """
        name, quoted = None, False
        self._push()
        while self._tokens:
            token = self._tokens.pop()
            if isinstance(token, tokens.TagAttrEquals):
                name = self._pop()
                self._push()
            elif isinstance(token, tokens.TagAttrQuote):
                quoted = True
            elif isinstance(token, (tokens.TagAttrStart,
                                    tokens.TagCloseOpen,
                                    tokens.TagCloseSelfclose)):
                # A self-closing tag also ends its last attribute.
                self._tokens.append(token)
                if name is not None:
                    return Attribute(name, self._pop(), quoted)
                return Attribute(self._pop(), quoted=quoted)
            else:
                self._write(self._handle_token(token))

    def _handle_tag(self, token):
        """Consume tokens up to the end of the tag and return a Tag.

        *token* is the ``TagOpenOpen`` token, which supplies ``type`` and
        ``showtag``.
        """
        type_, showtag = token.type, token.showtag
        attrs = []
        self._push()
        while self._tokens:
            token = self._tokens.pop()
            if isinstance(token, tokens.TagAttrStart):
                attrs.append(self._handle_attribute())
            elif isinstance(token, tokens.TagCloseOpen):
                open_pad = token.padding
                tag = self._pop()
                self._push()
            elif isinstance(token, tokens.TagCloseSelfclose):
                tag = self._pop()
                return Tag(type_, tag, attrs=attrs, showtag=showtag,
                           self_closing=True, open_padding=token.padding)
            elif isinstance(token, tokens.TagOpenClose):
                contents = self._pop()
                # Collect whatever appears inside the closing tag on its own
                # stack so it cannot leak into the enclosing node list.
                self._push()
            elif isinstance(token, tokens.TagCloseClose):
                self._pop(wrap=False)  # Discard the closing tag's own tokens
                return Tag(type_, tag, contents, attrs, showtag, False,
                           open_pad, token.padding)
            else:
                self._write(self._handle_token(token))

    def _handle_token(self, token):
        """Return the node for *token*, consuming further tokens as needed.

        Returns ``None`` for token types not listed here.
        """
        if isinstance(token, tokens.Text):
            return Text(token.text)
        elif isinstance(token, tokens.TemplateOpen):
            return self._handle_template()
        elif isinstance(token, tokens.HTMLEntityStart):
            return self._handle_entity()
        elif isinstance(token, tokens.HeadingStart):
            return self._handle_heading(token)
        elif isinstance(token, tokens.TagOpenOpen):
            return self._handle_tag(token)

    def build(self, tokenlist):
        """Build and return the wrapped node list for *tokenlist*.

        *tokenlist* is copied before being reversed and consumed, so the
        caller's sequence is left untouched. Any stacks left over from an
        earlier, interrupted build are dropped first.
        """
        self._tokens = list(tokenlist)
        self._tokens.reverse()
        self._stacks = []
        self._push()
        while self._tokens:
            node = self._handle_token(self._tokens.pop())
            self._write(node)
        return self._pop()
@@ -0,0 +1,41 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
# Local (stack-specific) contexts. Each flag is a single bit; TEMPLATE and
# HEADING are masks covering every flag of their group.

TEMPLATE_NAME = 1 << 0
TEMPLATE_PARAM_KEY = 1 << 1
TEMPLATE_PARAM_VALUE = 1 << 2
TEMPLATE = TEMPLATE_NAME | TEMPLATE_PARAM_KEY | TEMPLATE_PARAM_VALUE

HEADING_LEVEL_1 = 1 << 3
HEADING_LEVEL_2 = 1 << 4
HEADING_LEVEL_3 = 1 << 5
HEADING_LEVEL_4 = 1 << 6
HEADING_LEVEL_5 = 1 << 7
HEADING_LEVEL_6 = 1 << 8
HEADING = (HEADING_LEVEL_1 | HEADING_LEVEL_2 | HEADING_LEVEL_3 |
           HEADING_LEVEL_4 | HEADING_LEVEL_5 | HEADING_LEVEL_6)

# Global contexts:

GL_HEADING = 1 << 0
@@ -1,53 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from ..nodes import Template, Text | |||
from ..nodes.extras import Parameter | |||
from ..smart_list import SmartList | |||
from ..wikicode import Wikicode | |||
__all__ = ["DemoParser"] | |||
class DemoParser(object):
    """Stand-in parser that returns a fixed, hand-built tree."""

    def __init__(self, text):
        self.text = text

    def _tokenize(self):
        """Return an empty list; no tokenization is performed."""
        return []

    def parse(self):
        """Return the hand-built tree for a fixed sample sentence.

        The sample is ``This is a {{test}} message with a
        {{template|with|foo={{params}}}}.`` and must be unicode;
        ``self.text`` is not consulted.
        """
        def wrap(string):
            # Every name and plain value is a Wikicode holding one Text node.
            return Wikicode([Text(string)])

        intro = Text(u"This is a ")
        simple = Template(wrap(u"test"))
        middle = Text(u" message with a ")
        positional = Parameter(wrap(u"1"), wrap(u"with"), showkey=False)
        keyed_name = wrap(u"foo")
        keyed_value = Wikicode([Template(wrap(u"params"))])
        keyed = Parameter(keyed_name, keyed_value, showkey=True)
        nested = Template(wrap(u"template"), [positional, keyed])
        outro = Text(u".")
        return Wikicode(SmartList([intro, simple, middle, nested, outro]))
@@ -0,0 +1,285 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import htmlentitydefs | |||
from math import log | |||
import re | |||
import string | |||
from . import contexts | |||
from . import tokens | |||
__all__ = ["Tokenizer"] | |||
class BadRoute(Exception):
    """Raised by ``Tokenizer._fail_route`` to abandon the current route."""
class Tokenizer(object):
    """Split a piece of text into a flat list of tokens.

    The text is first cut into segments around the marker characters matched
    by ``regex``; ``_head`` indexes the segment currently being examined.
    Work in progress lives on ``_stacks``, a list of ``[tokens, context,
    textbuffer]`` entries, of which the last one is the active stack.
    """

    START = object()  # Returned by _read() for positions before the text
    END = object()    # Returned by _read() for positions after the text
    MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":",
               "/", "-", "\n", END]
    regex = re.compile(r"([{}\[\]<>|=&#*;:/\-\n])", flags=re.IGNORECASE)

    def __init__(self):
        self._text = None
        self._head = 0
        self._stacks = []
        self._global = 0

    @property
    def _stack(self):
        """The token list of the active stack."""
        return self._stacks[-1][0]

    @property
    def _context(self):
        """The context flags of the active stack."""
        return self._stacks[-1][1]

    @_context.setter
    def _context(self, value):
        self._stacks[-1][1] = value

    @property
    def _textbuffer(self):
        """Text pieces of the active stack not yet turned into a token."""
        return self._stacks[-1][2]

    @_textbuffer.setter
    def _textbuffer(self, value):
        self._stacks[-1][2] = value

    def _push(self, context=0):
        """Make a new, empty stack with the given *context* the active one."""
        self._stacks.append([[], context, []])

    def _push_textbuffer(self):
        """Flush buffered text, if any, to the active stack as a Text token."""
        if self._textbuffer:
            self._stack.append(tokens.Text(text="".join(self._textbuffer)))
            self._textbuffer = []

    def _pop(self):
        """Flush buffered text, drop the active stack, return its tokens."""
        self._push_textbuffer()
        return self._stacks.pop()[0]

    def _fail_route(self):
        """Discard the active stack and raise :exc:`BadRoute`."""
        self._pop()
        raise BadRoute()

    def _write(self, token):
        """Append *token* to the active stack after flushing buffered text."""
        self._push_textbuffer()
        self._stack.append(token)

    def _write_text(self, text):
        """Add *text* to the active stack's text buffer."""
        self._textbuffer.append(text)

    def _write_all(self, tokenlist):
        """Append every token in *tokenlist* to the active stack.

        A leading Text token is removed from *tokenlist* and merged into the
        text buffer so that it joins any text already buffered.
        """
        if tokenlist and isinstance(tokenlist[0], tokens.Text):
            self._write_text(tokenlist.pop(0).text)
        self._push_textbuffer()
        self._stack.extend(tokenlist)

    def _read(self, delta=0, wrap=False, strict=False):
        """Return the segment *delta* positions away from the head.

        Returns ``START`` for a negative position (unless *wrap* is true and
        the position is a valid negative index) and ``END`` for a position
        past the last segment. With *strict*, running past the end fails the
        current route instead of returning ``END``.
        """
        index = self._head + delta
        if index < 0 and (not wrap or abs(index) > len(self._text)):
            return self.START
        try:
            return self._text[index]
        except IndexError:
            if strict:
                self._fail_route()
            return self.END

    def _parse_template(self):
        """Try to parse a template at the head; fall back to literal text."""
        reset = self._head
        self._head += 2
        try:
            template = self._parse(contexts.TEMPLATE_NAME)
        except BadRoute:
            self._head = reset
            self._write_text(self._read())
        else:
            self._write(tokens.TemplateOpen())
            self._write_all(template)
            self._write(tokens.TemplateClose())

    def _verify_template_name(self):
        """Fail the route if the template name has a newline inside it.

        Only the Text tokens on the active stack are considered, and leading
        and trailing whitespace is ignored.
        """
        self._push_textbuffer()
        if self._stack:
            text = [tok for tok in self._stack if isinstance(tok, tokens.Text)]
            text = "".join([token.text for token in text])
            if text.strip() and "\n" in text.strip():
                self._fail_route()

    def _handle_template_param(self):
        """Handle a ``|`` inside a template: start a new parameter key."""
        if self._context & contexts.TEMPLATE_NAME:
            self._verify_template_name()
            self._context ^= contexts.TEMPLATE_NAME
        if self._context & contexts.TEMPLATE_PARAM_VALUE:
            self._context ^= contexts.TEMPLATE_PARAM_VALUE
        self._context |= contexts.TEMPLATE_PARAM_KEY
        self._write(tokens.TemplateParamSeparator())

    def _handle_template_param_value(self):
        """Handle a ``=`` inside a parameter key: switch to its value."""
        self._context ^= contexts.TEMPLATE_PARAM_KEY
        self._context |= contexts.TEMPLATE_PARAM_VALUE
        self._write(tokens.TemplateParamEquals())

    def _handle_template_end(self):
        """Handle ``}}``: verify the name if needed, return the stack."""
        if self._context & contexts.TEMPLATE_NAME:
            self._verify_template_name()
        self._head += 1
        return self._pop()

    def _parse_heading(self):
        """Try to parse a heading at the head; fall back to literal text."""
        self._global |= contexts.GL_HEADING
        reset = self._head
        self._head += 1
        best = 1
        while self._read() == "=":
            best += 1
            self._head += 1
        # Levels above six share the context of level six.
        context = contexts.HEADING_LEVEL_1 << min(best - 1, 5)

        try:
            title, level = self._parse(context)
        except BadRoute:
            self._head = reset + best - 1
            self._write_text("=" * best)
        else:
            self._write(tokens.HeadingStart(level=level))
            if level < best:
                # Surplus opening "=" signs belong to the title text.
                self._write_text("=" * (best - level))
            self._write_all(title)
            self._write(tokens.HeadingEnd())
        finally:
            self._global ^= contexts.GL_HEADING

    def _handle_heading_end(self):
        """Handle a run of ``=`` inside a heading.

        Returns a ``(tokens, level)`` pair. If parsing the rest of the line
        as more heading content succeeds, this run is literal text and the
        later result's level is used; otherwise this run closes the heading.
        """
        reset = self._head
        self._head += 1
        best = 1
        while self._read() == "=":
            best += 1
            self._head += 1
        # Floor division keeps this an integer even under true division.
        current = int(log(self._context // contexts.HEADING_LEVEL_1, 2)) + 1
        level = min(current, min(best, 6))

        try:
            after, after_level = self._parse(self._context)
        except BadRoute:
            if level < best:
                self._write_text("=" * (best - level))
            self._head = reset + best - 1
            return self._pop(), level
        else:
            self._write_text("=" * best)
            self._write_all(after)
            return self._pop(), after_level

    def _really_parse_entity(self):
        """Parse an HTML entity at the head, failing the route if invalid."""
        self._write(tokens.HTMLEntityStart())
        self._head += 1

        this = self._read(strict=True)
        if this == "#":
            numeric = True
            self._write(tokens.HTMLEntityNumeric())
            self._head += 1
            this = self._read(strict=True)
            if this[0].lower() == "x":
                hexadecimal = True
                self._write(tokens.HTMLEntityHex(char=this[0]))
                this = this[1:]
                if not this:
                    self._fail_route()
            else:
                hexadecimal = False
        else:
            numeric = hexadecimal = False

        valid = string.hexdigits if hexadecimal else string.digits
        if not numeric and not hexadecimal:
            valid += string.ascii_letters
        if not all([char in valid for char in this]):
            self._fail_route()

        self._head += 1
        if self._read() != ";":
            self._fail_route()
        if numeric:
            test = int(this, 16) if hexadecimal else int(this)
            if test < 1 or test > 0x10FFFF:
                self._fail_route()
        else:
            if this not in htmlentitydefs.entitydefs:
                self._fail_route()

        self._write(tokens.Text(text=this))
        self._write(tokens.HTMLEntityEnd())

    def _parse_entity(self):
        """Try to parse an HTML entity at the head; fall back to text."""
        reset = self._head
        self._push()
        try:
            self._really_parse_entity()
        except BadRoute:
            self._head = reset
            self._write_text(self._read())
        else:
            self._write_all(self._pop())

    def _parse(self, context=0):
        """Parse from the head on a new stack created with *context*.

        Returns the tokens of that stack (or, from a heading context, the
        ``(tokens, level)`` pair produced by :meth:`_handle_heading_end`).
        Reaching the end of the text inside a template or heading fails the
        route.
        """
        self._push(context)
        while True:
            this = self._read()
            if this not in self.MARKERS:
                self._write_text(this)
                self._head += 1
                continue
            if this is self.END:
                if self._context & (contexts.TEMPLATE | contexts.HEADING):
                    self._fail_route()
                return self._pop()
            before, after = self._read(-1), self._read(1)
            if this == after == "{":
                self._parse_template()
            elif this == "|" and self._context & contexts.TEMPLATE:
                self._handle_template_param()
            elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY:
                self._handle_template_param_value()
            elif this == after == "}" and self._context & contexts.TEMPLATE:
                return self._handle_template_end()
            elif ((before == "\n" or before == self.START) and this == "=" and
                    not self._global & contexts.GL_HEADING):
                self._parse_heading()
            elif this == "=" and self._context & contexts.HEADING:
                return self._handle_heading_end()
            elif this == "\n" and self._context & contexts.HEADING:
                self._fail_route()
            elif this == "&":
                self._parse_entity()
            else:
                self._write_text(this)
            self._head += 1

    def tokenize(self, text):
        """Split *text* into segments and return the list of parsed tokens.

        All per-run state is reset first, so one instance can tokenize any
        number of texts in sequence.
        """
        split = self.regex.split(text)
        self._text = [segment for segment in split if segment]
        # Without this reset a second call would start reading past the end
        # of the new text and yield no tokens.
        self._head = 0
        self._global = 0
        self._stacks = []
        return self._parse()
@@ -0,0 +1,81 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
__all__ = ["Token"] | |||
class Token(object):
    """Base class for tokens; keyword arguments become attributes.

    All attributes live in the ``_kwargs`` dict: reading, assigning and
    deleting an attribute reads, sets and deletes the matching key.
    """

    def __init__(self, **kwargs):
        # Bypass our own __setattr__, which would store into _kwargs itself.
        super(Token, self).__setattr__("_kwargs", kwargs)

    def __repr__(self):
        args = []
        for key, value in self._kwargs.items():
            # Long strings are cut to 97 characters plus an ellipsis.
            if isinstance(value, basestring) and len(value) > 100:
                args.append(key + "=" + repr(value[:97] + "..."))
            else:
                args.append(key + "=" + repr(value))
        return u"{0}({1})".format(type(self).__name__, u", ".join(args))

    def __eq__(self, other):
        if isinstance(other, type(self)):
            return self._kwargs == other._kwargs
        return False

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly.
        return not self.__eq__(other)

    def __getattr__(self, key):
        """Return the stored value for *key*.

        Raises ``AttributeError`` (not ``KeyError``) for unknown names so
        that ``hasattr()``, ``getattr()`` with a default and ``copy`` work.
        """
        if key == "_kwargs":
            # Reached only when _kwargs is not set yet (e.g. on an instance
            # created without __init__); looking it up again would recurse.
            raise AttributeError(key)
        try:
            return self._kwargs[key]
        except KeyError:
            raise AttributeError(key)

    def __setattr__(self, key, value):
        self._kwargs[key] = value

    def __delattr__(self, key):
        try:
            del self._kwargs[key]
        except KeyError:
            raise AttributeError(key)
def make(name):
    """Create a Token subclass called *name* and list it in ``__all__``."""
    token_class = type(name, (Token,), {})
    __all__.append(name)
    return token_class


Text = make("Text")

TemplateOpen = make("TemplateOpen")                      # {{
TemplateParamSeparator = make("TemplateParamSeparator")  # |
TemplateParamEquals = make("TemplateParamEquals")        # =
TemplateClose = make("TemplateClose")                    # }}

HTMLEntityStart = make("HTMLEntityStart")                # &
HTMLEntityNumeric = make("HTMLEntityNumeric")            # #
HTMLEntityHex = make("HTMLEntityHex")                    # x
HTMLEntityEnd = make("HTMLEntityEnd")                    # ;

HeadingStart = make("HeadingStart")                      # =...
HeadingEnd = make("HeadingEnd")                          # =...

TagOpenOpen = make("TagOpenOpen")                        # <
TagAttrStart = make("TagAttrStart")
TagAttrEquals = make("TagAttrEquals")                    # =
TagAttrQuote = make("TagAttrQuote")                      # "
TagCloseOpen = make("TagCloseOpen")                      # >
TagCloseSelfclose = make("TagCloseSelfclose")            # />
TagOpenClose = make("TagOpenClose")                      # </
TagCloseClose = make("TagCloseClose")                    # >

# The factory is only needed while the module is being set up.
del make
@@ -81,6 +81,7 @@ class SmartList(list): | |||
def __iadd__(self, other): | |||
self.extend(other) | |||
return self | |||
def append(self, item): | |||
head = len(self) | |||
@@ -221,6 +222,7 @@ class _ListProxy(list): | |||
def __iadd__(self, other): | |||
self.extend(other) | |||
return self | |||
@property | |||
def _start(self): | |||
@@ -22,24 +22,25 @@ | |||
import mwparserfromhell | |||
from .nodes import Node | |||
from .smart_list import SmartList | |||
def parse_anything(value): | |||
wikicode = mwparserfromhell.wikicode.Wikicode | |||
if isinstance(value, wikicode): | |||
return value | |||
if isinstance(value, Node): | |||
return wikicode([value]) | |||
return wikicode(SmartList([value])) | |||
if isinstance(value, basestring): | |||
return mwparserfromhell.parse(value) | |||
if isinstance(value, int): | |||
return mwparserfromhell.parse(unicode(value)) | |||
if value is None: | |||
return wikicode([]) | |||
return wikicode(SmartList()) | |||
try: | |||
nodelist = [] | |||
nodelist = SmartList() | |||
for item in value: | |||
nodelist += parse_anything(item).nodes | |||
except TypeError: | |||
error = "Needs string, Node, Wikicode, int, None, or iterable of these, but got {0}: {1}" | |||
raise ValueError(error.format(type(value), value)) | |||
raise ValueError(error.format(type(value).__name__, value)) | |||
return wikicode(nodelist) |
@@ -105,6 +105,10 @@ class Wikicode(StringMixIn): | |||
def nodes(self): | |||
return self._nodes | |||
@nodes.setter | |||
def nodes(self, value): | |||
self._nodes = value | |||
def get(self, index): | |||
return self.nodes[index] | |||