From 9d1e77000c34aab6389e8cc2b7dbd174c0c26008 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 21 Jun 2012 17:23:12 -0400 Subject: [PATCH] A bunch of updates from the past weeks. --- mwparserfromhell/__init__.py | 8 +- mwparserfromhell/node.py | 26 ++++++ mwparserfromhell/parameter.py | 108 +++---------------------- mwparserfromhell/parser.py | 23 ++++-- mwparserfromhell/string_mixin.py | 63 +++++++++++++++ mwparserfromhell/template.py | 69 ++++++---------- mwparserfromhell/text.py | 37 +++++++++ mwparserfromhell/wikicode.py | 171 +++++++++++++++++++++++++++++++++++++++ 8 files changed, 355 insertions(+), 150 deletions(-) create mode 100644 mwparserfromhell/node.py create mode 100644 mwparserfromhell/string_mixin.py create mode 100644 mwparserfromhell/text.py create mode 100644 mwparserfromhell/wikicode.py diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index 8c7be05..58a5d0b 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -32,5 +32,11 @@ __license__ = "MIT License" __version__ = "0.1.dev" __email__ = "ben.kurtovic@verizon.net" -from mwparserfromhell import parameter, parser, template +from mwparserfromhell.node import Node +from mwparserfromhell.parameter import Parameter from mwparserfromhell.parser import Parser +from mwparserfromhell.template import Template +from mwparserfromhell.text import Text +from mwparserfromhell.wikicode import Wikicode + +parse = Parser().parse diff --git a/mwparserfromhell/node.py b/mwparserfromhell/node.py new file mode 100644 index 0000000..b99b2c6 --- /dev/null +++ b/mwparserfromhell/node.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012 by Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +__all__ = ["Node"] + +class Node(object): + pass diff --git a/mwparserfromhell/parameter.py b/mwparserfromhell/parameter.py index e323e5f..5e5f591 100644 --- a/mwparserfromhell/parameter.py +++ b/mwparserfromhell/parameter.py @@ -20,108 +20,20 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from mwparserfromhell.template import Template +from mwparserfromhell.string_mixin import StringMixin __all__ = ["Parameter"] -class Parameter(object): - def __init__(self, name, value, templates=None): +class Parameter(StringMixin): + def __init__(self, name, value=None, showkey=True): self._name = name self._value = value - if templates: - self._templates = templates - else: - self._templates = [] + self._showkey = showkey - def __repr__(self): - return repr(self.value) - - def __str__(self): - return self.value - - def __lt__(self, other): - if isinstance(other, Parameter): - return self.value < other.value - return self.value < other - - def __le__(self, other): - if isinstance(other, Parameter): - return self.value <= other.value - return self.value <= other - - def __eq__(self, other): - if isinstance(other, Parameter): - return (self.name == other.name and self.value == other.value and - self.templates == other.templates) - return self.value == other - - def __ne__(self, other): - if isinstance(other, Parameter): - return (self.name != other.name or self.value != other.value or - self.templates != other.templates) - return self.value != other - - def __gt__(self, other): - if isinstance(other, Parameter): - return self.value > other.value - return self.value > other - - def __ge__(self, other): - if isinstance(other, Parameter): - return self.value >= other.value - return self.value >= other - - def __nonzero__(self): - return bool(self.value) - - def __len__(self): - return len(self.value) - - def __iter__(self): - for char in self.value: - yield char - - def __getitem__(self, key): - return self.value[key] - - def __contains__(self, item): - return item in self.value or item in self.templates - - def __add__(self, other): - if isinstance(other, Parameter): - return Parameter(self.name, self.value + other.value, - self.templates + other.templates) - if isinstance(other, Template): - return Parameter(self.name, self.value + other.render(), - self.templates 
+ [other]) - return self.value + other - - def __radd__(self, other): - if isinstance(other, Template): - return Template(other.name, other.params + [self]) - return other + self.value - - def __iadd__(self, other): - if isinstance(other, Parameter): - self.value += other.value - self.templates += other.templates - elif isinstance(other, Template): - self.value += other.render() - self.templates.append(other) - else: - self.value += other - return self - - def __mul__(self, other): - return Parameter(self.name, self.value * other, self.templates * other) - - def __rmul__(self, other): - return Parameter(self.name, other * self.value, other * self.templates) - - def __imul__(self, other): - self.value *= other - self.templates *= other - return self + def __unicode__(self): + if self.showkey: + return unicode(self.name) + "=" + unicode(self.value) + return unicode(self.value) @property def name(self): @@ -132,5 +44,5 @@ class Parameter(object): return self._value @property - def templates(self): - return self._templates + def showkey(self): + return self._showkey diff --git a/mwparserfromhell/parser.py b/mwparserfromhell/parser.py index 5f1622c..eff4bde 100644 --- a/mwparserfromhell/parser.py +++ b/mwparserfromhell/parser.py @@ -22,15 +22,28 @@ from mwparserfromhell.parameter import Parameter from mwparserfromhell.template import Template +from mwparserfromhell.text import Text +from mwparserfromhell.wikicode import Wikicode __all__ = ["Parser"] class Parser(object): def _tokenize(self, text): return text - + def parse(self, text): - tokens = self._tokenize(text) - params = [Parameter("1", "bar"), Parameter("2", "baz")] - templates = [Template(name="foo", params=params)] - return templates + text = u"This is a {{test}} message with a {{template|with|foo={{params}}}}." 
+ + node1 = Text(u"This is a ") + node2 = Template(Wikicode([Text(u"test")])) + node3 = Text(u" message with a ") + node4_param1_name = Wikicode([Text(u"1")]) + node4_param1_value = Wikicode([Text(u"with")]) + node4_param1 = Parameter(node4_param1_name, node4_param1_value, showkey=False) + node4_param2_name = Wikicode([Text(u"foo")]) + node4_param2_value = Wikicode([Template(Wikicode([Text(u"params")]))]) + node4_param2 = Parameter(node4_param2_name, node4_param2_value, showkey=True) + node4 = Template(Wikicode([Text(u"template")]), [node4_param1, node4_param2]) + node5 = Text(u".") + parsed = Wikicode([node1, node2, node3, node4, node5]) + return parsed diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py new file mode 100644 index 0000000..60e8fd5 --- /dev/null +++ b/mwparserfromhell/string_mixin.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012 by Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+__all__ = ["StringMixin"]
+
+class StringMixin(object):
+    """Mixin that lets an object be used like its unicode representation.
+
+    Subclasses must define ``__unicode__()``. ``str()``, ``repr()``, the rich
+    comparisons and indexing are all delegated to that unicode value. When the
+    other operand is also a StringMixin, it is converted to unicode before
+    comparing; otherwise it is compared as-is.
+    """
+
+    def __str__(self):
+        # Encode explicitly: a bare str() on a unicode object uses the ASCII
+        # codec and raises UnicodeEncodeError on non-ASCII text.
+        return unicode(self).encode("utf8")
+
+    def __repr__(self):
+        return repr(unicode(self))
+
+    def __lt__(self, other):
+        if isinstance(other, StringMixin):
+            return unicode(self) < unicode(other)
+        return unicode(self) < other
+
+    def __le__(self, other):
+        if isinstance(other, StringMixin):
+            return unicode(self) <= unicode(other)
+        return unicode(self) <= other
+
+    def __eq__(self, other):
+        if isinstance(other, StringMixin):
+            return unicode(self) == unicode(other)
+        return unicode(self) == other
+
+    def __ne__(self, other):
+        if isinstance(other, StringMixin):
+            return unicode(self) != unicode(other)
+        return unicode(self) != other
+
+    def __gt__(self, other):
+        if isinstance(other, StringMixin):
+            return unicode(self) > unicode(other)
+        return unicode(self) > other
+
+    def __ge__(self, other):
+        if isinstance(other, StringMixin):
+            return unicode(self) >= unicode(other)
+        return unicode(self) >= other
+
+    def __getitem__(self, index):
+        """Return the character (or slice) at *index* of the unicode form."""
+        return unicode(self)[index]
diff --git a/mwparserfromhell/template.py b/mwparserfromhell/template.py
index 7314ab7..3ff34ef 100644
--- a/mwparserfromhell/template.py
+++ b/mwparserfromhell/template.py
@@ -20,51 +20,25 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
-from collections import OrderedDict +from mwparserfromhell.node import Node +from mwparserfromhell.string_mixin import StringMixin __all__ = ["Template"] -class Template(object): +class Template(Node, StringMixin): def __init__(self, name, params=None): self._name = name - self._params = OrderedDict() if params: - for param in params: - self._params[param.name] = param - - def __repr__(self): - paramlist = [] - for name, param in self._params.iteritems(): - paramlist.append('"{0}": "{1}"'.format(name, str(param))) - params = "{" + ", ".join(paramlist) + "}" - return "Template(name={0}, params={1})".format(self.name, params) - - def __eq__(self, other): - if isinstance(other, Template): - return self.name == other.name and self.params == other.params - return self.render() == other - - def __ne__(self, other): - if isinstance(other, Template): - return self.name != other.name or self.params != other.params - return self.render() != other - - def __getitem__(self, key): - try: - return self._params[key] - except KeyError: # Try lookup by order in param list - return self._params.values()[key] + self._params = params + else: + self._params = [] - def __setitem__(self, key, value): - if isinstance(key, int): - if key > len(self._params): - raise IndexError("Index is too large") - elif key == len(self._params): # Simple addition to the end - self._params[key] = value - else: # We'll need to rebuild the OrderedDict - self._params + def __unicode__(self): + if self.params: + params = u"|".join([unicode(param) for param in self.params]) + return "{{" + unicode(self.name) + "|" + params + "}}" else: - self._params[key] = value + return "{{" + unicode(self.name) + "}}" @property def name(self): @@ -72,13 +46,16 @@ class Template(object): @property def params(self): - return self._params.values() + return self._params + + def has_param(self): + pass + + def get_param(self): + pass + + def add_param(self): + pass - def render(self): - params = "" - for param in self.params: - 
if param.name.isdigit() and "=" not in param.value: - params += "|" + param.value - else: - params += "|" + param.name + "=" + param.value - return "{{" + self.name + params + "}}" + def remove_param(self): + pass diff --git a/mwparserfromhell/text.py b/mwparserfromhell/text.py new file mode 100644 index 0000000..73dc779 --- /dev/null +++ b/mwparserfromhell/text.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012 by Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from mwparserfromhell.node import Node +from mwparserfromhell.string_mixin import StringMixin + +__all__ = ["Text"] + +class Text(Node, StringMixin): + def __init__(self, value): + self._value = value + + def __unicode__(self): + return unicode(self.value) + + @property + def value(self): + return self._value diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py new file mode 100644 index 0000000..28b3524 --- /dev/null +++ b/mwparserfromhell/wikicode.py @@ -0,0 +1,171 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012 by Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+
+import re
+
+import mwparserfromhell
+from mwparserfromhell.node import Node
+from mwparserfromhell.string_mixin import StringMixin
+from mwparserfromhell.template import Template
+from mwparserfromhell.text import Text
+
+__all__ = ["Wikicode"]
+
+FLAGS = re.I | re.S | re.U
+
+class Wikicode(StringMixin):
+    """A mutable list of nodes that renders as their concatenated text.
+
+    The list passed to the constructor is stored as-is (not copied), so the
+    mutating methods below modify that same list.
+    """
+
+    def __init__(self, nodes):
+        self._nodes = nodes
+
+    def __unicode__(self):
+        return "".join([unicode(node) for node in self.nodes])
+
+    def _nodify(self, value):
+        """Coerce *value* into a list of nodes.
+
+        A Wikicode yields its own node list, a Node yields a one-item list,
+        and a string is parsed with ``mwparserfromhell.parse()``. Any other
+        type raises ValueError.
+        """
+        if isinstance(value, Wikicode):
+            return value.nodes
+        if isinstance(value, Node):
+            return [value]
+        if isinstance(value, basestring):
+            return mwparserfromhell.parse(value).nodes
+        error = "Needs string, Node, or Wikicode object, but got {0}: {1}"
+        raise ValueError(error.format(type(value), value))
+
+    def _get_all_nodes(self, code):
+        """Yield every node in *code*, descending into templates.
+
+        For each template, the nodes of its name are yielded, then for each
+        parameter the nodes of its name (only when ``showkey`` is set) and of
+        its value.
+        """
+        for node in code.nodes:
+            yield node
+            if isinstance(node, Template):
+                for child in self._get_all_nodes(node.name):
+                    yield child
+                for param in node.params:
+                    if param.showkey:
+                        for child in self._get_all_nodes(param.name):
+                            yield child
+                    for child in self._get_all_nodes(param.value):
+                        yield child
+
+    def _show_tree(self, code, lines, marker=None, indent=0):
+        """Append a text rendering of *code*'s node tree to *lines*.
+
+        *marker* is a sentinel object appended to *lines* to signal that the
+        next write should continue the previous line instead of starting a
+        new one. Returns *lines*.
+        """
+        def write(*args):
+            if lines and lines[-1] is marker:  # Continue from the last line
+                lines.pop()  # Remove the marker
+                last = lines.pop()
+                lines.append(last + " ".join(args))
+            else:
+                lines.append(" " * indent + " ".join(args))
+
+        for node in code.nodes:
+            if isinstance(node, Template):
+                write("{{")
+                self._show_tree(node.name, lines, marker, indent + 1)
+                for param in node.params:
+                    write(" | ")
+                    lines.append(marker)  # Continue from this line
+                    self._show_tree(param.name, lines, marker, indent + 1)
+                    write(" = ")
+                    lines.append(marker)  # Continue from this line
+                    self._show_tree(param.value, lines, marker, indent + 1)
+                write("}}")
+            elif isinstance(node, Text):
+                write(unicode(node))
+            else:
+                raise NotImplementedError(node)
+        return lines
+
+    @property
+    def nodes(self):
+        return self._nodes
+
+    def get(self, index):
+        """Return the node at *index*."""
+        return self.nodes[index]
+
+    def set(self, index, value):
+        """Replace the node at *index* with *value*.
+
+        *value* must coerce to at most one node (ValueError otherwise). If it
+        coerces to no nodes at all, the node at *index* is removed instead.
+        Raises IndexError when *index* is out of range.
+        """
+        nodes = self._nodify(value)
+        if len(nodes) > 1:
+            raise ValueError("Cannot coerce multiple nodes into one index")
+        if index >= len(self.nodes) or -1 * index > len(self.nodes):
+            raise IndexError("List assignment index out of range")
+        if nodes:
+            # Replace in place; popping first would shift the list and make
+            # the assignment hit the wrong node.
+            self.nodes[index] = nodes[0]
+        else:
+            self.nodes.pop(index)
+
+    def index(self, obj):
+        """Return the position of *obj* in the node list.
+
+        Node instances are located by identity, anything else by equality.
+        Raises ValueError when *obj* is not found.
+        """
+        if obj not in self.nodes:
+            raise ValueError(obj)
+        if isinstance(obj, Node):
+            for i, node in enumerate(self.nodes):
+                if node is obj:
+                    return i
+            raise ValueError(obj)
+        return self.nodes.index(obj)
+
+    def insert(self, index, value, recursive=True):
+        """Insert the node(s) coerced from *value* at *index*, in order.
+
+        *recursive* is accepted but not used by this implementation.
+        """
+        nodes = self._nodify(value)
+        for node in reversed(nodes):
+            self.nodes.insert(index, node)
+
+    def insert_before(self, obj, value):
+        """Insert *value* just before *obj*; KeyError if *obj* is absent."""
+        if obj not in self.nodes:
+            raise KeyError(obj)
+        self.insert(self.index(obj), value)
+
+    def insert_after(self, obj, value):
+        """Insert *value* just after *obj*; KeyError if *obj* is absent."""
+        if obj not in self.nodes:
+            raise KeyError(obj)
+        self.insert(self.index(obj) + 1, value)
+
+    def append(self, value):
+        """Append the node(s) coerced from *value* to the end of the list."""
+        nodes = self._nodify(value)
+        for node in nodes:
+            self.nodes.append(node)
+
+    def remove(self, node):
+        """Remove *node* from the list; ValueError if it is not present."""
+        self.nodes.pop(self.index(node))
+
+    def ifilter(self, recursive=False, matches=None, flags=FLAGS,
+                forcetype=None):
+        """Lazily yield the nodes that pass the given filters.
+
+        With *recursive*, nodes nested inside templates are included. With
+        *matches*, only nodes whose unicode form matches that regex (searched
+        with *flags*) are kept. With *forcetype*, only instances of that type
+        are kept.
+        """
+        if recursive:
+            nodes = self._get_all_nodes(self)
+        else:
+            nodes = self.nodes
+        for node in nodes:
+            if not forcetype or isinstance(node, forcetype):
+                if not matches or re.search(matches, unicode(node), flags):
+                    yield node
+
+    def ifilter_templates(self, recursive=False, matches=None, flags=FLAGS):
+        """Lazy version of filter_templates(): iterate over Template nodes."""
+        return self.ifilter(recursive, matches, flags, forcetype=Template)
+
+    def ifilter_text(self, recursive=False, matches=None, flags=FLAGS):
+        """Lazy version of filter_text(): iterate over Text nodes."""
+        return self.ifilter(recursive, matches, flags, forcetype=Text)
+
+    def filter(self, recursive=False, matches=None, flags=FLAGS,
+               forcetype=None):
+        """Return the results of ifilter() as a list."""
+        return list(self.ifilter(recursive, matches, flags, forcetype))
+
+    def filter_templates(self, recursive=False, matches=None, flags=FLAGS):
+        """Return the results of ifilter_templates() as a list."""
+        return list(self.ifilter_templates(recursive, matches, flags))
+
+    def filter_text(self, recursive=False, matches=None, flags=FLAGS):
+        """Return the results of ifilter_text() as a list."""
+        return list(self.ifilter_text(recursive, matches, flags))
+
+    def show_tree(self):
+        """Print the node tree produced by _show_tree() to stdout."""
+        marker = object()  # Random object we can find with certainty in a list
+        print "\n".join(self._show_tree(self, [], marker))