Browse Source

A bunch of updates from the past weeks.

tags/v0.1
Ben Kurtovic 12 years ago
parent
commit
9d1e77000c
8 changed files with 355 additions and 150 deletions
  1. +7
    -1
      mwparserfromhell/__init__.py
  2. +26
    -0
      mwparserfromhell/node.py
  3. +10
    -98
      mwparserfromhell/parameter.py
  4. +18
    -5
      mwparserfromhell/parser.py
  5. +63
    -0
      mwparserfromhell/string_mixin.py
  6. +23
    -46
      mwparserfromhell/template.py
  7. +37
    -0
      mwparserfromhell/text.py
  8. +171
    -0
      mwparserfromhell/wikicode.py

+ 7
- 1
mwparserfromhell/__init__.py View File

@@ -32,5 +32,11 @@ __license__ = "MIT License"
__version__ = "0.1.dev"
__email__ = "ben.kurtovic@verizon.net"

from mwparserfromhell import parameter, parser, template
from mwparserfromhell.node import Node
from mwparserfromhell.parameter import Parameter
from mwparserfromhell.parser import Parser
from mwparserfromhell.template import Template
from mwparserfromhell.text import Text
from mwparserfromhell.wikicode import Wikicode

parse = Parser().parse

+ 26
- 0
mwparserfromhell/node.py View File

@@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 by Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

__all__ = ["Node"]

class Node(object):
    """Common base class for node types; defines no behaviour of its own."""

+ 10
- 98
mwparserfromhell/parameter.py View File

@@ -20,108 +20,20 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from mwparserfromhell.template import Template
from mwparserfromhell.string_mixin import StringMixin

__all__ = ["Parameter"]

class Parameter(object):
def __init__(self, name, value, templates=None):
class Parameter(StringMixin):
def __init__(self, name, value=None, showkey=True):
self._name = name
self._value = value
if templates:
self._templates = templates
else:
self._templates = []
self._showkey = showkey

def __repr__(self):
return repr(self.value)

def __str__(self):
return self.value

def __lt__(self, other):
if isinstance(other, Parameter):
return self.value < other.value
return self.value < other

def __le__(self, other):
if isinstance(other, Parameter):
return self.value <= other.value
return self.value <= other

def __eq__(self, other):
if isinstance(other, Parameter):
return (self.name == other.name and self.value == other.value and
self.templates == other.templates)
return self.value == other

def __ne__(self, other):
if isinstance(other, Parameter):
return (self.name != other.name or self.value != other.value or
self.templates != other.templates)
return self.value != other

def __gt__(self, other):
if isinstance(other, Parameter):
return self.value > other.value
return self.value > other

def __ge__(self, other):
if isinstance(other, Parameter):
return self.value >= other.value
return self.value >= other

def __nonzero__(self):
return bool(self.value)

def __len__(self):
return len(self.value)

def __iter__(self):
for char in self.value:
yield char

def __getitem__(self, key):
return self.value[key]

def __contains__(self, item):
return item in self.value or item in self.templates

def __add__(self, other):
if isinstance(other, Parameter):
return Parameter(self.name, self.value + other.value,
self.templates + other.templates)
if isinstance(other, Template):
return Parameter(self.name, self.value + other.render(),
self.templates + [other])
return self.value + other

def __radd__(self, other):
if isinstance(other, Template):
return Template(other.name, other.params + [self])
return other + self.value

def __iadd__(self, other):
if isinstance(other, Parameter):
self.value += other.value
self.templates += other.templates
elif isinstance(other, Template):
self.value += other.render()
self.templates.append(other)
else:
self.value += other
return self

def __mul__(self, other):
return Parameter(self.name, self.value * other, self.templates * other)

def __rmul__(self, other):
return Parameter(self.name, other * self.value, other * self.templates)

def __imul__(self, other):
self.value *= other
self.templates *= other
return self
def __unicode__(self):
if self.showkey:
return unicode(self.name) + "=" + unicode(self.value)
return unicode(self.value)

@property
def name(self):
@@ -132,5 +44,5 @@ class Parameter(object):
return self._value

@property
def templates(self):
return self._templates
def showkey(self):
return self._showkey

+ 18
- 5
mwparserfromhell/parser.py View File

@@ -22,15 +22,28 @@

from mwparserfromhell.parameter import Parameter
from mwparserfromhell.template import Template
from mwparserfromhell.text import Text
from mwparserfromhell.wikicode import Wikicode

__all__ = ["Parser"]

class Parser(object):
def _tokenize(self, text):
return text
def parse(self, text):
tokens = self._tokenize(text)
params = [Parameter("1", "bar"), Parameter("2", "baz")]
templates = [Template(name="foo", params=params)]
return templates
text = u"This is a {{test}} message with a {{template|with|foo={{params}}}}."

node1 = Text(u"This is a ")
node2 = Template(Wikicode([Text(u"test")]))
node3 = Text(u" message with a ")
node4_param1_name = Wikicode([Text(u"1")])
node4_param1_value = Wikicode([Text(u"with")])
node4_param1 = Parameter(node4_param1_name, node4_param1_value, showkey=False)
node4_param2_name = Wikicode([Text(u"foo")])
node4_param2_value = Wikicode([Template(Wikicode([Text(u"params")]))])
node4_param2 = Parameter(node4_param2_name, node4_param2_value, showkey=True)
node4 = Template(Wikicode([Text(u"template")]), [node4_param1, node4_param2])
node5 = Text(u".")
parsed = Wikicode([node1, node2, node3, node4, node5])
return parsed

+ 63
- 0
mwparserfromhell/string_mixin.py View File

@@ -0,0 +1,63 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 by Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

__all__ = ["StringMixin"]

class StringMixin(object):
    """Mixin that gives an object string-like behaviour via ``__unicode__``.

    Subclasses are expected to implement ``__unicode__``; every method here
    delegates to ``unicode(self)``.  When the other operand of a comparison
    is also a ``StringMixin`` it is converted with ``unicode()`` first;
    otherwise it is compared as given.
    """

    @staticmethod
    def _coerce(other):
        """Return ``unicode(other)`` for StringMixin operands, else *other*."""
        if isinstance(other, StringMixin):
            return unicode(other)
        return other

    def __str__(self):
        return str(unicode(self))

    def __repr__(self):
        return repr(unicode(self))

    def __lt__(self, other):
        return unicode(self) < self._coerce(other)

    def __le__(self, other):
        return unicode(self) <= self._coerce(other)

    def __eq__(self, other):
        return unicode(self) == self._coerce(other)

    # Defined explicitly rather than derived from __eq__, matching the other
    # rich-comparison methods.
    def __ne__(self, other):
        return unicode(self) != self._coerce(other)

    def __gt__(self, other):
        return unicode(self) > self._coerce(other)

    def __ge__(self, other):
        return unicode(self) >= self._coerce(other)

    def __getitem__(self, index):
        """Index or slice the unicode representation of this object."""
        return unicode(self)[index]

+ 23
- 46
mwparserfromhell/template.py View File

@@ -20,51 +20,25 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from collections import OrderedDict
from mwparserfromhell.node import Node
from mwparserfromhell.string_mixin import StringMixin

__all__ = ["Template"]

class Template(object):
class Template(Node, StringMixin):
def __init__(self, name, params=None):
self._name = name
self._params = OrderedDict()
if params:
for param in params:
self._params[param.name] = param

def __repr__(self):
paramlist = []
for name, param in self._params.iteritems():
paramlist.append('"{0}": "{1}"'.format(name, str(param)))
params = "{" + ", ".join(paramlist) + "}"
return "Template(name={0}, params={1})".format(self.name, params)

def __eq__(self, other):
if isinstance(other, Template):
return self.name == other.name and self.params == other.params
return self.render() == other

def __ne__(self, other):
if isinstance(other, Template):
return self.name != other.name or self.params != other.params
return self.render() != other

def __getitem__(self, key):
try:
return self._params[key]
except KeyError: # Try lookup by order in param list
return self._params.values()[key]
self._params = params
else:
self._params = []

def __setitem__(self, key, value):
if isinstance(key, int):
if key > len(self._params):
raise IndexError("Index is too large")
elif key == len(self._params): # Simple addition to the end
self._params[key] = value
else: # We'll need to rebuild the OrderedDict
self._params
def __unicode__(self):
if self.params:
params = u"|".join([unicode(param) for param in self.params])
return "{{" + unicode(self.name) + "|" + params + "}}"
else:
self._params[key] = value
return "{{" + unicode(self.name) + "}}"

@property
def name(self):
@@ -72,13 +46,16 @@ class Template(object):

@property
def params(self):
return self._params.values()
return self._params

def has_param(self):
pass

def get_param(self):
pass

def add_param(self):
pass

def render(self):
params = ""
for param in self.params:
if param.name.isdigit() and "=" not in param.value:
params += "|" + param.value
else:
params += "|" + param.name + "=" + param.value
return "{{" + self.name + params + "}}"
def remove_param(self):
pass

+ 37
- 0
mwparserfromhell/text.py View File

@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 by Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from mwparserfromhell.node import Node
from mwparserfromhell.string_mixin import StringMixin

__all__ = ["Text"]

class Text(Node, StringMixin):
    """A node wrapping a single value, rendered through ``unicode()``."""

    def __init__(self, value):
        self._value = value

    @property
    def value(self):
        """The value passed to the constructor (read-only)."""
        return self._value

    def __unicode__(self):
        return unicode(self.value)

+ 171
- 0
mwparserfromhell/wikicode.py View File

@@ -0,0 +1,171 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 by Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import re

import mwparserfromhell
from mwparserfromhell.node import Node
from mwparserfromhell.string_mixin import StringMixin
from mwparserfromhell.template import Template
from mwparserfromhell.text import Text

__all__ = ["Wikicode"]

FLAGS = re.I | re.S | re.U

class Wikicode(StringMixin):
    """An editable sequence of nodes that renders as their concatenation.

    String behaviour (comparison, indexing, ``str``/``repr``) is inherited
    from ``StringMixin`` and driven by ``__unicode__``.
    """

    def __init__(self, nodes):
        # The list is stored as given (not copied); the mutating methods
        # below edit it in place.
        self._nodes = nodes

    def __unicode__(self):
        return "".join([unicode(node) for node in self.nodes])

    def _nodify(self, value):
        """Coerce *value* into a list of nodes.

        A ``Wikicode`` yields its own node list, a ``Node`` is wrapped in a
        one-element list, and a string is run through
        ``mwparserfromhell.parse``.

        Raises:
            ValueError: if *value* is none of the above.
        """
        if isinstance(value, Wikicode):
            return value.nodes
        if isinstance(value, Node):
            return [value]
        if isinstance(value, str) or isinstance(value, unicode):
            return mwparserfromhell.parse(value).nodes
        error = "Needs string, Node, or Wikicode object, but got {0}: {1}"
        raise ValueError(error.format(type(value), value))

    def _get_all_nodes(self, code):
        """Yield every node in *code*, descending into templates.

        For each ``Template`` the nodes of its name are yielded, then for
        each parameter the nodes of its name (only when ``showkey`` is set)
        followed by the nodes of its value.
        """
        for node in code.nodes:
            yield node
            if isinstance(node, Template):
                for child in self._get_all_nodes(node.name):
                    yield child
                for param in node.params:
                    if param.showkey:
                        for child in self._get_all_nodes(param.name):
                            yield child
                    for child in self._get_all_nodes(param.value):
                        yield child

    def _show_tree(self, code, lines, marker=None, indent=0):
        """Append a textual tree rendering of *code* to *lines* and return it.

        *marker* is a sentinel object appended to *lines* to signal that the
        next write should continue the previous line instead of starting a
        new one.

        Raises:
            NotImplementedError: for a node that is neither Template nor Text.
        """
        def write(*args):
            if lines and lines[-1] is marker:  # Continue from the last line
                lines.pop()  # Remove the marker
                last = lines.pop()
                lines.append(last + " ".join(args))
            else:
                lines.append(" " * indent + " ".join(args))

        for node in code.nodes:
            if isinstance(node, Template):
                write("{{", )
                self._show_tree(node.name, lines, marker, indent + 1)
                for param in node.params:
                    write(" | ")
                    lines.append(marker)  # Continue from this line
                    self._show_tree(param.name, lines, marker, indent + 1)
                    write(" = ")
                    lines.append(marker)  # Continue from this line
                    self._show_tree(param.value, lines, marker, indent + 1)
                write("}}")
            elif isinstance(node, Text):
                write(unicode(node))
            else:
                raise NotImplementedError(node)
        return lines

    @property
    def nodes(self):
        """The underlying list of nodes."""
        return self._nodes

    def get(self, index):
        """Return the node at *index*."""
        return self.nodes[index]

    def set(self, index, value):
        """Replace the node at *index* with *value*.

        *value* is coerced with ``_nodify``.  If it yields exactly one node,
        that node takes over the slot; if it yields none, the slot is
        removed.

        Raises:
            ValueError: if *value* coerces to more than one node.
            IndexError: if *index* is outside the current node list.
        """
        nodes = self._nodify(value)
        if len(nodes) > 1:
            raise ValueError("Cannot coerce multiple nodes into one index")
        if index >= len(self.nodes) or -1 * index > len(self.nodes):
            raise IndexError("List assignment index out of range")
        # Assign in place rather than pop-then-assign: popping first shifts
        # the following node into ``index``, so the assignment would clobber
        # it (or fail outright when ``index`` was the last position).
        if nodes:
            self.nodes[index] = nodes[0]
        else:
            self.nodes.pop(index)

    def index(self, obj):
        """Return the position of *obj* in the node list.

        ``Node`` instances are located by identity; anything else falls back
        to ``list.index``.

        Raises:
            ValueError: if *obj* is not found.
        """
        if obj not in self.nodes:
            raise ValueError(obj)
        if isinstance(obj, Node):
            for i, node in enumerate(self.nodes):
                if node is obj:
                    return i
            raise ValueError(obj)
        return self.nodes.index(obj)

    def insert(self, index, value, recursive=True):
        """Insert the nodes of *value* at *index*, preserving their order.

        *recursive* is accepted but not used by this implementation.
        """
        nodes = self._nodify(value)
        # Inserting in reverse at a fixed index keeps the original ordering.
        for node in reversed(nodes):
            self.nodes.insert(index, node)

    def insert_before(self, obj, value):
        """Insert *value* immediately before *obj*.

        Raises:
            KeyError: if *obj* is not in the node list.
        """
        if obj not in self.nodes:
            raise KeyError(obj)
        self.insert(self.index(obj), value)

    def insert_after(self, obj, value):
        """Insert *value* immediately after *obj*.

        Raises:
            KeyError: if *obj* is not in the node list.
        """
        if obj not in self.nodes:
            raise KeyError(obj)
        self.insert(self.index(obj) + 1, value)

    def append(self, value):
        """Append the nodes of *value* to the end of the node list."""
        nodes = self._nodify(value)
        for node in nodes:
            self.nodes.append(node)

    def remove(self, node):
        """Remove *node* from the node list (located via ``index``)."""
        self.nodes.pop(self.index(node))

    def ifilter(self, recursive=False, matches=None, flags=FLAGS,
                forcetype=None):
        """Lazily yield nodes that pass the given filters.

        Args:
            recursive: also walk into templates via ``_get_all_nodes``.
            matches: optional regex searched against ``unicode(node)``.
            flags: ``re`` flags used with *matches*.
            forcetype: optional type (or tuple of types) a node must be an
                instance of.
        """
        if recursive:
            nodes = self._get_all_nodes(self)
        else:
            nodes = self.nodes
        for node in nodes:
            if not forcetype or isinstance(node, forcetype):
                if not matches or re.search(matches, unicode(node), flags):
                    yield node

    def ifilter_templates(self, recursive=False, matches=None, flags=FLAGS):
        """Lazy variant of ``filter_templates``; yields ``Template`` nodes."""
        return self.ifilter(recursive, matches, flags, forcetype=Template)

    def ifilter_text(self, recursive=False, matches=None, flags=FLAGS):
        """Lazy variant of ``filter_text``; yields ``Text`` nodes."""
        return self.ifilter(recursive, matches, flags, forcetype=Text)

    def filter(self, recursive=False, matches=None, flags=FLAGS,
               forcetype=None):
        """Return the results of ``ifilter`` as a list."""
        return list(self.ifilter(recursive, matches, flags, forcetype))

    def filter_templates(self, recursive=False, matches=None, flags=FLAGS):
        """Return the results of ``ifilter_templates`` as a list."""
        return list(self.ifilter_templates(recursive, matches, flags))

    def filter_text(self, recursive=False, matches=None, flags=FLAGS):
        """Return the results of ``ifilter_text`` as a list."""
        return list(self.ifilter_text(recursive, matches, flags))

    def show_tree(self):
        """Print the tree rendering produced by ``_show_tree``."""
        marker = object()  # Random object we can find with certainty in a list
        # Single-argument parenthesised form prints identically under the
        # Python 2 print statement and the print function.
        print("\n".join(self._show_tree(self, [], marker)))

Loading…
Cancel
Save