diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index f770af3..e6ae477 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -34,4 +34,4 @@ __email__ = "ben.kurtovic@verizon.net" from mwparserfromhell import nodes, parser, string_mixin, wikicode -parse = lambda text: parser.Parser().parse(text) +parse = lambda text: parser.Parser(text).parse() diff --git a/mwparserfromhell/nodes/extras/parameter.py b/mwparserfromhell/nodes/extras/parameter.py index 6ab0319..8a53a98 100644 --- a/mwparserfromhell/nodes/extras/parameter.py +++ b/mwparserfromhell/nodes/extras/parameter.py @@ -20,6 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +from mwparserfromhell.parser.utils import parse_anything from mwparserfromhell.string_mixin import StringMixIn __all__ = ["Parameter"] @@ -46,3 +47,15 @@ class Parameter(StringMixIn): @property def showkey(self): return self._showkey + + @name.setter + def name(self, newval): + self._name = parse_anything(newval) + + @value.setter + def value(self, newval): + self._value = parse_anything(newval) + + @showkey.setter + def showkey(self, newval): + self._showkey = newval diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index e150289..7aa5034 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -27,13 +27,16 @@ from mwparserfromhell.nodes import Node __all__ = ["HTMLEntity"] class HTMLEntity(Node): - def __init__(self, value, named): + def __init__(self, value, named, hexadecimal=False): self._value = value self._named = named + self._hexadecimal = hexadecimal def __unicode__(self): if self.named: return u"&{0};".format(self.value) + if self.hexadecimal: + return u"&#x{0};".format(self.value) return u"&#{0};".format(self.value) @property @@ -44,7 +47,13 @@ class HTMLEntity(Node): def named(self): return self._named + @property + def hexadecimal(self): + return self._hexadecimal + def normalize(self): if self.named: return unichr(htmlentitydefs.name2codepoint[self.value]) + if self.hexadecimal: + return unichr(int(str(self.value), 16)) return unichr(self.value) diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 7def606..426a8d9 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -21,6 +21,8 @@ # SOFTWARE. from mwparserfromhell.nodes import Node +from mwparserfromhell.nodes.extras import Parameter +from mwparserfromhell.parser.utils import parse_anything __all__ = ["Template"] @@ -39,6 +41,9 @@ class Template(Node): else: return "{{" + unicode(self.name) + "}}" + def _blank_param_value(self, value): # TODO + pass + @property def name(self): return self._name @@ -47,14 +52,53 @@ class Template(Node): def params(self): return self._params - def has_param(self): - pass + def has_param(self, name): + name = name.strip() if isinstance(name, basestring) else unicode(name) + for param in self.params: + if param.name.strip() == name: + return True + return False - def get_param(self): - pass + def get_param(self, name): + name = name.strip() if isinstance(name, basestring) else unicode(name) + for param in self.params: + if param.name.strip() == name: + return param + raise ValueError(name) - def add_param(self): - pass + def add_param(self, name, value, showkey=None): + name, value = parse_anything(name), parse_anything(value) + surface_text = value.filter_text(recursive=False) + for node in surface_text: + node.replace("|", "|") # INSERT AS HTMLEntity INSTEAD OF RAW TEXT - def remove_param(self): - pass + if showkey is None: + if any(["=" in node for node in surface_text]): + showkey = True + else: + try: + int(name) + except ValueError: + showkey = False + else: + showkey = True + elif not showkey: + for node in surface_text: + node.replace("=", "|") # INSERT AS HTMLEntity INSTEAD OF RAW TEXT + + if self.has_param(name): + self.remove_param(name, keep_field=True) + existing = self.get_param(name).value + self.get_param(name).value = value # CONFORM TO FORMATTING? + else: + self.params.append(Parameter(name, value, showkey)) # CONFORM TO FORMATTING CONVENTIONS? + + def remove_param(self, name, keep_field=False): # DON'T MESS UP NUMBERING WITH show_key = False AND keep_field = False + name = name.strip() if isinstance(name, basestring) else unicode(name) + for param in self.params: + if param.name.strip() == name: + if keep_field: + return self._blank_param_value(param.value) + else: + return self.params.remove(param) + raise ValueError(name) diff --git a/mwparserfromhell/nodes/text.py b/mwparserfromhell/nodes/text.py index afdc235..2d5079b 100644 --- a/mwparserfromhell/nodes/text.py +++ b/mwparserfromhell/nodes/text.py @@ -34,3 +34,6 @@ class Text(Node): @property def value(self): return self._value + + def replace(self, old, new, count): # TODO + pass diff --git a/mwparserfromhell/parser/demo.py b/mwparserfromhell/parser/demo.py index 95e4e76..fe683e6 100644 --- a/mwparserfromhell/parser/demo.py +++ b/mwparserfromhell/parser/demo.py @@ -27,10 +27,13 @@ from mwparserfromhell.wikicode import Wikicode __all__ = ["DemoParser"] class DemoParser(object): - def _tokenize(self, text): - return text + def __init__(self, text): + self.text = text - def parse(self, text): + def _tokenize(self): + return [] + + def parse(self): # Ensure text is unicode! text = u"This is a {{test}} message with a {{template|with|foo={{params}}}}." diff --git a/mwparserfromhell/parser/utils.py b/mwparserfromhell/parser/utils.py new file mode 100644 index 0000000..b345e5c --- /dev/null +++ b/mwparserfromhell/parser/utils.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import mwparserfromhell +from mwparserfromhell.nodes import Node +from mwparserfromhell.wikicode import Wikicode + +def parse_anything(value): + if isinstance(value, Wikicode): + return value + if isinstance(value, Node): + return Wikicode([value]) + if isinstance(value, basestring): + return mwparserfromhell.parse(value) + if isinstance(value, int): + return mwparserfromhell.parse(unicode(value)) + if value is None: + return Wikicode([]) + try: + nodelist = [] + for item in value: + nodelist += parse_anything(item).nodes + except TypeError: + error = "Needs string, Node, Wikicode, int, None, or iterable of these, but got {0}: {1}" + raise ValueError(error.format(type(value), value)) + return Wikicode(nodelist) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 9243b9a..38bb737 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -22,8 +22,8 @@ import re -import mwparserfromhell from mwparserfromhell.nodes import HTMLEntity, Node, Template, Text +from mwparserfromhell.parser.utils import parse_anything from mwparserfromhell.string_mixin import StringMixIn __all__ = ["Wikicode"] @@ -37,24 +37,6 @@ class Wikicode(StringMixIn): def __unicode__(self): return "".join([unicode(node) for node in self.nodes]) - def _nodify(self, value): - if isinstance(value, Wikicode): - return value.nodes - if isinstance(value, Node): - return [value] - if isinstance(value, basestring): - return mwparserfromhell.parse(value).nodes - - try: - nodelist = list(value) - except TypeError: - error = "Needs string, Node, iterable of Nodes, or Wikicode object, but got {0}: {1}" - raise ValueError(error.format(type(value), value)) - if not all([isinstance(node, Node) for node in nodelist]): - error = "Was passed an interable {0}, but it did not contain all Nodes: {1}" - raise ValueError(error.format(type(value), value)) - return nodelist - def _iterate_over_children(self, node): yield (None, node) if isinstance(node, Template): @@ -147,7 +129,7 @@ class Wikicode(StringMixIn): return self.nodes[index] def set(self, index, value): - nodes = self._nodify(value) + nodes = parse_anything(value).nodes if len(nodes) > 1: raise ValueError("Cannot coerce multiple nodes into one index") if index >= len(self.nodes) or -1 * index > len(self.nodes): @@ -169,7 +151,7 @@ class Wikicode(StringMixIn): raise ValueError(obj) def insert(self, index, value): - nodes = self._nodify(value) + nodes = parse_anything(value).nodes for node in reversed(nodes): self.nodes.insert(index, node) @@ -189,7 +171,7 @@ class Wikicode(StringMixIn): self._do_search(obj, recursive, callback, self, value) def append(self, value): - nodes = self._nodify(value) + nodes = parse_anything(value).nodes for node in nodes: self.nodes.append(node)