Browse Source

Improve a few things about nodes; simplify a method in Wikicode.

tags/v0.2
Ben Kurtovic 11 years ago
parent
commit
8db40689ed
3 changed files with 43 additions and 17 deletions
  1. +38
    -9
      mwparserfromhell/nodes/html_entity.py
  2. +4
    -1
      mwparserfromhell/nodes/wikilink.py
  3. +1
    -7
      mwparserfromhell/wikicode.py

+ 38
- 9
mwparserfromhell/nodes/html_entity.py View File

@@ -63,7 +63,8 @@ class HTMLEntity(Node):
return self.normalize() return self.normalize()
return self return self


def _unichr(self, value):
@staticmethod
def _unichr(value):
"""Implement the builtin unichr() with support for non-BMP code points. """Implement the builtin unichr() with support for non-BMP code points.


On wide Python builds, this functions like the normal unichr(). On On wide Python builds, this functions like the normal unichr(). On
@@ -119,19 +120,47 @@ class HTMLEntity(Node):
@value.setter @value.setter
def value(self, newval): def value(self, newval):
newval = str(newval) newval = str(newval)
if newval not in htmlentities.entitydefs:
test = int(self.value, 16)
if test < 0 or (test > 0x10FFFF and int(self.value) > 0x10FFFF):
raise ValueError(newval)
try:
int(newval)
except ValueError:
try:
int(newval, 16)
except ValueError:
if newval not in htmlentities.entitydefs:
raise ValueError("entity value is not a valid name")
self._named = True
self._hexadecimal = False
else:
if int(newval, 16) < 0 or int(newval, 16) > 0x10FFFF:
raise ValueError("entity value is not in range(0x110000)")
self._named = False
self._hexadecimal = True
else:
test = int(newval, 16 if self.hexadecimal else 10)
if test < 0 or test > 0x10FFFF:
raise ValueError("entity value is not in range(0x110000)")
self._named = False
self._value = newval self._value = newval


@named.setter @named.setter
def named(self, newval): def named(self, newval):
self._named = bool(newval)
newval = bool(newval)
if newval and self.value not in htmlentities.entitydefs:
raise ValueError("entity value is not a valid name")
if not newval:
try:
int(self.value, 16)
except ValueError:
err = "current entity value is not a valid Unicode codepoint"
raise ValueError(err)
self._named = newval


@hexadecimal.setter @hexadecimal.setter
def hexadecimal(self, newval): def hexadecimal(self, newval):
self._hexadecimal = bool(newval)
newval = bool(newval)
if newval and self.named:
raise ValueError("a named entity cannot be hexadecimal")
self._hexadecimal = newval


@hex_char.setter @hex_char.setter
def hex_char(self, newval): def hex_char(self, newval):
@@ -145,5 +174,5 @@ class HTMLEntity(Node):
if self.named: if self.named:
return unichr(htmlentities.name2codepoint[self.value]) return unichr(htmlentities.name2codepoint[self.value])
if self.hexadecimal: if self.hexadecimal:
return self._unichr(int(self.value, 16))
return self._unichr(int(self.value))
return HTMLEntity._unichr(int(self.value, 16))
return HTMLEntity._unichr(int(self.value))

+ 4
- 1
mwparserfromhell/nodes/wikilink.py View File

@@ -79,4 +79,7 @@ class Wikilink(Node):


@text.setter @text.setter
def text(self, value): def text(self, value):
self._text = parse_anything(value)
if value is None:
self._text = None
else:
self._text = parse_anything(value)

+ 1
- 7
mwparserfromhell/wikicode.py View File

@@ -88,13 +88,7 @@ class Wikicode(StringMixIn):
If *obj* is a ``Node``, the function will test whether they are the If *obj* is a ``Node``, the function will test whether they are the
same object, otherwise it will compare them with ``==``. same object, otherwise it will compare them with ``==``.
""" """
if isinstance(obj, Node):
if node is obj:
return True
else:
if node == obj:
return True
return False
return (node is obj) if isinstance(obj, Node) else (node == obj)


def _contains(self, nodes, obj): def _contains(self, nodes, obj):
"""Return ``True`` if *obj* is inside of *nodes*, else ``False``. """Return ``True`` if *obj* is inside of *nodes*, else ``False``.


Loading…
Cancel
Save