Browse Source

Implement Python-build-agnostic version of unichr().

tags/v0.1
Ben Kurtovic 12 years ago
parent
commit
43fc28e8e8
1 changed files with 24 additions and 2 deletions
  1. +24
    -2
      mwparserfromhell/nodes/html_entity.py

+ 24
- 2
mwparserfromhell/nodes/html_entity.py View File

@@ -41,6 +41,28 @@ class HTMLEntity(Node):
return u"&#x{0};".format(self.value) return u"&#x{0};".format(self.value)
return u"&#{0};".format(self.value) return u"&#{0};".format(self.value)


def _unichr(self, value):
"""Implement the builtin unichr() with support for non-BMP code points.

On wide Python builds, this functions like the normal unichr(). On
narrow builds, this returns the value's corresponding surrogate pair.
"""
try:
return unichr(value)
except ValueError:
# Test whether we're on the wide or narrow Python build. Check the
# length of a non-BMP code point (U+1F64A, SPEAK-NO-EVIL MONKEY):
if len(u"\U0001F64A") == 2:
# Ensure this code point is within the range we can encode:
if value > 0x10FFFF:
raise ValueError("unichr() arg not in range(0x110000)")
if value >= 0x10000:
code = value - 0x10000
lead = 0xD800 + (code >> 10)
trail = 0xDC00 + (code % (1 << 10))
return unichr(lead) + unichr(trail)
raise

@property @property
def value(self): def value(self):
return self._value return self._value
@@ -57,5 +79,5 @@ class HTMLEntity(Node):
if self.named: if self.named:
return unichr(htmlentitydefs.name2codepoint[self.value]) return unichr(htmlentitydefs.name2codepoint[self.value])
if self.hexadecimal: if self.hexadecimal:
return unichr(int(str(self.value), 16))
return unichr(self.value)
return self._unichr(int(self.value, 16))
return self._unichr(int(self.value))

Loading…
Cancel
Save