diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py
index 495b14a..8276d7e 100644
--- a/mwparserfromhell/nodes/html_entity.py
+++ b/mwparserfromhell/nodes/html_entity.py
@@ -30,9 +30,21 @@ class HTMLEntity(Node):
def __init__(self, value, named=None, hexadecimal=False):
self._value = value
if named is None: # Try to guess whether or not the entity is named
- named = False if isinstance(value, int) else True
- self._named = named
- self._hexadecimal = hexadecimal
+ try:
+ int(value)
+ self._named = False
+ self._hexadecimal = False
+ except ValueError:
+ try:
+ int(value, 16)
+ self._named = False
+ self._hexadecimal = True
+ except ValueError:
+ self._named = True
+ self._hexadecimal = False
+ else:
+ self._named = named
+ self._hexadecimal = hexadecimal
def __unicode__(self):
if self.named:
@@ -53,14 +65,15 @@ class HTMLEntity(Node):
# Test whether we're on the wide or narrow Python build. Check the
# length of a non-BMP code point (U+1F64A, SPEAK-NO-EVIL MONKEY):
if len(u"\U0001F64A") == 2:
- # Ensure this code point is within the range we can encode:
+ # Ensure the value is within the range we can encode:
if value > 0x10FFFF:
raise ValueError("unichr() arg not in range(0x110000)")
- if value >= 0x10000:
- code = value - 0x10000
- lead = 0xD800 + (code >> 10)
- trail = 0xDC00 + (code % (1 << 10))
- return unichr(lead) + unichr(trail)
+ code = value - 0x10000
+ if value < 0: # Invalid code point
+ raise
+ lead = 0xD800 + (code >> 10)
+ trail = 0xDC00 + (code % (1 << 10))
+ return unichr(lead) + unichr(trail)
raise
@property