From 17053e47019979c9ea2d0c2d0aba97d96e15b71a Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 13 Aug 2012 19:53:49 -0400 Subject: [PATCH] Support &#Xhex; in addition to &#xhex;. --- mwparserfromhell/nodes/html_entity.py | 9 +++++++-- mwparserfromhell/parser/builder.py | 5 +++-- mwparserfromhell/parser/tokens.py | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index af046ea..8ba2cf6 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -27,7 +27,7 @@ from . import Node __all__ = ["HTMLEntity"] class HTMLEntity(Node): - def __init__(self, value, named=None, hexadecimal=False): + def __init__(self, value, named=None, hexadecimal=False, hex_char="x"): self._value = value if named is None: # Try to guess whether or not the entity is named try: @@ -45,12 +45,13 @@ class HTMLEntity(Node): else: self._named = named self._hexadecimal = hexadecimal + self._hex_char = hex_char def __unicode__(self): if self.named: return u"&{0};".format(self.value) if self.hexadecimal: - return u"&#x{0};".format(self.value) + return u"&#{0}{1};".format(self.hex_char, self.value) return u"&#{0};".format(self.value) def __strip__(self, normalize, collapse): @@ -93,6 +94,10 @@ class HTMLEntity(Node): def hexadecimal(self): return self._hexadecimal + @property + def hex_char(self): + return self._hex_char + def normalize(self): if self.named: return unichr(htmlentitydefs.name2codepoint[self.value]) diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index 80354a9..9ac6a70 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -88,8 +88,9 @@ class Builder(object): if isinstance(token, tokens.HTMLEntityNumeric): token = self._tokens.pop(0) if isinstance(token, tokens.HTMLEntityHex): - token = self._tokens.pop(0) - return HTMLEntity(token.text, named=False, hexadecimal=True) + text = self._tokens.pop(0) + return HTMLEntity(text.text, named=False, hexadecimal=True, + hex_char=token.char) return HTMLEntity(token.text, named=False, hexadecimal=False) return HTMLEntity(token.text, named=True, hexadecimal=False) diff --git a/mwparserfromhell/parser/tokens.py b/mwparserfromhell/parser/tokens.py index 322b801..6c77a5f 100644 --- a/mwparserfromhell/parser/tokens.py +++ b/mwparserfromhell/parser/tokens.py @@ -49,7 +49,7 @@ TemplateClose = make("TemplateClose") # }} HTMLEntityStart = make("HTMLEntityStart") # & HTMLEntityNumeric = make("HTMLEntityNumeric") # # -HTMLEntityHex = make("HTMLEntityHex") # X +HTMLEntityHex = make("HTMLEntityHex") # x HTMLEntityEnd = make("HTMLEntityEnd") # ; HeadingBlock = make("HeadingBlock") # =...