Browse Source

Support &#Xhex; in addition to &#xhex;.

tags/v0.1
Ben Kurtovic 12 years ago
parent
commit
17053e4701
3 changed files with 11 additions and 5 deletions
  1. mwparserfromhell/nodes/html_entity.py (+7, -2)
  2. mwparserfromhell/parser/builder.py (+3, -2)
  3. mwparserfromhell/parser/tokens.py (+1, -1)

mwparserfromhell/nodes/html_entity.py (+7, -2) View File

@@ -27,7 +27,7 @@ from . import Node
__all__ = ["HTMLEntity"] __all__ = ["HTMLEntity"]


class HTMLEntity(Node): class HTMLEntity(Node):
def __init__(self, value, named=None, hexadecimal=False):
def __init__(self, value, named=None, hexadecimal=False, hex_char="x"):
self._value = value self._value = value
if named is None: # Try to guess whether or not the entity is named if named is None: # Try to guess whether or not the entity is named
try: try:
@@ -45,12 +45,13 @@ class HTMLEntity(Node):
else: else:
self._named = named self._named = named
self._hexadecimal = hexadecimal self._hexadecimal = hexadecimal
self._hex_char = hex_char


def __unicode__(self): def __unicode__(self):
if self.named: if self.named:
return u"&{0};".format(self.value) return u"&{0};".format(self.value)
if self.hexadecimal: if self.hexadecimal:
return u"&#x{0};".format(self.value)
return u"&#{0}{1};".format(self.hex_char, self.value)
return u"&#{0};".format(self.value) return u"&#{0};".format(self.value)


def __strip__(self, normalize, collapse): def __strip__(self, normalize, collapse):
@@ -93,6 +94,10 @@ class HTMLEntity(Node):
def hexadecimal(self): def hexadecimal(self):
return self._hexadecimal return self._hexadecimal


@property
def hex_char(self):
return self._hex_char

def normalize(self): def normalize(self):
if self.named: if self.named:
return unichr(htmlentitydefs.name2codepoint[self.value]) return unichr(htmlentitydefs.name2codepoint[self.value])


mwparserfromhell/parser/builder.py (+3, -2) View File

@@ -88,8 +88,9 @@ class Builder(object):
if isinstance(token, tokens.HTMLEntityNumeric): if isinstance(token, tokens.HTMLEntityNumeric):
token = self._tokens.pop(0) token = self._tokens.pop(0)
if isinstance(token, tokens.HTMLEntityHex): if isinstance(token, tokens.HTMLEntityHex):
token = self._tokens.pop(0)
return HTMLEntity(token.text, named=False, hexadecimal=True)
text = self._tokens.pop(0)
return HTMLEntity(text.text, named=False, hexadecimal=True,
hex_char=token.char)
return HTMLEntity(token.text, named=False, hexadecimal=False) return HTMLEntity(token.text, named=False, hexadecimal=False)
return HTMLEntity(token.text, named=True, hexadecimal=False) return HTMLEntity(token.text, named=True, hexadecimal=False)




mwparserfromhell/parser/tokens.py (+1, -1) View File

@@ -49,7 +49,7 @@ TemplateClose = make("TemplateClose") # }}


HTMLEntityStart = make("HTMLEntityStart") # & HTMLEntityStart = make("HTMLEntityStart") # &
HTMLEntityNumeric = make("HTMLEntityNumeric") # # HTMLEntityNumeric = make("HTMLEntityNumeric") # #
HTMLEntityHex = make("HTMLEntityHex") # X
HTMLEntityHex = make("HTMLEntityHex") # x
HTMLEntityEnd = make("HTMLEntityEnd") # ; HTMLEntityEnd = make("HTMLEntityEnd") # ;


HeadingBlock = make("HeadingBlock") # =... HeadingBlock = make("HeadingBlock") # =...


Loading…
Cancel
Save