@@ -20,6 +20,17 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
""" | |||||
This package contains :py:class:`~mwparserfromhell.wikicode.Wikicode` "nodes", | |||||
which represent a single unit of wikitext, such as a Template, an HTML tag, | |||||
a Heading, or plain text. The node "tree" is far from flat, as most types can | |||||
contain additional :py:class:`~mwparserfromhell.wikicode.Wikicode` types within | |||||
them - and with that, more nodes. For example, the name of a | |||||
:py:class:`~mwparserfromhell.nodes.template.Template` is a | |||||
:py:class:`~mwparserfromhell.wikicode.Wikicode` object that can contain text or | |||||
more templates. | |||||
""" | |||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
from ..compat import str | from ..compat import str | ||||
@@ -28,6 +39,20 @@ from ..string_mixin import StringMixIn | |||||
__all__ = ["Node", "Text", "Heading", "HTMLEntity", "Tag", "Template"] | __all__ = ["Node", "Text", "Heading", "HTMLEntity", "Tag", "Template"] | ||||
class Node(StringMixIn): | class Node(StringMixIn): | ||||
"""Represents the base Node type, demonstrating the methods to override. | |||||
:py:meth:`__unicode__` must be overridden. It should return a ``unicode`` | |||||
(or ``str`` in py3k) representation of the node. If the node contains | |||||
:py:class:`~mwparserfromhell.wikicode.Wikicode` objects inside of it, | |||||
:py:meth:`__iternodes__` should be overridden to yield tuples of | |||||
(``wikicode``, ``node_in_wikicode``) for each node in each wikicode, as | |||||
well as the node itself (``None``, ``self``). If the node is printable, | |||||
:py:meth:`__strip__` should be overridden to return the printable version | |||||
of the node - it does not have to be a string, but something that can be | |||||
converted to a string with ``str()``. Finally, :py:meth:`__showtree__` can | |||||
be overridden to build a nice tree representation of the node, if desired, | |||||
for :py:meth:`~mwparserfromhell.wikicode.Wikicode.get_tree`. | |||||
""" | |||||
def __unicode__(self): | def __unicode__(self): | ||||
raise NotImplementedError() | raise NotImplementedError() | ||||
@@ -28,6 +28,7 @@ from ..compat import str | |||||
__all__ = ["Heading"] | __all__ = ["Heading"] | ||||
class Heading(Node): | class Heading(Node): | ||||
"""Represents a section heading in wikicode, like ``== Foo ==``.""" | |||||
def __init__(self, title, level): | def __init__(self, title, level): | ||||
super(Heading, self).__init__() | super(Heading, self).__init__() | ||||
self._title = title | self._title = title | ||||
@@ -51,8 +52,10 @@ class Heading(Node): | |||||
@property | @property | ||||
def title(self): | def title(self): | ||||
"""The title of the heading itself, as a ``Wikicode`` object.""" | |||||
return self._title | return self._title | ||||
@property | @property | ||||
def level(self): | def level(self): | ||||
"""The heading level, as an integer between 1 and 6, inclusive.""" | |||||
return self._level | return self._level |
@@ -28,6 +28,7 @@ from ..compat import htmlentities | |||||
__all__ = ["HTMLEntity"] | __all__ = ["HTMLEntity"] | ||||
class HTMLEntity(Node): | class HTMLEntity(Node): | ||||
"""Represents an HTML entity, like ``&nbsp;``, either named or unnamed.""" | |||||
def __init__(self, value, named=None, hexadecimal=False, hex_char="x"): | def __init__(self, value, named=None, hexadecimal=False, hex_char="x"): | ||||
super(HTMLEntity, self).__init__() | super(HTMLEntity, self).__init__() | ||||
self._value = value | self._value = value | ||||
@@ -86,21 +87,36 @@ class HTMLEntity(Node): | |||||
@property | @property | ||||
def value(self): | def value(self): | ||||
"""The string value of the HTML entity.""" | |||||
return self._value | return self._value | ||||
@property | @property | ||||
def named(self): | def named(self): | ||||
"""Whether the entity is a string name for a codepoint or an integer. | |||||
For example, ``&Sigma;``, ``&#931;``, and ``&#x3a3;`` refer to the same | |||||
character, but only the first is "named", while the others are integer | |||||
representations of the codepoint. | |||||
""" | |||||
return self._named | return self._named | ||||
@property | @property | ||||
def hexadecimal(self): | def hexadecimal(self): | ||||
"""If unnamed, this is whether the value is hexadecimal or decimal.""" | |||||
return self._hexadecimal | return self._hexadecimal | ||||
@property | @property | ||||
def hex_char(self): | def hex_char(self): | ||||
"""If the value is hexadecimal, this is the letter denoting that. | |||||
For example, the hex_char of ``"&#x1234;"`` is ``"x"``, whereas the | |||||
hex_char of ``"&#X1234;"`` is ``"X"``. Lowercase and uppercase ``x`` | |||||
are the only values supported. | |||||
""" | |||||
return self._hex_char | return self._hex_char | ||||
def normalize(self): | def normalize(self): | ||||
"""Return the unicode character represented by the HTML entity.""" | |||||
if self.named: | if self.named: | ||||
return unichr(htmlentities.name2codepoint[self.value]) | return unichr(htmlentities.name2codepoint[self.value]) | ||||
if self.hexadecimal: | if self.hexadecimal: | ||||