@@ -20,6 +20,17 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
""" | |||
This package contains :py:class:`~mwparserfromhell.wikicode.Wikicode` "nodes", | |||
which represent a single unit of wikitext, such as a Template, an HTML tag, | |||
a Heading, or plain text. The node "tree" is far from flat, as most types can | |||
contain additional :py:class:`~mwparserfromhell.wikicode.Wikicode` types within | |||
them - and with that, more nodes. For example, the name of a | |||
:py:class:`~mwparserfromhell.nodes.template.Template` is a | |||
:py:class:`~mwparserfromhell.wikicode.Wikicode` object that can contain text or | |||
more templates. | |||
""" | |||
from __future__ import unicode_literals | |||
from ..compat import str | |||
@@ -28,6 +39,20 @@ from ..string_mixin import StringMixIn | |||
__all__ = ["Node", "Text", "Heading", "HTMLEntity", "Tag", "Template"] | |||
class Node(StringMixIn): | |||
"""Represents the base Node type, demonstrating the methods to override. | |||
:py:meth:`__unicode__` must be overridden. It should return a ``unicode`` | |||
(or ``str`` in py3k) representation of the node. If the node contains | |||
:py:class:`~mwparserfromhell.wikicode.Wikicode` objects inside of it, | |||
:py:meth:`__iternodes__` should be overridden to yield tuples of | |||
(``wikicode``, ``node_in_wikicode``) for each node in each wikicode, as | |||
well as the node itself (``None``, ``self``). If the node is printable, | |||
:py:meth:`__strip__` should be overridden to return the printable version | |||
of the node - it does not have to be a string, but something that can be | |||
converted to a string with ``str()``. Finally, :py:meth:`__showtree__` can | |||
be overridden to build a nice tree representation of the node, if desired, | |||
for :py:meth:`~mwparserfromhell.wikicode.Wikicode.get_tree`. | |||
""" | |||
def __unicode__(self):
    """Return the text form of the node; the base implementation only raises.

    :raises NotImplementedError: always, as nothing is implemented here.
    """
    raise NotImplementedError
@@ -28,6 +28,7 @@ from ..compat import str | |||
__all__ = ["Heading"] | |||
class Heading(Node): | |||
"""Represents a section heading in wikicode, like ``== Foo ==``.""" | |||
def __init__(self, title, level): | |||
super(Heading, self).__init__() | |||
self._title = title | |||
@@ -51,8 +52,10 @@ class Heading(Node): | |||
@property
def title(self):
    """The heading's own title, exposed as a ``Wikicode`` object.

    Returns the stored ``_title`` attribute unchanged.
    """
    heading_title = self._title
    return heading_title
@property
def level(self):
    """The level of the heading: an integer from 1 through 6, inclusive.

    Returns the stored ``_level`` attribute unchanged.
    """
    heading_level = self._level
    return heading_level
@@ -28,6 +28,7 @@ from ..compat import htmlentities | |||
__all__ = ["HTMLEntity"] | |||
class HTMLEntity(Node): | |||
"""Represents an HTML entity, like ``&nbsp;``, either named or unnamed.""" | |||
def __init__(self, value, named=None, hexadecimal=False, hex_char="x"): | |||
super(HTMLEntity, self).__init__() | |||
self._value = value | |||
@@ -86,21 +87,36 @@ class HTMLEntity(Node): | |||
@property
def value(self):
    """The value of the HTML entity, as a string.

    Returns the stored ``_value`` attribute unchanged.
    """
    entity_value = self._value
    return entity_value
@property
def named(self):
    """Whether the entity is a string name for a codepoint or an integer.

    For example, ``&Sigma;``, ``&#931;``, and ``&#x3a3;`` all refer to the
    same character, but only the first one is "named"; the other two are
    integer representations of the codepoint.
    """
    is_named = self._named
    return is_named
@property
def hexadecimal(self):
    """For an unnamed entity, whether its value is hexadecimal or decimal.

    Returns the stored ``_hexadecimal`` attribute unchanged.
    """
    is_hexadecimal = self._hexadecimal
    return is_hexadecimal
@property
def hex_char(self):
    """For a hexadecimal value, the letter that marks it as hexadecimal.

    For example, the hex_char of ``"&#x1234;"`` is ``"x"``, whereas the
    hex_char of ``"&#X1234;"`` is ``"X"``. Lowercase and uppercase ``x``
    are the only values supported.
    """
    marker_letter = self._hex_char
    return marker_letter
def normalize(self): | |||
"""Return the unicode character represented by the HTML entity.""" | |||
if self.named: | |||
return unichr(htmlentities.name2codepoint[self.value]) | |||
if self.hexadecimal: | |||