Bladeren bron

Document mwparserfromhell.nodes, Node, Heading, and HTMLEntity.

tags/v0.1
Ben Kurtovic 12 jaren geleden
bovenliggende
commit
0d224e1a63
3 gewijzigde bestanden met toevoegingen van 44 en 0 verwijderingen
  1. +25
    -0
      mwparserfromhell/nodes/__init__.py
  2. +3
    -0
      mwparserfromhell/nodes/heading.py
  3. +16
    -0
      mwparserfromhell/nodes/html_entity.py

+ 25
- 0
mwparserfromhell/nodes/__init__.py Bestand weergeven

@@ -20,6 +20,17 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""
This package contains :py:class:`~mwparserfromhell.wikicode.Wikicode` "nodes",
which represent a single unit of wikitext, such as a Template, an HTML tag,
a Heading, or plain text. The node "tree" is far from flat, as most types can
contain additional :py:class:`~mwparserfromhell.wikicode.Wikicode` types within
them - and with that, more nodes. For example, the name of a
:py:class:`~mwparserfromhell.nodes.template.Template` is a
:py:class:`~mwparserfromhell.wikicode.Wikicode` object that can contain text or
more templates.
"""

from __future__ import unicode_literals

from ..compat import str
@@ -28,6 +39,20 @@ from ..string_mixin import StringMixIn
__all__ = ["Node", "Text", "Heading", "HTMLEntity", "Tag", "Template"]

class Node(StringMixIn):
"""Represents the base Node type, demonstrating the methods to override.

:py:meth:`__unicode__` must be overridden. It should return a ``unicode``
(or ``str`` in py3k) representation of the node. If the node contains
:py:class:`~mwparserfromhell.wikicode.Wikicode` objects inside of it,
:py:meth:`__iternodes__` should be overridden to yield tuples of
(``wikicode``, ``node_in_wikicode``) for each node in each wikicode, as
well as the node itself (``None``, ``self``). If the node is printable,
:py:meth:`__strip__` should be overridden to return the printable version
of the node - it does not have to be a string, but something that can be
converted to a string with ``str()``. Finally, :py:meth:`__showtree__` can
be overridden to build a nice tree representation of the node, if desired,
for :py:meth:`~mwparserfromhell.wikicode.Wikicode.get_tree`.
"""
def __unicode__(self):
raise NotImplementedError()



+ 3
- 0
mwparserfromhell/nodes/heading.py Bestand weergeven

@@ -28,6 +28,7 @@ from ..compat import str
__all__ = ["Heading"]

class Heading(Node):
"""Represents a section heading in wikicode, like ``== Foo ==``."""
def __init__(self, title, level):
super(Heading, self).__init__()
self._title = title
@@ -51,8 +52,10 @@ class Heading(Node):

@property
def title(self):
"""The title of the heading itself, as a ``Wikicode`` object."""
return self._title

@property
def level(self):
"""The heading level, as an integer between 1 and 6, inclusive."""
return self._level

+ 16
- 0
mwparserfromhell/nodes/html_entity.py Bestand weergeven

@@ -28,6 +28,7 @@ from ..compat import htmlentities
__all__ = ["HTMLEntity"]

class HTMLEntity(Node):
"""Represents an HTML entity, like ``&nbsp;``, either named or unnamed."""
def __init__(self, value, named=None, hexadecimal=False, hex_char="x"):
super(HTMLEntity, self).__init__()
self._value = value
@@ -86,21 +87,36 @@ class HTMLEntity(Node):

@property
def value(self):
"""The string value of the HTML entity."""
return self._value

@property
def named(self):
"""Whether the entity is a string name for a codepoint or an integer.

For example, ``&Sigma;``, ``&#931;``, and ``&#x3a3;`` refer to the same
character, but only the first is "named", while the others are integer
representations of the codepoint.
"""
return self._named

@property
def hexadecimal(self):
"""If unnamed, this is whether the value is hexadecimal or decimal."""
return self._hexadecimal

@property
def hex_char(self):
"""If the value is hexadecimal, this is the letter denoting that.

For example, the hex_char of ``"&#x1234;"`` is ``"x"``, whereas the
hex_char of ``"&#X1234;"`` is ``"X"``. Lowercase and uppercase ``x``
are the only values supported.
"""
return self._hex_char

def normalize(self):
"""Return the unicode character represented by the HTML entity."""
if self.named:
return unichr(htmlentities.name2codepoint[self.value])
if self.hexadecimal:


Laden…
Annuleren
Opslaan