Parcourir la source

Document Builder and contexts.

tags/v0.1
Ben Kurtovic il y a 12 ans
Parent
révision
975be475d3
3 fichiers modifiés avec 39 ajouts et 1 suppressions
  1. +26
    -0
      mwparserfromhell/parser/builder.py
  2. +12
    -0
      mwparserfromhell/parser/contexts.py
  3. +1
    -1
      mwparserfromhell/parser/tokens.py

+ 26
- 0
mwparserfromhell/parser/builder.py Voir le fichier

@@ -32,25 +32,44 @@ from ..wikicode import Wikicode
__all__ = ["Builder"]

class Builder(object):
"""Combines a sequence of tokens into a tree of ``Wikicode`` objects.

To use, pass a list of :py:class:`~mwparserfromhell.parser.tokens.Token`\ s
to the :py:meth:`build` method. The list will be exhausted as it is parsed
and a :py:class:`~mwparserfromhell.wikicode.Wikicode` object will be
returned.
"""

def __init__(self):
self._tokens = []
self._stacks = []

def _wrap(self, nodes):
    """Return *nodes* wrapped in a ``SmartList`` inside a ``Wikicode``."""
    smart_nodes = SmartList(nodes)
    return Wikicode(smart_nodes)

def _push(self):
"""Push a new node list onto the stack."""
self._stacks.append([])

def _pop(self, wrap=True):
"""Pop the topmost node list off of the stack.

If *wrap* is ``True``, we will call :py:meth:`_wrap` on the list.
"""
if wrap:
return self._wrap(self._stacks.pop())
return self._stacks.pop()

def _write(self, item):
"""Append a node to the topmost node list."""
self._stacks[-1].append(item)

def _handle_parameter(self, default):
"""Handle a case where a parameter is at the head of the tokens.

*default* is the value to use if no parameter name is defined.
"""
key = None
showkey = False
self._push()
@@ -71,6 +90,7 @@ class Builder(object):
self._write(self._handle_token(token))

def _handle_template(self):
"""Handle a case where a template is at the head of the tokens."""
params = []
default = 1
self._push()
@@ -91,6 +111,7 @@ class Builder(object):
self._write(self._handle_token(token))

def _handle_entity(self):
"""Handle a case where a HTML entity is at the head of the tokens."""
token = self._tokens.pop()
if isinstance(token, tokens.HTMLEntityNumeric):
token = self._tokens.pop()
@@ -105,6 +126,7 @@ class Builder(object):
return HTMLEntity(token.text, named=True, hexadecimal=False)

def _handle_heading(self, token):
"""Handle a case where a heading is at the head of the tokens."""
level = token.level
self._push()
while self._tokens:
@@ -116,6 +138,7 @@ class Builder(object):
self._write(self._handle_token(token))

def _handle_attribute(self):
"""Handle a case where a tag attribute is at the head of the tokens."""
name, quoted = None, False
self._push()
while self._tokens:
@@ -135,6 +158,7 @@ class Builder(object):
self._write(self._handle_token(token))

def _handle_tag(self, token):
"""Handle a case where a tag is at the head of the tokens."""
type_, showtag = token.type, token.showtag
attrs = []
self._push()
@@ -159,6 +183,7 @@ class Builder(object):
self._write(self._handle_token(token))

def _handle_token(self, token):
"""Handle a single token."""
if isinstance(token, tokens.Text):
return Text(token.text)
elif isinstance(token, tokens.TemplateOpen):
@@ -171,6 +196,7 @@ class Builder(object):
return self._handle_tag(token)

def build(self, tokenlist):
"""Build a Wikicode object from a list tokens and return it."""
self._tokens = tokenlist
self._tokens.reverse()
self._push()


+ 12
- 0
mwparserfromhell/parser/contexts.py Voir le fichier

@@ -20,6 +20,18 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""
This module contains various "context" definitions, which are essentially flags
set during the tokenization process, either on the current parse stack (local
contexts) or affecting all stacks (global contexts). They represent the context
the tokenizer is in, such as inside a template's name definition, or inside a
heading of level two. This is used to determine what tokens are valid at the
current point and also if the current parsing route is invalid.

The tokenizer stores context as an integer, with these definitions bitwise OR'd
to add them, AND'd to check if they're set, and XOR'd to remove them.
"""

# Local (stack-specific) contexts:

TEMPLATE = 0b000000111


+ 1
- 1
mwparserfromhell/parser/tokens.py Voir le fichier

@@ -22,7 +22,7 @@

from __future__ import unicode_literals

from ..compat import basestring, py3k, str
from ..compat import basestring, py3k

__all__ = ["Token"]



Chargement…
Annuler
Enregistrer