Browse Source

Document Builder and contexts.

tags/v0.1
Ben Kurtovic 11 years ago
parent
commit
975be475d3
3 changed files with 39 additions and 1 deletions
  1. +26
    -0
      mwparserfromhell/parser/builder.py
  2. +12
    -0
      mwparserfromhell/parser/contexts.py
  3. +1
    -1
      mwparserfromhell/parser/tokens.py

+ 26
- 0
mwparserfromhell/parser/builder.py View File

@@ -32,25 +32,44 @@ from ..wikicode import Wikicode
__all__ = ["Builder"] __all__ = ["Builder"]


class Builder(object): class Builder(object):
"""Combines a sequence of tokens into a tree of ``Wikicode`` objects.

To use, pass a list of :py:class:`~mwparserfromhell.parser.tokens.Token`\ s
to the :py:meth:`build` method. The list will be exhausted as it is parsed
and a :py:class:`~mwparserfromhell.wikicode.Wikicode` object will be
returned.
"""

def __init__(self): def __init__(self):
self._tokens = [] self._tokens = []
self._stacks = [] self._stacks = []


def _wrap(self, nodes): def _wrap(self, nodes):
"""Properly wrap a list of nodes in a ``Wikicode`` object."""
return Wikicode(SmartList(nodes)) return Wikicode(SmartList(nodes))


def _push(self): def _push(self):
"""Push a new node list onto the stack."""
self._stacks.append([]) self._stacks.append([])


def _pop(self, wrap=True): def _pop(self, wrap=True):
"""Pop the topmost node list off of the stack.

If *wrap* is ``True``, we will call :py:meth:`_wrap` on the list.
"""
if wrap: if wrap:
return self._wrap(self._stacks.pop()) return self._wrap(self._stacks.pop())
return self._stacks.pop() return self._stacks.pop()


def _write(self, item): def _write(self, item):
"""Append a node to the topmost node list."""
self._stacks[-1].append(item) self._stacks[-1].append(item)


def _handle_parameter(self, default): def _handle_parameter(self, default):
"""Handle a case where a parameter is at the head of the tokens.

*default* is the value to use if no parameter name is defined.
"""
key = None key = None
showkey = False showkey = False
self._push() self._push()
@@ -71,6 +90,7 @@ class Builder(object):
self._write(self._handle_token(token)) self._write(self._handle_token(token))


def _handle_template(self): def _handle_template(self):
"""Handle a case where a template is at the head of the tokens."""
params = [] params = []
default = 1 default = 1
self._push() self._push()
@@ -91,6 +111,7 @@ class Builder(object):
self._write(self._handle_token(token)) self._write(self._handle_token(token))


def _handle_entity(self): def _handle_entity(self):
"""Handle a case where an HTML entity is at the head of the tokens."""
token = self._tokens.pop() token = self._tokens.pop()
if isinstance(token, tokens.HTMLEntityNumeric): if isinstance(token, tokens.HTMLEntityNumeric):
token = self._tokens.pop() token = self._tokens.pop()
@@ -105,6 +126,7 @@ class Builder(object):
return HTMLEntity(token.text, named=True, hexadecimal=False) return HTMLEntity(token.text, named=True, hexadecimal=False)


def _handle_heading(self, token): def _handle_heading(self, token):
"""Handle a case where a heading is at the head of the tokens."""
level = token.level level = token.level
self._push() self._push()
while self._tokens: while self._tokens:
@@ -116,6 +138,7 @@ class Builder(object):
self._write(self._handle_token(token)) self._write(self._handle_token(token))


def _handle_attribute(self): def _handle_attribute(self):
"""Handle a case where a tag attribute is at the head of the tokens."""
name, quoted = None, False name, quoted = None, False
self._push() self._push()
while self._tokens: while self._tokens:
@@ -135,6 +158,7 @@ class Builder(object):
self._write(self._handle_token(token)) self._write(self._handle_token(token))


def _handle_tag(self, token): def _handle_tag(self, token):
"""Handle a case where a tag is at the head of the tokens."""
type_, showtag = token.type, token.showtag type_, showtag = token.type, token.showtag
attrs = [] attrs = []
self._push() self._push()
@@ -159,6 +183,7 @@ class Builder(object):
self._write(self._handle_token(token)) self._write(self._handle_token(token))


def _handle_token(self, token): def _handle_token(self, token):
"""Handle a single token."""
if isinstance(token, tokens.Text): if isinstance(token, tokens.Text):
return Text(token.text) return Text(token.text)
elif isinstance(token, tokens.TemplateOpen): elif isinstance(token, tokens.TemplateOpen):
@@ -171,6 +196,7 @@ class Builder(object):
return self._handle_tag(token) return self._handle_tag(token)


def build(self, tokenlist): def build(self, tokenlist):
"""Build a Wikicode object from a list of tokens and return it."""
self._tokens = tokenlist self._tokens = tokenlist
self._tokens.reverse() self._tokens.reverse()
self._push() self._push()


+ 12
- 0
mwparserfromhell/parser/contexts.py View File

@@ -20,6 +20,18 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE. # SOFTWARE.


"""
This module contains various "context" definitions, which are essentially flags
set during the tokenization process, either on the current parse stack (local
contexts) or affecting all stacks (global contexts). They represent the context
the tokenizer is in, such as inside a template's name definition, or inside a
heading of level two. This is used to determine what tokens are valid at the
current point and also if the current parsing route is invalid.

The tokenizer stores context as an integer, with these definitions bitwise OR'd
to add them, AND'd to check if they're set, and XOR'd to remove them.
"""

# Local (stack-specific) contexts: # Local (stack-specific) contexts:


TEMPLATE = 0b000000111 TEMPLATE = 0b000000111


+ 1
- 1
mwparserfromhell/parser/tokens.py View File

@@ -22,7 +22,7 @@


from __future__ import unicode_literals from __future__ import unicode_literals


from ..compat import basestring, py3k, str
from ..compat import basestring, py3k


__all__ = ["Token"] __all__ = ["Token"]




Loading…
Cancel
Save