From 975be475d3034eabe14f355125fb4eb74807e24c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 21 Aug 2012 19:01:04 -0400 Subject: [PATCH] Document Builder and contexts. --- mwparserfromhell/parser/builder.py | 26 ++++++++++++++++++++++++++ mwparserfromhell/parser/contexts.py | 12 ++++++++++++ mwparserfromhell/parser/tokens.py | 2 +- 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index 12faa20..4636114 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -32,25 +32,44 @@ from ..wikicode import Wikicode __all__ = ["Builder"] class Builder(object): + """Combines a sequence of tokens into a tree of ``Wikicode`` objects. + + To use, pass a list of :py:class:`~mwparserfromhell.parser.tokens.Token`\ s + to the :py:meth:`build` method. The list will be exhausted as it is parsed + and a :py:class:`~mwparserfromhell.wikicode.Wikicode` object will be + returned. + """ + def __init__(self): self._tokens = [] self._stacks = [] def _wrap(self, nodes): + """Properly wrap a list of nodes in a ``Wikicode`` object.""" return Wikicode(SmartList(nodes)) def _push(self): + """Push a new node list onto the stack.""" self._stacks.append([]) def _pop(self, wrap=True): + """Pop the topmost node list off of the stack. + + If *wrap* is ``True``, we will call :py:meth:`_wrap` on the list. + """ if wrap: return self._wrap(self._stacks.pop()) return self._stacks.pop() def _write(self, item): + """Append a node to the topmost node list.""" self._stacks[-1].append(item) def _handle_parameter(self, default): + """Handle a case where a parameter is at the head of the tokens. + + *default* is the value to use if no parameter name is defined. 
+ """ key = None showkey = False self._push() @@ -71,6 +90,7 @@ class Builder(object): self._write(self._handle_token(token)) def _handle_template(self): + """Handle a case where a template is at the head of the tokens.""" params = [] default = 1 self._push() @@ -91,6 +111,7 @@ class Builder(object): self._write(self._handle_token(token)) def _handle_entity(self): + """Handle a case where an HTML entity is at the head of the tokens.""" token = self._tokens.pop() if isinstance(token, tokens.HTMLEntityNumeric): token = self._tokens.pop() @@ -105,6 +126,7 @@ class Builder(object): return HTMLEntity(token.text, named=True, hexadecimal=False) def _handle_heading(self, token): + """Handle a case where a heading is at the head of the tokens.""" level = token.level self._push() while self._tokens: @@ -116,6 +138,7 @@ class Builder(object): self._write(self._handle_token(token)) def _handle_attribute(self): + """Handle a case where a tag attribute is at the head of the tokens.""" name, quoted = None, False self._push() while self._tokens: @@ -135,6 +158,7 @@ class Builder(object): self._write(self._handle_token(token)) def _handle_tag(self, token): + """Handle a case where a tag is at the head of the tokens.""" type_, showtag = token.type, token.showtag attrs = [] self._push() @@ -159,6 +183,7 @@ class Builder(object): self._write(self._handle_token(token)) def _handle_token(self, token): + """Handle a single token.""" if isinstance(token, tokens.Text): return Text(token.text) elif isinstance(token, tokens.TemplateOpen): @@ -171,6 +196,7 @@ class Builder(object): return self._handle_tag(token) def build(self, tokenlist): + """Build a Wikicode object from a list of tokens and return it.""" self._tokens = tokenlist self._tokens.reverse() self._push() diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index 6369ee2..f06ad96 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -20,6 +20,18 @@ # OUT OF OR 
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +""" +This module contains various "context" definitions, which are essentially flags +set during the tokenization process, either on the current parse stack (local +contexts) or affecting all stacks (global contexts). They represent the context +the tokenizer is in, such as inside a template's name definition, or inside a +heading of level two. This is used to determine what tokens are valid at the +current point and also if the current parsing route is invalid. + +The tokenizer stores context as an integer, with these definitions bitwise OR'd +to add them, AND'd to check if they're set, and XOR'd to remove them. +""" + # Local (stack-specific) contexts: TEMPLATE = 0b000000111 diff --git a/mwparserfromhell/parser/tokens.py b/mwparserfromhell/parser/tokens.py index 060f1b2..f687170 100644 --- a/mwparserfromhell/parser/tokens.py +++ b/mwparserfromhell/parser/tokens.py @@ -22,7 +22,7 @@ from __future__ import unicode_literals -from ..compat import basestring, py3k, str +from ..compat import basestring, py3k __all__ = ["Token"]