From c01f78aee1a4d1f8c83e6295a3e9c917e8578ab2 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 22 Aug 2012 02:06:33 -0400 Subject: [PATCH] Implement arguments in tokenizer - preliminary results are promising. --- mwparserfromhell/nodes/heading.py | 2 +- mwparserfromhell/parser/contexts.py | 53 +++++++++++++++++++++--------------- mwparserfromhell/parser/tokenizer.py | 52 ++++++++++++++++++++++++++++++----- 3 files changed, 77 insertions(+), 30 deletions(-) diff --git a/mwparserfromhell/nodes/heading.py b/mwparserfromhell/nodes/heading.py index a2b1ca0..97878b2 100644 --- a/mwparserfromhell/nodes/heading.py +++ b/mwparserfromhell/nodes/heading.py @@ -24,7 +24,7 @@ from __future__ import unicode_literals from . import Node from ..compat import str -from ...utils import parse_anything +from ..utils import parse_anything __all__ = ["Heading"] diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index 9719cbc..5969239 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -35,20 +35,25 @@ will cover ``BAR == 0b10`` and ``BAZ == 0b01``). Local (stack-specific) contexts: -* :py:const:`TEMPLATE` (``0b000000111``) +* :py:const:`TEMPLATE` (``0b00000000111``) - * :py:const:`TEMPLATE_NAME` (``0b000000001``) - * :py:const:`TEMPLATE_PARAM_KEY` (``0b000000010``) - * :py:const:`TEMPLATE_PARAM_VALUE` (``0b000000100``) + * :py:const:`TEMPLATE_NAME` (``0b00000000001``) + * :py:const:`TEMPLATE_PARAM_KEY` (``0b00000000010``) + * :py:const:`TEMPLATE_PARAM_VALUE` (``0b00000000100``) + +* :py:const:`ARGUMENT` (``0b00000011000``) + + * :py:const:`ARGUMENT_NAME` (``0b00000001000``) + * :py:const:`ARGUMENT_DEFAULT` (``0b00000010000``) * :py:const:`HEADING` (``0b111111000``) - * :py:const:`HEADING_LEVEL_1` (``0b000001000``) - * :py:const:`HEADING_LEVEL_2` (``0b000010000``) - * :py:const:`HEADING_LEVEL_3` (``0b000100000``) - * :py:const:`HEADING_LEVEL_4` (``0b001000000``) - * :py:const:`HEADING_LEVEL_5` (``0b010000000``) - * :py:const:`HEADING_LEVEL_6` (``0b100000000``) + * :py:const:`HEADING_LEVEL_1` (``0b00000100000``) + * :py:const:`HEADING_LEVEL_2` (``0b00001000000``) + * :py:const:`HEADING_LEVEL_3` (``0b00010000000``) + * :py:const:`HEADING_LEVEL_4` (``0b00100000000``) + * :py:const:`HEADING_LEVEL_5` (``0b01000000000``) + * :py:const:`HEADING_LEVEL_6` (``0b10000000000``) Global contexts: @@ -57,18 +62,22 @@ Global contexts: # Local contexts: -TEMPLATE = 0b000000111 -TEMPLATE_NAME = 0b000000001 -TEMPLATE_PARAM_KEY = 0b000000010 -TEMPLATE_PARAM_VALUE = 0b000000100 - -HEADING = 0b111111000 -HEADING_LEVEL_1 = 0b000001000 -HEADING_LEVEL_2 = 0b000010000 -HEADING_LEVEL_3 = 0b000100000 -HEADING_LEVEL_4 = 0b001000000 -HEADING_LEVEL_5 = 0b010000000 -HEADING_LEVEL_6 = 0b100000000 +TEMPLATE = 0b00000000111 +TEMPLATE_NAME = 0b00000000001 +TEMPLATE_PARAM_KEY = 0b00000000010 +TEMPLATE_PARAM_VALUE = 0b00000000100 + +ARGUMENT = 0b00000011000 +ARGUMENT_NAME = 0b00000001000 +ARGUMENT_DEFAULT = 0b00000010000 + +HEADING = 0b11111100000 +HEADING_LEVEL_1 = 0b00000100000 +HEADING_LEVEL_2 = 0b00001000000 +HEADING_LEVEL_3 = 0b00010000000 +HEADING_LEVEL_4 = 0b00100000000 +HEADING_LEVEL_5 = 0b01000000000 +HEADING_LEVEL_6 = 0b10000000000 # Global contexts: diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index b002c10..d6c3444 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -135,10 +135,23 @@ class Tokenizer(object): self._fail_route() return self.END - def _parse_template(self): + def _parse_template_or_argument(self): """Parse a template at the head of the wikicode string.""" reset = self._head self._head += 2 + + if self._read() == "{": + self._head += 1 + try: + argument = self._parse(contexts.ARGUMENT_NAME) + except BadRoute: + pass + else: + self._write(tokens.ArgumentOpen()) + self._write_all(argument) + self._write(tokens.ArgumentClose()) + return + try: template = self._parse(contexts.TEMPLATE_NAME) except BadRoute: @@ -181,12 +194,26 @@ class Tokenizer(object): self._write(tokens.TemplateParamEquals()) def _handle_template_end(self): - """Handle the end of the template at the head of the string.""" + """Handle the end of a template at the head of the string.""" if self._context & contexts.TEMPLATE_NAME: self._verify_no_newlines() self._head += 1 return self._pop() + def _handle_argument_separator(self): + """Handle the separator between an argument's name and default.""" + self._verify_no_newlines() + self._context ^= contexts.ARGUMENT_NAME + self._context |= contexts.ARGUMENT_DEFAULT + self._write(tokens.ArgumentSeparator()) + + def _handle_argument_end(self): + """Handle the end of an argument at the head of the string.""" + if self._context & contexts.TEMPLATE_NAME: + self._verify_no_newlines() + self._head += 2 + return self._pop() + def _parse_heading(self): """Parse a section heading at the head of the wikicode string.""" self._global |= contexts.GL_HEADING @@ -299,20 +326,31 @@ class Tokenizer(object): self._head += 1 continue if this is self.END: - if self._context & (contexts.TEMPLATE | contexts.HEADING): + fail = contexts.TEMPLATE | contexts.ARGUMENT | contexts.HEADING + if self._context & fail: self._fail_route() return self._pop() - prev, next = self._read(-1), self._read(1) + next = self._read(1) if this == next == "{": - self._parse_template() + self._parse_template_or_argument() elif this == "|" and self._context & contexts.TEMPLATE: self._handle_template_param() elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY: self._handle_template_param_value() elif this == next == "}" and self._context & contexts.TEMPLATE: return self._handle_template_end() - elif (prev == "\n" or prev == self.START) and this == "=" and not self._global & contexts.GL_HEADING: - self._parse_heading() + elif this == "|" and self._context & contexts.ARGUMENT_NAME: + self._handle_argument_separator() + elif this == next == "}" and self._context & contexts.ARGUMENT: + if self._read(2) == "}": + return self._handle_argument_end() + else: + self._write_text("}") + elif this == "=" and not self._global & contexts.GL_HEADING: + if self._read(-1) in ("\n", self.START): + self._parse_heading() + else: + self._write_text("=") elif this == "=" and self._context & contexts.HEADING: return self._handle_heading_end() elif this == "\n" and self._context & contexts.HEADING: